From 3ab481a1cfe1511b94e142b648e2c5ade9175ed3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Mar 2019 06:47:45 -0800 Subject: perf script: Support insn output for normal samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf script -F +insn was only working for PT traces because the PT instruction decoder was filling in the insn/insn_len sample attributes. Support it for non PT samples too on x86 using the existing x86 instruction decoder. This adds some extra checking to ensure that we don't try to decode instructions when using perf.data from a different architecture. % perf record -a sleep 1 % perf script -F ip,sym,insn --xed ffffffff811704c9 remote_function movl %eax, 0x18(%rbx) ffffffff8100bb50 intel_bts_enable_local retq ffffffff81048612 native_apic_mem_write movl %esi, -0xa04000(%rdi) ffffffff81048612 native_apic_mem_write movl %esi, -0xa04000(%rdi) ffffffff81048612 native_apic_mem_write movl %esi, -0xa04000(%rdi) ffffffff810f1f79 generic_exec_single xor %eax, %eax ffffffff811704c9 remote_function movl %eax, 0x18(%rbx) ffffffff8100bb34 intel_bts_enable_local movl 0x2000(%rax), %edx ffffffff81048610 native_apic_mem_write mov %edi, %edi ... Committer testing: Before: # perf script -F ip,sym,insn --xed | head -5 ffffffffa4068804 native_write_msr addb %al, (%rax) ffffffffa4068804 native_write_msr addb %al, (%rax) ffffffffa4068804 native_write_msr addb %al, (%rax) ffffffffa4068806 native_write_msr addb %al, (%rax) ffffffffa4068806 native_write_msr addb %al, (%rax) # perf script -F ip,sym,insn --xed | grep -v "addb %al, (%rax)" # After: # perf script -F ip,sym,insn --xed | head -5 ffffffffa4068804 native_write_msr wrmsr ffffffffa4068804 native_write_msr wrmsr ffffffffa4068804 native_write_msr wrmsr ffffffffa4068806 native_write_msr nopl %eax, (%rax,%rax,1) ffffffffa4068806 native_write_msr nopl %eax, (%rax,%rax,1) # perf script -F ip,sym,insn --xed | grep -v "addb %al, (%rax)" | head -5 ffffffffa4068804 native_write_msr wrmsr ffffffffa4068804 native_write_msr wrmsr ffffffffa4068804 native_write_msr wrmsr ffffffffa4068806 native_write_msr nopl %eax, (%rax,%rax,1) ffffffffa4068806 native_write_msr nopl %eax, (%rax,%rax,1) # More examples: # perf script -F ip,sym,insn --xed | grep -v native_write_msr | head ffffffffa416b90e tick_check_broadcast_expired btq %rax, 0x1a5f42a(%rip) ffffffffa4956bd0 nmi_cpu_backtrace pushq %r13 ffffffffa415b95e __hrtimer_next_event_base movq 0x18(%rax), %rdx ffffffffa4956bf3 nmi_cpu_backtrace popq %r12 ffffffffa4171d5c smp_call_function_single pause ffffffffa4956bdd nmi_cpu_backtrace mov %ebp, %r12d ffffffffa4797e4d menu_select cmp $0x190, %rax ffffffffa4171d5c smp_call_function_single pause ffffffffa405a7d8 nmi_cpu_backtrace_handler callq 0xffffffffa4956bd0 ffffffffa4797f7a menu_select shr $0x3, %rax # Which matches the annotate output modulo resolving callqs: # perf annotate --stdio2 nmi_cpu_backtrace_handler Samples: 4 of event 'cycles:ppp', 4000 Hz, Event count (approx.): 35908, [percent: local period] nmi_cpu_backtrace_handler() /lib/modules/5.0.0+/build/vmlinux Percent Disassembly of section .text: ffffffff8105a7d0 : nmi_cpu_backtrace_handler(): nmi_trigger_cpumask_backtrace(mask, exclude_self, nmi_raise_cpu_backtrace); } static int nmi_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { 24.45 → callq __fentry__ if (nmi_cpu_backtrace(regs)) mov %rsi,%rdi 75.55 → callq nmi_cpu_backtrace return NMI_HANDLED; movzbl %al,%eax return NMI_DONE; } ← retq # # perf annotate --stdio2 __hrtimer_next_event_base Samples: 4 of event 'cycles:ppp', 4000 Hz, Event count (approx.): 767977, [percent: local period] __hrtimer_next_event_base() /lib/modules/5.0.0+/build/vmlinux Percent Disassembly of section .text: ffffffff8115b910 <__hrtimer_next_event_base>: __hrtimer_next_event_base(): static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, const struct hrtimer *exclude, unsigned int active, ktime_t expires_next) { → callq __fentry__ 4a: add $0x1,%r14 77.31 mov 0x18(%rax),%rdx shl $0x6,%r14 sub 0x38(%rbx,%r14,1),%rdx if (expires < expires_next) { cmp %r12,%rdx ↓ jge 68 Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190305144758.12397-3-andi@firstfloor.org [ Converted fetch_exe() to use the name it ended up having when merged: thread__memcpy() ] [ archinsn.c needs the instruction decoder that is only build when CONFIG_AUXTRACE=y, fix that ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 53f78cf3113f..a5080afd361d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -29,10 +29,12 @@ #include "util/time-utils.h" #include "util/path.h" #include "print_binary.h" +#include "archinsn.h" #include #include #include #include +#include #include "asm/bug.h" #include "util/mem-events.h" #include "util/dump-insn.h" @@ -63,6 +65,7 @@ static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; static int max_blocks; +static bool native_arch; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -1227,6 +1230,12 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, return len + dlen; } +__weak void arch_fetch_insn(struct perf_sample *sample __maybe_unused, + struct thread *thread __maybe_unused, + struct machine *machine __maybe_unused) +{ +} + static int perf_sample__fprintf_insn(struct perf_sample *sample, struct perf_event_attr *attr, struct thread *thread, @@ -1234,9 +1243,12 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, { int printed = 0; + if (sample->insn_len == 0 && native_arch) + arch_fetch_insn(sample, thread, machine); + if (PRINT_FIELD(INSNLEN)) printed += fprintf(fp, " ilen: %d", sample->insn_len); - if (PRINT_FIELD(INSN)) { + if (PRINT_FIELD(INSN) && sample->insn_len) { int i; printed += fprintf(fp, " insn:"); @@ -3277,6 +3289,7 @@ int cmd_script(int argc, const char **argv) .set = false, .default_no_sample = true, }; + struct utsname uts; char *script_path = NULL; const char **__argv; int i, j, err = 0; @@ -3615,6 +3628,12 @@ int cmd_script(int argc, const char **argv) if (symbol__init(&session->header.env) < 0) goto out_delete; + uname(&uts); + if (!strcmp(uts.machine, session->header.env.arch) || + (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386"))) + native_arch = true; + script.session = session; script__setup_sample_type(&script); -- cgit v1.2.3 From 52bab8868211b7c504146f6239e101421d4d125b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Mar 2019 06:47:47 -0800 Subject: perf report: Support output in nanoseconds Upcoming changes add timestamp output in perf report. Add a --ns argument similar to perf script to support nanoseconds resolution when needed. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190305144758.12397-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a5080afd361d..111787e83784 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -60,7 +60,6 @@ static bool no_callchain; static bool latency_format; static bool system_wide; static bool print_flags; -static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; @@ -691,7 +690,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, secs = nsecs / NSEC_PER_SEC; nsecs -= secs * NSEC_PER_SEC; - if (nanosecs) + if (symbol_conf.nanosecs) printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs); else { char sample_time[32]; @@ -3244,7 +3243,7 @@ static int parse_insn_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "+insn,-event,-period", 0); itrace_parse_synth_opts(opt, "i0ns", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3262,7 +3261,7 @@ static int parse_call_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); itrace_parse_synth_opts(opt, "cewp", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3272,7 +3271,7 @@ static int parse_callret_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0); itrace_parse_synth_opts(opt, "crewp", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3408,7 +3407,7 @@ int cmd_script(int argc, const char **argv) OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_INTEGER(0, "max-blocks", &max_blocks, "Maximum number of code blocks to dump with brstackinsn"), - OPT_BOOLEAN(0, "ns", &nanosecs, + OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options\n" ITRACE_HELP, -- cgit v1.2.3 From e87e548126cdc66fd4f194b38b59f351b6e5d3e8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:44:52 -0700 Subject: perf script: Filter COMM/FORK/.. events by CPU The --cpu option only filtered samples. Filter other perf events, such as COMM, FORK, SWITCH by the CPU too. Reported-by: Jiri Olsa Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 71 ++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 24 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 111787e83784..b695b20ffc8a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1933,6 +1933,13 @@ static int cleanup_scripting(void) return scripting_ops ? scripting_ops->stop_script() : 0; } +static bool filter_cpu(struct perf_sample *sample) +{ + if (cpu_list) + return !test_bit(sample->cpu, cpu_bitmap); + return false; +} + static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -1967,7 +1974,7 @@ static int process_sample_event(struct perf_tool *tool, if (al.filtered) goto out_put; - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) + if (filter_cpu(sample)) goto out_put; if (scripting_ops) @@ -2052,9 +2059,11 @@ static int process_comm_event(struct perf_tool *tool, sample->tid = event->comm.tid; sample->pid = event->comm.pid; } - perf_sample__fprintf_start(sample, thread, evsel, + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, PERF_RECORD_COMM, stdout); - perf_event__fprintf(event, stdout); + perf_event__fprintf(event, stdout); + } ret = 0; out: thread__put(thread); @@ -2088,9 +2097,11 @@ static int process_namespaces_event(struct perf_tool *tool, sample->tid = event->namespaces.tid; sample->pid = event->namespaces.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_NAMESPACES, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_NAMESPACES, stdout); + perf_event__fprintf(event, stdout); + } ret = 0; out: thread__put(thread); @@ -2122,9 +2133,11 @@ static int process_fork_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_FORK, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_FORK, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; @@ -2152,9 +2165,11 @@ static int process_exit_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_EXIT, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_EXIT, stdout); + perf_event__fprintf(event, stdout); + } if (perf_event__process_exit(tool, event, sample, machine) < 0) err = -1; @@ -2188,9 +2203,11 @@ static int process_mmap_event(struct perf_tool *tool, sample->tid = event->mmap.tid; sample->pid = event->mmap.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2220,9 +2237,11 @@ static int process_mmap2_event(struct perf_tool *tool, sample->tid = event->mmap2.tid; sample->pid = event->mmap2.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP2, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP2, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2247,9 +2266,11 @@ static int process_switch_event(struct perf_tool *tool, return -1; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_SWITCH, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SWITCH, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2270,9 +2291,11 @@ process_lost_event(struct perf_tool *tool, if (thread == NULL) return -1; - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_LOST, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_LOST, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } -- cgit v1.2.3 From 905e4aff31382c3f9b2014d1361f4a1be4479ba2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:45:01 -0700 Subject: perf script: Add array bound checking to list_scripts Don't overflow array when the scripts directory is too large, or the script file name is too long. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-11-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b695b20ffc8a..2f93d60c5a17 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2982,7 +2982,8 @@ static int check_ev_match(char *dir_name, char *scriptname, * will list all statically runnable scripts, select one, execute it and * show the output in a perf browser. */ -int find_scripts(char **scripts_array, char **scripts_path_array) +int find_scripts(char **scripts_array, char **scripts_path_array, int num, + int pathlen) { struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; @@ -3027,7 +3028,10 @@ int find_scripts(char **scripts_array, char **scripts_path_array) /* Skip those real time scripts: xxxtop.p[yl] */ if (strstr(script_dirent->d_name, "top.")) continue; - sprintf(scripts_path_array[i], "%s/%s", lang_path, + if (i >= num) + break; + snprintf(scripts_path_array[i], pathlen, "%s/%s", + lang_path, script_dirent->d_name); temp = strchr(script_dirent->d_name, '.'); snprintf(scripts_array[i], -- cgit v1.2.3 From 90b10f47c0ee2a70bd036d9da5e810f522b54a8f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:50:00 -0700 Subject: perf script: Support relative time When comparing time stamps in 'perf script' traces it can be annoying to work with the full perf time stamps. Add a --reltime option that displays time stamps relative to the trace start to make it easier to read the traces. Note: not currently supported for --time. Report an error in this case. Before: % perf script swapper 0 [000] 245402.891216: 1 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 245402.891223: 1 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 245402.891227: 5 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 245402.891231: 41 cycles:ppp: ffffffffa0068816 native_write_msr+0x6 ([kernel.kallsyms]) swapper 0 [000] 245402.891235: 355 cycles:ppp: ffffffffa000dd51 intel_bts_enable_local+0x21 ([kernel.kallsyms]) swapper 0 [000] 245402.891239: 3084 cycles:ppp: ffffffffa0a0150a end_repeat_nmi+0x48 ([kernel.kallsyms]) After: % perf script --reltime swapper 0 [000] 0.000000: 1 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 0.000006: 1 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 0.000010: 5 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 0.000014: 41 cycles:ppp: ffffffffa0068816 native_write_msr+0x6 ([kernel.kallsyms]) swapper 0 [000] 0.000018: 355 cycles:ppp: ffffffffa000dd51 intel_bts_enable_local+0x21 ([kernel.kallsyms]) swapper 0 [000] 0.000022: 3084 cycles:ppp: ffffffffa0a0150a end_repeat_nmi+0x48 ([kernel.kallsyms]) Committer notes: Do not use 'time' as the name of a variable, as this breaks the build on older glibcs: cc1: warnings being treated as errors builtin-script.c: In function 'perf_sample__fprintf_start': builtin-script.c:691: warning: declaration of 'time' shadows a global declaration /usr/include/time.h:187: warning: shadowed declaration is here Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa LPU-Reference: 20190314225002.30108-8-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-bpahyi6pr9r399mvihu65fvc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2f93d60c5a17..61cfd8f70989 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -53,6 +53,8 @@ static char const *script_name; static char const *generate_script_lang; +static bool reltime; +static u64 initial_time; static bool debug_mode; static u64 last_timestamp; static u64 nr_unordered; @@ -686,7 +688,13 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, } if (PRINT_FIELD(TIME)) { - nsecs = sample->time; + u64 t = sample->time; + if (reltime) { + if (!initial_time) + initial_time = sample->time; + t = sample->time - initial_time; + } + nsecs = t; secs = nsecs / NSEC_PER_SEC; nsecs -= secs * NSEC_PER_SEC; @@ -694,7 +702,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs); else { char sample_time[32]; - timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time)); + timestamp__scnprintf_usec(t, sample_time, sizeof(sample_time)); printed += fprintf(fp, "%12s: ", sample_time); } } @@ -3413,6 +3421,7 @@ int cmd_script(int argc, const char **argv) "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), + OPT_BOOLEAN(0, "reltime", &reltime, "Show time stamps relative to start"), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -3487,6 +3496,11 @@ int cmd_script(int argc, const char **argv) } } + if (script.time_str && reltime) { + fprintf(stderr, "Don't combine --reltime with --time\n"); + return -1; + } + if (itrace_synth_opts.callchain && itrace_synth_opts.callchain_sz > scripting_max_stack) scripting_max_stack = itrace_synth_opts.callchain_sz; -- cgit v1.2.3