From 52deff71bc2b2c24587ab71f588ff5e4c9279349 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 May 2012 22:58:26 -0600 Subject: perf script: Fix regression in callchain dso name $ perf script -i /tmp/perf.data ... gcc 13623 544315.062858: context-switches: ffffffff815f65c9 __schedule ([kernel.kallsyms]) ffffffff81087cea __cond_resched ([kernel.kallsyms]) ffffffff815f6b92 _cond_resched ([kernel.kallsyms]) ffffffff815fb87a do_page_fault ([kernel.kallsyms]) ffffffff815f8465 page_fault ([kernel.kallsyms]) 2b7a71ea0303 _dl_lookup_symbol_x ([kernel.kallsyms]) 2b7a71ea1eb5 _dl_relocate_object ([kernel.kallsyms]) 2b7a71e99b2e dl_main ([kernel.kallsyms]) 2b7a71eab7f4 _dl_sysdep_start ([kernel.kallsyms]) All DSO's in a callchain are printed as [kernel.kallsyms]. git bisect chased it to: 547a92e0aedb88129e7fbd804697a11949de2e5a is the first bad commit commit 547a92e0aedb88129e7fbd804697a11949de2e5a Author: Akihiro Nagai Date: Mon Jan 30 13:42:57 2012 +0900 perf script: Unify the expressions indicating "unknown" The perf script command uses various expressions to indicate "unknown". It is unfriendly for user scripts to parse it. So, this patch unifies the expressions to "[unknown]". Looks like a copy-paste in that the other references use al.map but this one should be node->map. With this patch you get: $ perf script -i /tmp/perf.data ... 
gcc 13623 544315.062858: context-switches: ffffffff815f65c9 __schedule ([kernel.kallsyms]) ffffffff81087cea __cond_resched ([kernel.kallsyms]) ffffffff815f6b92 _cond_resched ([kernel.kallsyms]) ffffffff815fb87a do_page_fault ([kernel.kallsyms]) ffffffff815f8465 page_fault ([kernel.kallsyms]) 2b7a71ea0303 _dl_lookup_symbol_x (/lib64/ld-2.14.90.so) 2b7a71ea1eb5 _dl_relocate_object (/lib64/ld-2.14.90.so) 2b7a71e99b2e dl_main (/lib64/ld-2.14.90.so) 2b7a71eab7f4 _dl_sysdep_start (/lib64/ld-2.14.90.so) Signed-off-by: David Ahern Cc: Akihiro Nagai Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1338353906-60706-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 93d355d27109..48206144758e 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1460,7 +1460,7 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, } if (print_dso) { printf(" ("); - map__fprintf_dsoname(al.map, stdout); + map__fprintf_dsoname(node->map, stdout); printf(")"); } printf("\n"); -- cgit v1.2.3 From 472606458f3e1ced5fe3cc5f04e90a6b5a4732cf Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:43:26 +0900 Subject: perf callchain: Make callchain cursors TLS perf top -G has a race on callchain cursor between main thread and display thread. Since the callchain cursors are only used locally, making them thread-local data solves the problem.
Signed-off-by: Namhyung Kim Reported-by: Sunjin Yang Suggested-by: Arnaldo Carvalho de Melo Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sunjin Yang Link: http://lkml.kernel.org/r/1338443007-24857-1-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 48206144758e..3b6f8e460a31 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -288,7 +288,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self, return bi; } -int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, +int machine__resolve_callchain(struct machine *self, + struct perf_evsel *evsel __used, struct thread *thread, struct ip_callchain *chain, struct symbol **parent) @@ -297,7 +298,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, unsigned int i; int err; - callchain_cursor_reset(&evsel->hists.callchain_cursor); + callchain_cursor_reset(&callchain_cursor); for (i = 0; i < chain->nr; i++) { u64 ip; @@ -333,7 +334,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, break; } - err = callchain_cursor_append(&evsel->hists.callchain_cursor, + err = callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym); if (err) return err; @@ -1428,7 +1429,6 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, int print_sym, int print_dso, int print_symoffset) { struct addr_location al; - struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; struct callchain_cursor_node *node; if (perf_event__preprocess_sample(event, machine, &al, sample, @@ -1446,10 +1446,10 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, error("Failed to resolve callchain. 
Skipping\n"); return; } - callchain_cursor_commit(cursor); + callchain_cursor_commit(&callchain_cursor); while (1) { - node = callchain_cursor_current(cursor); + node = callchain_cursor_current(&callchain_cursor); if (!node) break; @@ -1465,7 +1465,7 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, } printf("\n"); - callchain_cursor_advance(cursor); + callchain_cursor_advance(&callchain_cursor); } } else { -- cgit v1.2.3 From 114067b69e7b2c691faace0e33db2f04096f668d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:43:27 +0900 Subject: perf tools: Check if callchain is corrupted We faced segmentation fault on perf top -G at very high sampling rate due to a corrupted callchain. While the root cause was not revealed (I failed to figure it out), this patch tries to protect us from the segfault on such cases. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sunjin Yang Link: http://lkml.kernel.org/r/1338443007-24857-2-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b6f8e460a31..04d1e33f4592 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -300,6 +300,11 @@ int machine__resolve_callchain(struct machine *self, callchain_cursor_reset(&callchain_cursor); + if (chain->nr > PERF_MAX_STACK_DEPTH) { + pr_warning("corrupted callchain. 
skipping...\n"); + return 0; + } + for (i = 0; i < chain->nr; i++) { u64 ip; struct addr_location al; @@ -318,7 +323,14 @@ int machine__resolve_callchain(struct machine *self, case PERF_CONTEXT_USER: cpumode = PERF_RECORD_MISC_USER; break; default: - break; + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. + */ + callchain_cursor_reset(&callchain_cursor); + return 0; } continue; } -- cgit v1.2.3 From 268fb20f832e1eb4afd5113ee31fef9332986b13 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 30 May 2012 14:23:43 +0200 Subject: perf session: Handle endianity swap on sample_id_all header data Adding endianity swapping for event header attached via sample_id_all. Currently we don't do that and it's causing wrong data to be read when running report on architecture with different endianity than the record. The perf is currently able to process 32-bit PPC samples on 32-bit and 64-bit x86. Together with other endianity patches, this change fixes perf report discrepancies on origin and target systems as described in test 1 below, e.g. following perf report diff: ... 0.12% ps [kernel.kallsyms] [k] clear_page - 0.12% awk bash [.] alloc_word_desc + 0.12% awk bash [.] yyparse 0.11% beah-rhts-task libpython2.6.so.1.0 [.] 0x5560e 0.10% perf libc-2.12.so [.] __ctype_toupper_loc - 0.09% rhts-test-runne bash [.] maybe_make_export_env + 0.09% rhts-test-runne bash [.] 0x385a0 0.09% ps [kernel.kallsyms] [k] page_fault ...
Note, running following to test perf endianity handling: test 1) - origin system: # perf record -a -- sleep 10 (any perf record will do) # perf report > report.origin # perf archive perf.data - copy the perf.data, report.origin and perf.data.tar.bz2 to a target system and run: # tar xjvf perf.data.tar.bz2 -C ~/.debug # perf report > report.target # diff -u report.origin report.target - the diff should produce no output (besides some white space stuff and possibly different date/TZ output) test 2) - origin system: # perf record -ag -fo /tmp/perf.data -- sleep 1 - mount origin system root to the target system on /mnt/origin - target system: # perf script --symfs /mnt/origin -I -i /mnt/origin/tmp/perf.data \ --kallsyms /mnt/origin/proc/kallsyms - complete perf.data header is displayed Signed-off-by: Jiri Olsa Reviewed-by: David Ahern Tested-by: David Ahern Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1338380624-7443-3-git-send-email-jolsa@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 67 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 12 deletions(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 04d1e33f4592..2600916efa83 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -454,37 +454,65 @@ void mem_bswap_64(void *src, int byte_size) } } -static void perf_event__all64_swap(union perf_event *event) +static void swap_sample_id_all(union perf_event *event, void *data) +{ + void *end = (void *) event + event->header.size; + int size = end - data; + + BUG_ON(size % sizeof(u64)); + mem_bswap_64(data, size); +} + +static void perf_event__all64_swap(union perf_event *event, + bool sample_id_all __used) { struct perf_event_header *hdr = &event->header; mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); } -static void 
perf_event__comm_swap(union perf_event *event) +static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) { event->comm.pid = bswap_32(event->comm.pid); event->comm.tid = bswap_32(event->comm.tid); + + if (sample_id_all) { + void *data = &event->comm.comm; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__mmap_swap(union perf_event *event) +static void perf_event__mmap_swap(union perf_event *event, + bool sample_id_all) { event->mmap.pid = bswap_32(event->mmap.pid); event->mmap.tid = bswap_32(event->mmap.tid); event->mmap.start = bswap_64(event->mmap.start); event->mmap.len = bswap_64(event->mmap.len); event->mmap.pgoff = bswap_64(event->mmap.pgoff); + + if (sample_id_all) { + void *data = &event->mmap.filename; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__task_swap(union perf_event *event) +static void perf_event__task_swap(union perf_event *event, bool sample_id_all) { event->fork.pid = bswap_32(event->fork.pid); event->fork.tid = bswap_32(event->fork.tid); event->fork.ppid = bswap_32(event->fork.ppid); event->fork.ptid = bswap_32(event->fork.ptid); event->fork.time = bswap_64(event->fork.time); + + if (sample_id_all) + swap_sample_id_all(event, &event->fork + 1); } -static void perf_event__read_swap(union perf_event *event) +static void perf_event__read_swap(union perf_event *event, bool sample_id_all) { event->read.pid = bswap_32(event->read.pid); event->read.tid = bswap_32(event->read.tid); @@ -492,6 +520,9 @@ static void perf_event__read_swap(union perf_event *event) event->read.time_enabled = bswap_64(event->read.time_enabled); event->read.time_running = bswap_64(event->read.time_running); event->read.id = bswap_64(event->read.id); + + if (sample_id_all) + swap_sample_id_all(event, &event->read + 1); } static u8 revbyte(u8 b) @@ -543,7 +574,8 @@ void perf_event__attr_swap(struct perf_event_attr *attr) 
swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); } -static void perf_event__hdr_attr_swap(union perf_event *event) +static void perf_event__hdr_attr_swap(union perf_event *event, + bool sample_id_all __used) { size_t size; @@ -554,18 +586,21 @@ static void perf_event__hdr_attr_swap(union perf_event *event) mem_bswap_64(event->attr.id, size); } -static void perf_event__event_type_swap(union perf_event *event) +static void perf_event__event_type_swap(union perf_event *event, + bool sample_id_all __used) { event->event_type.event_type.event_id = bswap_64(event->event_type.event_type.event_id); } -static void perf_event__tracing_data_swap(union perf_event *event) +static void perf_event__tracing_data_swap(union perf_event *event, + bool sample_id_all __used) { event->tracing_data.size = bswap_32(event->tracing_data.size); } -typedef void (*perf_event__swap_op)(union perf_event *event); +typedef void (*perf_event__swap_op)(union perf_event *event, + bool sample_id_all); static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_MMAP] = perf_event__mmap_swap, @@ -999,6 +1034,15 @@ static int perf_session__process_user_event(struct perf_session *session, union } } +static void event_swap(union perf_event *event, bool sample_id_all) +{ + perf_event__swap_op swap; + + swap = perf_event__swap_ops[event->header.type]; + if (swap) + swap(event, sample_id_all); +} + static int perf_session__process_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool, @@ -1007,9 +1051,8 @@ static int perf_session__process_event(struct perf_session *session, struct perf_sample sample; int ret; - if (session->header.needs_swap && - perf_event__swap_ops[event->header.type]) - perf_event__swap_ops[event->header.type](event); + if (session->header.needs_swap) + event_swap(event, session->sample_id_all); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; -- cgit v1.2.3 From 80c0120a3cca30166c0ab8b24e44be67e97b79af Mon Sep 17 00:00:00 
2001 From: David Ahern Date: Fri, 8 Jun 2012 11:47:51 -0300 Subject: perf tools: Fix endianity swapping for adds_features bitmask Based on Jiri's latest attempt: https://lkml.org/lkml/2012/5/16/61 Basically, adds_features should be byte swapped assuming unsigned longs are either 8-bytes (u64) or 4-bytes (u32). Fixes 32-bit ppc dumping 64-bit x86 feature data: ======== captured on: Sun May 20 19:23:23 2012 hostname : nxos-vdc-dev3 os release : 3.4.0-rc7+ perf version : 3.4.rc4.137.g978da3 arch : x86_64 nrcpus online : 16 nrcpus avail : 16 cpudesc : Intel(R) Xeon(R) CPU E5540 @ 2.53GHz cpuid : GenuineIntel,6,26,5 total memory : 24680324 kB ... Verified 64-bit x86 can still dump feature data for 32-bit ppc. Signed-off-by: David Ahern Reviewed-by: Jiri Olsa Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/4FBBB539.5010805@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 2600916efa83..c3e399bcf18d 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -442,6 +442,16 @@ static void perf_tool__fill_defaults(struct perf_tool *tool) tool->finished_round = process_finished_round_stub; } } + +void mem_bswap_32(void *src, int byte_size) +{ + u32 *m = src; + while (byte_size > 0) { + *m = bswap_32(*m); + byte_size -= sizeof(u32); + ++m; + } +} void mem_bswap_64(void *src, int byte_size) { -- cgit v1.2.3 From 207b5792696206663a38e525b9793644895bad3b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Sun, 1 Jul 2012 16:11:37 -0600 Subject: perf kvm: Fix regression with guest machine creation Commit 743eb868657bdb1b26c7b24077ca21c67c82c777 reworked when the machines were created. Prior to this commit guest machines could be created in perf_event__process_kernel_mmap() while processing kernel MMAP events. 
This commit assumes that the machines exist by the time perf_session_deliver_event is called (e.g., during processing of build id events) - which is not always correct. One example is the use of default guest args (--guestkallsyms and --guestmodules) for short times where no samples hit within a guest module. For this case no build id is added to the file header. No build id == no machine created. That leads to the next example -- the use of no-buildid (-B) on the record for all perf-kvm invocations. In both cases perf report dies with a SEGFAULT of the form: (gdb) bt 0 0x000000000046dd7b in machine__mmap_name (self=0x0, bf=0x7fffffffbd20 "q\021", size=4096) at util/map.c:715 1 0x0000000000444161 in perf_event__process_kernel_mmap (tool=0x7fffffffdd80, event=0x7ffff7fb4120, machine=0x0) at util/event.c:562 2 0x0000000000444642 in perf_event__process_mmap (tool=0x7fffffffdd80, event=0x7ffff7fb4120, sample=0x7fffffffd210, machine=0x0) at util/event.c:668 3 0x0000000000470e0b in perf_session_deliver_event (session=0x915ca0, event=0x7ffff7fb4120, sample=0x7fffffffd210, tool=0x7fffffffdd80, file_offset=8480) at util/session.c:979 4 0x000000000047032e in flush_sample_queue (s=0x915ca0, tool=0x7fffffffdd80) at util/session.c:679 5 0x0000000000471c8d in __perf_session__process_events (session=0x915ca0, data_offset=400, data_size=150448, file_size=150848, tool= 0x7fffffffdd80) at util/session.c:1363 6 0x0000000000471d42 in perf_session__process_events (self=0x915ca0, tool=0x7fffffffdd80) at util/session.c:1379 7 0x000000000042484a in __cmd_report (rep=0x7fffffffdd80) at builtin-report.c:368 8 0x0000000000425bf1 in cmd_report (argc=0, argv=0x915b00, prefix=0x0) at builtin-report.c:756 9 0x0000000000438505 in __cmd_report (argc=4, argv=0x7fffffffe260) at builtin-kvm.c:84 10 0x000000000043882a in cmd_kvm (argc=4, argv=0x7fffffffe260, prefix=0x0) at builtin-kvm.c:131 11 0x00000000004152cd in run_builtin (p=0x7a54e8, argc=9, argv=0x7fffffffe260) at perf.c:273 12 
0x00000000004154c7 in handle_internal_command (argc=9, argv=0x7fffffffe260) at perf.c:345 13 0x0000000000415613 in run_argv (argcp=0x7fffffffe14c, argv=0x7fffffffe140) at perf.c:389 14 0x0000000000415899 in main (argc=9, argv=0x7fffffffe260) at perf.c:487 Fix by allowing the machine to be created in perf_session_deliver_event. Tested with --guestmount option and default guest args, with and without -B arg on record for both and for short (10 seconds) and long (10 minutes) windows. Reported-by: Pradeep Kumar Surisetty Signed-off-by: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Pradeep Kumar Surisetty Link: http://lkml.kernel.org/r/1341180697-64515-1-git-send-email-dsahern@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index c3e399bcf18d..56142d0fb8d7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -926,7 +926,7 @@ static struct machine * else pid = event->ip.pid; - return perf_session__find_machine(session, pid); + return perf_session__findnew_machine(session, pid); } return perf_session__find_host_machine(session); -- cgit v1.2.3