From dc83e1394083d6e12625a3158bf88396dfaec633 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:24:33 +0200 Subject: perf ordered_events: Pass timestamp arg in perf_session__queue_event There's no need to pass whole sample data, because it's only timestamp that is used. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xd1hpoze3kgb1rb639o3vehb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5c412310f266..8976e417eab2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -873,9 +873,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused, } int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - return ordered_events__queue(&s->ordered_events, event, sample, file_offset); + return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset); } static void callchain__lbr_callstack_printf(struct perf_sample *sample) @@ -1517,7 +1517,7 @@ static s64 perf_session__process_event(struct perf_session *session, return ret; if (tool->ordered_events) { - ret = perf_session__queue_event(session, event, &sample, file_offset); + ret = perf_session__queue_event(session, event, sample.time, file_offset); if (ret != -ETIME) return ret; } -- cgit v1.2.3 From 93d10af26bb7159349158b721ba2e258291d53c3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:21:14 +0200 Subject: perf tools: Optimize sample parsing for ordered events Currently when using ordered events we parse the sample twice (the perf_evlist__parse_sample function). 
Once before we queue the sample for sorting: perf_session__process_event perf_evlist__parse_sample(sample) perf_session__queue_event(sample.time) And then when we deliver the sorted sample: ordered_events__deliver_event perf_evlist__parse_sample perf_session__deliver_event We can skip the initial full sample parsing by using perf_evlist__parse_sample_timestamp function, which got introduced earlier. The new path looks like: perf_session__process_event perf_evlist__parse_sample_timestamp perf_session__queue_event ordered_events__deliver_event perf_session__deliver_event perf_evlist__parse_sample It saves some instructions and is slightly faster: Before: Performance counter stats for './perf.old report --stdio' (5 runs): 64,396,007,225 cycles:u ( +- 0.97% ) 105,882,112,735 instructions:u # 1.64 insn per cycle ( +- 0.00% ) 21.618103465 seconds time elapsed ( +- 1.12% ) After: Performance counter stats for './perf report --stdio' (5 runs): 60,567,807,182 cycles:u ( +- 0.40% ) 104,853,333,514 instructions:u # 1.73 insn per cycle ( +- 0.00% ) 20.168895243 seconds time elapsed ( +- 0.32% ) Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8976e417eab2..df2857137908 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -27,7 +27,6 @@ static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset); @@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session) static int ordered_events__deliver_event(struct ordered_events 
*oe, struct ordered_event *event) { - struct perf_sample sample; struct perf_session *session = container_of(oe, struct perf_session, ordered_events); - int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); - - if (ret) { - pr_err("Can't parse sample, err = %d\n", ret); - return ret; - } - return perf_session__deliver_event(session, event->event, &sample, + return perf_session__deliver_event(session, event->event, session->tool, event->file_offset); } @@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines, static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { + struct perf_sample sample; int ret; - ret = auxtrace__process_event(session, event, sample, tool); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) return ret; if (ret > 0) return 0; return machines__deliver_event(&session->machines, session->evlist, - event, sample, tool, file_offset); + event, &sample, tool, file_offset); } static s64 perf_session__process_user_event(struct perf_session *session, @@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session, { struct perf_evlist *evlist = session->evlist; struct perf_tool *tool = session->tool; - struct perf_sample sample; int ret; if (session->header.needs_swap) @@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session, if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, file_offset); - /* - * For all kernel events we get the sample data - */ - ret = perf_evlist__parse_sample(evlist, event, &sample); - if (ret) - return ret; - if (tool->ordered_events) { - ret = perf_session__queue_event(session, 
event, sample.time, file_offset); + u64 timestamp; + + ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); + if (ret) + return ret; + + ret = perf_session__queue_event(session, event, timestamp, file_offset); if (ret != -ETIME) return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return perf_session__deliver_event(session, event, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) -- cgit v1.2.3 From f250b09c779550e4a7a412dae6d3ad34d5201019 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Nov 2017 15:35:04 -0300 Subject: perf report: Fix -D output for user metadata events The PERF_RECORD_USER_ events are synthesized by the tool to assist in processing the PERF_RECORD_ ones generated by the kernel, the printing of that information doesn't come with a perf_sample structure, so, when dumping the event fields using 'perf report -D' there were columns that end up not being printed. To tidy up a bit this, fake a perf_sample structure with zeroes to have the missing columns printed and avoid the occasional surprise with that. Before: 0 0x45b8 [0x68]: PERF_RECORD_MMAP -1/0: [0xffffffffc12ec000(0x4000) @ 0]: x /lib/modules/4.14.0+/kernel/fs/nls/nls_utf8.ko 0x4620 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 27820 0x4648 [0x18]: PERF_RECORD_CPU_MAP: 0-3 0 0x4660 [0x28]: PERF_RECORD_COMM: perf:27820/27820 0x4a58 [0x8]: PERF_RECORD_FINISHED_ROUND 447723433020976 0x4688 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 27820/27820: 0xffffffff8f1b6d7a period: 1 addr: 0 After: $ perf report -D | grep PERF_RECORD_ | head 0 0xe8 [0x20]: PERF_RECORD_TIME_CONV: unhandled! 
0 0x108 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 32555 0 0x130 [0x18]: PERF_RECORD_CPU_MAP: 0-3 0 0x148 [0x28]: PERF_RECORD_COMM: perf:32555/32555 0 0x4e8 [0x8]: PERF_RECORD_FINISHED_ROUND 448743409421205 0x170 [0x28]: PERF_RECORD_COMM exec: sleep:32555/32555 448743409431883 0x198 [0x68]: PERF_RECORD_MMAP2 32555/32555: [0x55e11d75a000(0x208000) @ 0 fd:00 3147174 2566255743]: r-xp /usr/bin/sleep 448743409443873 0x200 [0x70]: PERF_RECORD_MMAP2 32555/32555: [0x7f0ced316000(0x229000) @ 0 fd:00 3151761 2566238119]: r-xp /usr/lib64/ld-2.25.so 448743409454790 0x270 [0x60]: PERF_RECORD_MMAP2 32555/32555: [0x7ffe84f6d000(0x2000) @ 0 00:00 0 0]: r-xp [vdso] 448743409479500 0x2d0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4002): 32555/32555: 0xffffffff8f84c7e7 period: 1 addr: 0 $ Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Thomas Gleixner Fixes: 9aefcab0de47 ("perf session: Consolidate the dump code") Link: https://lkml.kernel.org/n/tip-todcu15x0cwgppkh1gi6uhru@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index df2857137908..54e30f1bcbd7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1348,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; + struct perf_sample sample = { .time = 0, }; int fd = perf_data__fd(session->data); int err; - dump_event(session->evlist, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, &sample); /* These events are processed right away */ switch (event->header.type) { -- cgit v1.2.3 From 075ca1ebb25e798e4072a1e3a482b829bb51afb2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:54 +0100 Subject: perf 
tools: Make the tool's warning messages optional I want to display the pure events status coming in the next patch and the tool's warnings are superfluous in the output. Making it optional, enabled by default. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 54e30f1bcbd7..8d0fa2f8da16 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1773,7 +1773,8 @@ done: err = perf_session__flush_thread_stacks(session); out_err: free(buf); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); auxtrace__free_events(session); return err; @@ -1929,7 +1930,8 @@ out: err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); /* * We may switching perf.data output, make ordered_events * reusable. -- cgit v1.2.3 From 631e8f0a9748d7ef1eb6a84d0d5b9e81a79433ef Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 10 Jan 2018 13:31:52 -0700 Subject: perf report: Fix regression when decoding intel_pt traces Commit (93d10af26bb7 perf tools: Optimize sample parsing for ordered events) breaks intelPT trace decoding by invariably returning an error if the event type isn't a PERF_SAMPLE_TIME. With this patch the timestamp is initialised and processing is allowed to continue if the error returned by function perf_evlist__parse_sample_timestamp() is not a fault. 
Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Fixes: 93d10af26bb7 ("perf tools: Optimize sample parsing for ordered events") Link: http://lkml.kernel.org/r/1515616312-27645-1-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/session.c') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8d0fa2f8da16..c71ced7db152 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1508,10 +1508,10 @@ static s64 perf_session__process_event(struct perf_session *session, return perf_session__process_user_event(session, event, file_offset); if (tool->ordered_events) { - u64 timestamp; + u64 timestamp = -1ULL; ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); - if (ret && ret != -1) + if (ret && ret != -1) return ret; ret = perf_session__queue_event(session, event, timestamp, file_offset); -- cgit v1.2.3