From b0e878759452314676fbdd71df4ac67e7d08de5d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Aug 2015 14:06:07 +0200 Subject: perf/abi: Document some more aspects of the perf ABI Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 105 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 92 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 092a0e8a479a..d61ee4459eee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -140,27 +140,60 @@ struct hw_perf_event { }; #endif }; + /* + * If the event is a per task event, this will point to the task in + * question. See the comment in perf_event_alloc(). + */ struct task_struct *target; + +/* + * hw_perf_event::state flags; used to track the PERF_EF_* state. + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + int state; + + /* + * The last observed hardware counter value, updated with a + * local64_cmpxchg() such that pmu::read() can be called nested. + */ local64_t prev_count; + + /* + * The period to start the next sample with. + */ u64 sample_period; + + /* + * The period we started this sample with. + */ u64 last_period; + + /* + * However much is left of the current period; note that this is + * a full 64bit value and allows for generation of periods longer + * than hardware might allow. + */ local64_t period_left; + + /* + * State for throttling the event, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 interrupts_seq; u64 interrupts; + /* + * State for freq target events, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 freq_time_stamp; u64 freq_count_stamp; #endif }; -/* - * hw_perf_event::state flags - */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 - struct perf_event; /* @@ -210,7 +243,19 @@ struct pmu { /* * Try and initialize the event for this PMU. - * Should return -ENOENT when the @event doesn't match this PMU. + * + * Returns: + * -ENOENT -- @event is not for this PMU + * + * -ENODEV -- @event is for this PMU but PMU not present + * -EBUSY -- @event is for this PMU but PMU temporarily unavailable + * -EINVAL -- @event is for this PMU but @event is not valid + * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported + * -EACCESS -- @event is for this PMU, @event is valid, but no privilidges + * + * 0 -- @event is for this PMU and valid + * + * Other error return values are allowed. */ int (*event_init) (struct perf_event *event); @@ -221,27 +266,61 @@ struct pmu { void (*event_mapped) (struct perf_event *event); /*optional*/ void (*event_unmapped) (struct perf_event *event); /*optional*/ + /* + * Flags for ->add()/->del()/ ->start()/->stop(). There are + * matching hw_perf_event::state flags. + */ #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ /* - * Adds/Removes a counter to/from the PMU, can be done inside - * a transaction, see the ->*_txn() methods. + * Adds/Removes a counter to/from the PMU, can be done inside a + * transaction, see the ->*_txn() methods. + * + * The add/del callbacks will reserve all hardware resources required + * to service the event, this includes any counter constraint + * scheduling etc. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on. + * + * ->add() called without PERF_EF_START should result in the same state + * as ->add() followed by ->stop(). + * + * ->del() must always PERF_EF_UPDATE stop an event. If it calls + * ->stop() that must deal with already being stopped without + * PERF_EF_UPDATE. */ int (*add) (struct perf_event *event, int flags); void (*del) (struct perf_event *event, int flags); /* - * Starts/Stops a counter present on the PMU. The PMI handler - * should stop the counter when perf_event_overflow() returns - * !0. ->start() will be used to continue. + * Starts/Stops a counter present on the PMU. + * + * The PMI handler should stop the counter when perf_event_overflow() + * returns !0. ->start() will be used to continue. + * + * Also used to change the sample period. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on -- will be called from NMI context with the PMU generates + * NMIs. + * + * ->stop() with PERF_EF_UPDATE will read the counter and update + * period/count values like ->read() would. + * + * ->start() with PERF_EF_RELOAD will reprogram the the counter + * value, must be preceded by a ->stop() with PERF_EF_UPDATE. */ void (*start) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags); /* * Updates the counter value of the event. + * + * For sampling capable PMUs this will also update the software period + * hw_perf_event::period_left field. */ void (*read) (struct perf_event *event); -- cgit v1.2.3 From fbbe07011581990ef74dfac06dc8511b1a14badb Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:45 -0700 Subject: perf/core: Add a 'flags' parameter to the PMU transactional interfaces Currently, the PMU interface allows reading only one counter at a time. But some PMUs like the 24x7 counters in Power, support reading several counters at once. To leveage this functionality, extend the transaction interface to support a "transaction type". The first type, PERF_PMU_TXN_ADD, refers to the existing transactions, i.e. used to _schedule_ all the events on the PMU as a group. A second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on patch, by the 24x7 counters to read several counters at once. Extend the transaction interfaces to the PMU to accept a 'txn_flags' parameter and use this parameter to ignore any transactions that are not of type PERF_PMU_TXN_ADD. Thanks to Peter Zijlstra for his input. Signed-off-by: Sukadev Bhattiprolu [peterz: s390 compile fix] Signed-off-by: Peter Zijlstra (Intel) Acked-by: Michael Ellerman Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-3-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d61ee4459eee..ea3b5dd21a4c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -201,6 +201,8 @@ struct perf_event; */ #define PERF_EVENT_TXN 0x1 +#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ + /** * pmu::capabilities flags */ @@ -331,20 +333,26 @@ struct pmu { * * Start the transaction, after this ->add() doesn't need to * do schedulability tests. + * + * Optional. */ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu, unsigned int txn_flags); /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. + * + * Optional. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. + * + * Optional. */ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* * Will return the value for perf_event_mmap_page::index for this event, -- cgit v1.2.3 From 4a00c16e552ea5e71756cd29cd2df7557ec9cac4 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:51 -0700 Subject: perf/core: Define PERF_PMU_TXN_READ interface Define a new PERF_PMU_TXN_READ interface to read a group of counters at once. pmu->start_txn() // Initialize before first event for each event in group pmu->read(event); // Queue each event to be read rc = pmu->commit_txn() // Read/update all queued counters Note that we use this interface with all PMUs. PMUs that implement this interface use the ->read() operation to _queue_ the counters to be read and use ->commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore ->start_txn() and ->commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-9-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ea3b5dd21a4c..b83cea932f74 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -202,6 +202,7 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags -- cgit v1.2.3 From 8f3e5684d3fbd91ead283916676fa3dac22615e5 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:53 -0700 Subject: perf/core: Drop PERF_EVENT_TXN We currently use PERF_EVENT_TXN flag to determine if we are in the middle of a transaction. If in a transaction, we defer the schedulability checks from pmu->add() operation to the pmu->commit() operation. Now that we have "transaction types" (PERF_PMU_TXN_ADD, PERF_PMU_TXN_READ) we can use the type to determine if we are in a transaction and drop the PERF_EVENT_TXN flag. When PERF_EVENT_TXN is dropped, the cpuhw->group_flag on some architectures becomes unused, so drop that field as well. This is an extension of the Powerpc patch from Peter Zijlstra to s390, Sparc and x86 architectures. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-11-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b83cea932f74..d841d33bcdc9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -199,8 +199,6 @@ struct perf_event; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_EVENT_TXN 0x1 - #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ #define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ -- cgit v1.2.3 From b9511cd761faafca7a1acc059e792c1399f9d7c6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 16 Oct 2015 16:24:05 +0300 Subject: perf/x86: Fix time_shift in perf_event_mmap_page Commit: b20112edeadf ("perf/x86: Improve accuracy of perf/sched clock") allowed the time_shift value in perf_event_mmap_page to be as much as 32. Unfortunately the documented algorithms for using time_shift have it shifting an integer, whereas to work correctly with the value 32, the type must be u64. In the case of perf tools, Intel PT decodes correctly but the timestamps that are output (for example by perf script) have lost 32-bits of granularity so they look like they are not changing at all. Fix by limiting the shift to 31 and adjusting the multiplier accordingly. Also update the documentation of perf_event_mmap_page so that new code based on it will be more future-proof. Signed-off-by: Adrian Hunter Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: b20112edeadf ("perf/x86: Improve accuracy of perf/sched clock") Link: http://lkml.kernel.org/r/1445001845-13688-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 2881145cda86..6c72e72e975c 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -476,7 +476,7 @@ struct perf_event_mmap_page { * u64 delta; * * quot = (cyc >> time_shift); - * rem = cyc & ((1 << time_shift) - 1); + * rem = cyc & (((u64)1 << time_shift) - 1); * delta = time_offset + quot * time_mult + * ((rem * time_mult) >> time_shift); * @@ -507,7 +507,7 @@ struct perf_event_mmap_page { * And vice versa: * * quot = cyc >> time_shift; - * rem = cyc & ((1 << time_shift) - 1); + * rem = cyc & (((u64)1 << time_shift) - 1); * timestamp = time_zero + quot * time_mult + * ((rem * time_mult) >> time_shift); */ -- cgit v1.2.3 From c229bf9dc179d2023e185c0f705bdf68484c1e73 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 13 Oct 2015 09:09:08 +0200 Subject: perf: Add PERF_SAMPLE_BRANCH_CALL Add a new branch sample type to cover only call branches (function calls). The current ANY_CALL included direct, indirect calls and far jumps. We want to be able to differentiate indirect from direct calls. Therefore we introduce PERF_SAMPLE_BRANCH_CALL. The implementation is up to each architecture. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Cc: khandual@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1444720151-10275-2-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 6c72e72e975c..651221334f49 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -168,6 +168,7 @@ enum perf_branch_sample_type_shift { PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ + PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ }; @@ -188,6 +189,7 @@ enum perf_branch_sample_type { PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; -- cgit v1.2.3