summaryrefslogtreecommitdiff
path: root/tools/perf/util
diff options
context:
space:
mode:
author: Ian Rogers <irogers@google.com> 2025-07-18 20:05:07 -0700
committer: Namhyung Kim <namhyung@kernel.org> 2025-07-24 13:41:35 -0700
commit: 175c852325a1f566426e2470e5d5d67efc7621dd (patch)
tree: dce45d5597579302daf7ef0dd8bf2c64d9d7c513 /tools/perf/util
parent: bd741d80dc65922c7d6e5fd855a934f5d2cf2309 (diff)
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.

```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true

 Performance counter stats for 'true':

                28      tool/num_cpus/
                16      tool/num_cpus,cpu=cpu_core/
                12      tool/num_cpus,cpu=cpu_atom/
                28      tool/num_cpus_online/
                16      tool/num_cpus_online,cpu=cpu_core/
                12      tool/num_cpus_online,cpu=cpu_atom/

       0.000767205 seconds time elapsed

       0.000938000 seconds user
       0.000000000 seconds sys
```

Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Diffstat (limited to 'tools/perf/util')
-rw-r--r-- tools/perf/util/expr.c      2
-rw-r--r-- tools/perf/util/tool_pmu.c 56
-rw-r--r-- tools/perf/util/tool_pmu.h  2
3 files changed, 51 insertions, 9 deletions
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index ca70a14c7cdf..7fda0ff89c16 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -401,7 +401,7 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx
if (ev != TOOL_PMU__EVENT_NONE) {
u64 count;
- if (tool_pmu__read_event(ev, &count))
+ if (tool_pmu__read_event(ev, /*evsel=*/NULL, &count))
result = count;
else
pr_err("Failure to read '%s'", literal);
diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c
index 4630b8cc8e52..7aa4f315b0ac 100644
--- a/tools/perf/util/tool_pmu.c
+++ b/tools/perf/util/tool_pmu.c
@@ -332,7 +332,7 @@ static bool has_pmem(void)
return has_pmem;
}
-bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
+bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result)
{
const struct cpu_topology *topology;
@@ -347,18 +347,60 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result)
return true;
case TOOL_PMU__EVENT_NUM_CPUS:
- *result = cpu__max_present_cpu().cpu;
+ if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
+ /* No evsel to be specific to. */
+ *result = cpu__max_present_cpu().cpu;
+ } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
+ /* Evsel just has specific CPUs. */
+ *result = perf_cpu_map__nr(evsel->core.cpus);
+ } else {
+ /*
+ * "Any CPU" event that can be scheduled on any CPU in
+ * the PMU's cpumask. The PMU cpumask should be saved in
+ * own_cpus. If not present fall back to max.
+ */
+ if (!perf_cpu_map__is_empty(evsel->core.own_cpus))
+ *result = perf_cpu_map__nr(evsel->core.own_cpus);
+ else
+ *result = cpu__max_present_cpu().cpu;
+ }
return true;
case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: {
struct perf_cpu_map *online = cpu_map__online();
- if (online) {
+ if (!online)
+ return false;
+
+ if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) {
+ /* No evsel to be specific to. */
*result = perf_cpu_map__nr(online);
- perf_cpu_map__put(online);
- return true;
+ } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) {
+ /* Evsel just has specific CPUs. */
+ struct perf_cpu_map *tmp =
+ perf_cpu_map__intersect(online, evsel->core.cpus);
+
+ *result = perf_cpu_map__nr(tmp);
+ perf_cpu_map__put(tmp);
+ } else {
+ /*
+ * "Any CPU" event that can be scheduled on any CPU in
+ * the PMU's cpumask. The PMU cpumask should be saved in
+ * own_cpus, if not present then just the online cpu
+ * mask.
+ */
+ if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) {
+ struct perf_cpu_map *tmp =
+ perf_cpu_map__intersect(online, evsel->core.own_cpus);
+
+ *result = perf_cpu_map__nr(tmp);
+ perf_cpu_map__put(tmp);
+ } else {
+ *result = perf_cpu_map__nr(online);
+ }
}
- return false;
+ perf_cpu_map__put(online);
+ return true;
}
case TOOL_PMU__EVENT_NUM_DIES:
topology = online_topology();
@@ -417,7 +459,7 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread)
old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
val = 0;
if (cpu_map_idx == 0 && thread == 0) {
- if (!tool_pmu__read_event(ev, &val)) {
+ if (!tool_pmu__read_event(ev, evsel, &val)) {
count->lost++;
val = 0;
}
diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h
index c6ad1dd90a56..d642e7d73910 100644
--- a/tools/perf/util/tool_pmu.h
+++ b/tools/perf/util/tool_pmu.h
@@ -34,7 +34,7 @@ enum tool_pmu_event tool_pmu__str_to_event(const char *str);
bool tool_pmu__skip_event(const char *name);
int tool_pmu__num_skip_events(void);
-bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result);
+bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result);
u64 tool_pmu__cpu_slots_per_cycle(void);