diff options
author | Ian Rogers <irogers@google.com> | 2025-07-18 20:05:07 -0700 |
---|---|---|
committer | Namhyung Kim <namhyung@kernel.org> | 2025-07-24 13:41:35 -0700 |
commit | 175c852325a1f566426e2470e5d5d67efc7621dd (patch) | |
tree | dce45d5597579302daf7ef0dd8bf2c64d9d7c513 /tools/perf/util | |
parent | bd741d80dc65922c7d6e5fd855a934f5d2cf2309 (diff) |
perf tool_pmu: Allow num_cpus(_online) to be specific to a cpumask
For hybrid metrics it is useful to know the number of p-core or e-core
CPUs. If a cpumask is specified for the num_cpus or num_cpus_online
tool events, compute the value relative to the given mask rather than
for the full system.
```
$ sudo /tmp/perf/perf stat -e 'tool/num_cpus/,tool/num_cpus,cpu=cpu_core/,
tool/num_cpus,cpu=cpu_atom/,tool/num_cpus_online/,tool/num_cpus_online,
cpu=cpu_core/,tool/num_cpus_online,cpu=cpu_atom/' true
Performance counter stats for 'true':
28 tool/num_cpus/
16 tool/num_cpus,cpu=cpu_core/
12 tool/num_cpus,cpu=cpu_atom/
28 tool/num_cpus_online/
16 tool/num_cpus_online,cpu=cpu_core/
12 tool/num_cpus_online,cpu=cpu_atom/
0.000767205 seconds time elapsed
0.000938000 seconds user
0.000000000 seconds sys
```
Reviewed-by: Thomas Falcon <thomas.falcon@intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: James Clark <james.clark@linaro.org>
Link: https://lore.kernel.org/r/20250719030517.1990983-6-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Diffstat (limited to 'tools/perf/util')
-rw-r--r-- | tools/perf/util/expr.c | 2 | ||||
-rw-r--r-- | tools/perf/util/tool_pmu.c | 56 | ||||
-rw-r--r-- | tools/perf/util/tool_pmu.h | 2 |
3 files changed, 51 insertions, 9 deletions
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index ca70a14c7cdf..7fda0ff89c16 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -401,7 +401,7 @@ double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx if (ev != TOOL_PMU__EVENT_NONE) { u64 count; - if (tool_pmu__read_event(ev, &count)) + if (tool_pmu__read_event(ev, /*evsel=*/NULL, &count)) result = count; else pr_err("Failure to read '%s'", literal); diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c index 4630b8cc8e52..7aa4f315b0ac 100644 --- a/tools/perf/util/tool_pmu.c +++ b/tools/perf/util/tool_pmu.c @@ -332,7 +332,7 @@ static bool has_pmem(void) return has_pmem; } -bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) +bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result) { const struct cpu_topology *topology; @@ -347,18 +347,60 @@ bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) return true; case TOOL_PMU__EVENT_NUM_CPUS: - *result = cpu__max_present_cpu().cpu; + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ + *result = cpu__max_present_cpu().cpu; + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + *result = perf_cpu_map__nr(evsel->core.cpus); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * own_cpus. If not present fall back to max. + */ + if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) + *result = perf_cpu_map__nr(evsel->core.own_cpus); + else + *result = cpu__max_present_cpu().cpu; + } return true; case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { struct perf_cpu_map *online = cpu_map__online(); - if (online) { + if (!online) + return false; + + if (!evsel || perf_cpu_map__is_empty(evsel->core.cpus)) { + /* No evsel to be specific to. */ *result = perf_cpu_map__nr(online); - perf_cpu_map__put(online); - return true; + } else if (!perf_cpu_map__has_any_cpu(evsel->core.cpus)) { + /* Evsel just has specific CPUs. */ + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + /* + * "Any CPU" event that can be scheduled on any CPU in + * the PMU's cpumask. The PMU cpumask should be saved in + * own_cpus, if not present then just the online cpu + * mask. + */ + if (!perf_cpu_map__is_empty(evsel->core.own_cpus)) { + struct perf_cpu_map *tmp = + perf_cpu_map__intersect(online, evsel->core.own_cpus); + + *result = perf_cpu_map__nr(tmp); + perf_cpu_map__put(tmp); + } else { + *result = perf_cpu_map__nr(online); + } } - return false; + perf_cpu_map__put(online); + return true; } case TOOL_PMU__EVENT_NUM_DIES: topology = online_topology(); @@ -417,7 +459,7 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); val = 0; if (cpu_map_idx == 0 && thread == 0) { - if (!tool_pmu__read_event(ev, &val)) { + if (!tool_pmu__read_event(ev, evsel, &val)) { count->lost++; val = 0; } diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h index c6ad1dd90a56..d642e7d73910 100644 --- a/tools/perf/util/tool_pmu.h +++ b/tools/perf/util/tool_pmu.h @@ -34,7 +34,7 @@ enum tool_pmu_event tool_pmu__str_to_event(const char *str); bool tool_pmu__skip_event(const char *name); int tool_pmu__num_skip_events(void); -bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result); +bool tool_pmu__read_event(enum tool_pmu_event ev, struct evsel *evsel, u64 *result); u64 tool_pmu__cpu_slots_per_cycle(void); |