summaryrefslogtreecommitdiff
path: root/tools/perf
diff options
context:
space:
mode:
authorNamhyung Kim <namhyung@kernel.org>2025-02-05 14:57:18 -0800
committerNamhyung Kim <namhyung@kernel.org>2025-02-05 14:57:18 -0800
commit9e676a024fa1fa2bd8150c2d2ba85478280353bc (patch)
tree5cf0e1d4ab27002fcafdc7dc5bdfdd9ff3f3c9f1 /tools/perf
parent357b965deba9fb71467413e473764ec4e1694d8d (diff)
parent2014c95afecee3e76ca4a56956a936e23283f05b (diff)
Merge tag 'v6.14-rc1' into perf-tools-next
To get the various fixes in the current master. Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-arm-spe.txt26
-rw-r--r--tools/perf/builtin-trace.c6
-rwxr-xr-xtools/perf/tests/shell/trace_btf_enum.sh8
-rw-r--r--tools/perf/util/annotate.c76
-rw-r--r--tools/perf/util/annotate.h15
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c11
-rw-r--r--tools/perf/util/cpumap.c4
-rw-r--r--tools/perf/util/disasm.c83
8 files changed, 139 insertions, 90 deletions
diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt
index de2b0b479249..37afade4f1b2 100644
--- a/tools/perf/Documentation/perf-arm-spe.txt
+++ b/tools/perf/Documentation/perf-arm-spe.txt
@@ -150,6 +150,7 @@ arm_spe/load_filter=1,min_latency=10/'
pct_enable=1 - collect physical timestamp instead of virtual timestamp (PMSCR.PCT) - requires privilege
store_filter=1 - collect stores only (PMSFCR.ST)
ts_enable=1 - enable timestamping with value of generic timer (PMSCR.TS)
+ discard=1 - enable SPE PMU events but don't collect sample data - see 'Discard mode' (PMBLIMITR.FM = DISCARD)
+++*+++ Latency is the total latency from the point at which sampling started on that instruction, rather
than only the execution latency.
@@ -220,6 +221,31 @@ Common errors
Increase sampling interval (see above)
+PMU events
+~~~~~~~~~~
+
+SPE has events that can be counted on core PMUs. These are prefixed with
+SAMPLE_, for example SAMPLE_POP, SAMPLE_FEED, SAMPLE_COLLISION and
+SAMPLE_FEED_BR.
+
+These events will only count when an SPE event is running on the same core that
+the PMU event is opened on, otherwise they read as 0. There are various ways to
+ensure that the PMU event and SPE event are scheduled together depending on the
+way the event is opened. For example, both events can be opened as per-process
+events on the same process, although it's not guaranteed that the PMU event is
+enabled first when context switching. For that reason it may be better to open
+the PMU event as a systemwide event and then open SPE on the process of interest.
+
+Discard mode
+~~~~~~~~~~~~
+
+SPE-related core PMU events (SAMPLE_* etc.) can be used without the overhead of
+collecting sample data if discard mode is supported (optional from Armv8.6).
+First run a systemwide SPE session (or one on the core of interest) using options
+to minimize output. Then run perf stat:
+
+ perf record -e arm_spe/discard/ -a -N -B --no-bpf-event -o - > /dev/null &
+ perf stat -e SAMPLE_FEED_LD
SEE ALSO
--------
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ac97632f13dc..06356217adeb 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2107,8 +2107,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
return PTR_ERR(sc->tp_format);
}
+ /*
+ * The tracepoint format contains __syscall_nr field, so it's one more
+ * than the actual number of syscall arguments.
+ */
if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
- RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
+ RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields - 1))
return -ENOMEM;
sc->args = sc->tp_format->format.fields;
diff --git a/tools/perf/tests/shell/trace_btf_enum.sh b/tools/perf/tests/shell/trace_btf_enum.sh
index 5a3b8a5a9b5c..8d1e6bbeac90 100755
--- a/tools/perf/tests/shell/trace_btf_enum.sh
+++ b/tools/perf/tests/shell/trace_btf_enum.sh
@@ -26,8 +26,12 @@ check_vmlinux() {
trace_landlock() {
echo "Tracing syscall ${syscall}"
- # test flight just to see if landlock_add_rule and libbpf are available
- $TESTPROG
+ # test flight just to see if landlock_add_rule is available
+ if ! perf trace $TESTPROG 2>&1 | grep -q landlock
+ then
+ echo "No landlock system call found, skipping to non-syscall tracing."
+ return
+ fi
if perf trace -e $syscall $TESTPROG 2>&1 | \
grep -q -E ".*landlock_add_rule\(ruleset_fd: 11, rule_type: (LANDLOCK_RULE_PATH_BENEATH|LANDLOCK_RULE_NET_PORT), rule_attr: 0x[a-f0-9]+, flags: 45\) = -1.*"
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0d2ea22bd9e4..31bb326b07a6 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -2100,6 +2100,57 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
return 0;
}
+const char * const perf_disassembler__strs[] = {
+ [PERF_DISASM_UNKNOWN] = "unknown",
+ [PERF_DISASM_LLVM] = "llvm",
+ [PERF_DISASM_CAPSTONE] = "capstone",
+ [PERF_DISASM_OBJDUMP] = "objdump",
+};
+
+
+static void annotation_options__add_disassembler(struct annotation_options *options,
+ enum perf_disassembler dis)
+{
+ for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers); i++) {
+ if (options->disassemblers[i] == dis) {
+ /* Disassembler is already present then don't add again. */
+ return;
+ }
+ if (options->disassemblers[i] == PERF_DISASM_UNKNOWN) {
+ /* Found a free slot. */
+ options->disassemblers[i] = dis;
+ return;
+ }
+ }
+ pr_err("Failed to add disassembler %d\n", dis);
+}
+
+static int annotation_options__add_disassemblers_str(struct annotation_options *options,
+ const char *str)
+{
+ while (str && *str != '\0') {
+ const char *comma = strchr(str, ',');
+ int len = comma ? comma - str : (int)strlen(str);
+ bool match = false;
+
+ for (u8 i = 0; i < ARRAY_SIZE(perf_disassembler__strs); i++) {
+ const char *dis_str = perf_disassembler__strs[i];
+
+ if (len == (int)strlen(dis_str) && !strncmp(str, dis_str, len)) {
+ annotation_options__add_disassembler(options, i);
+ match = true;
+ break;
+ }
+ }
+ if (!match) {
+ pr_err("Invalid disassembler '%.*s'\n", len, str);
+ return -1;
+ }
+ str = comma ? comma + 1 : NULL;
+ }
+ return 0;
+}
+
static int annotation__config(const char *var, const char *value, void *data)
{
struct annotation_options *opt = data;
@@ -2115,11 +2166,10 @@ static int annotation__config(const char *var, const char *value, void *data)
else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL)
opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
} else if (!strcmp(var, "annotate.disassemblers")) {
- opt->disassemblers_str = strdup(value);
- if (!opt->disassemblers_str) {
- pr_err("Not enough memory for annotate.disassemblers\n");
- return -1;
- }
+ int err = annotation_options__add_disassemblers_str(opt, value);
+
+ if (err)
+ return err;
} else if (!strcmp(var, "annotate.hide_src_code")) {
opt->hide_src_code = perf_config_bool("hide_src_code", value);
} else if (!strcmp(var, "annotate.jump_arrows")) {
@@ -2185,9 +2235,25 @@ void annotation_options__exit(void)
zfree(&annotate_opts.objdump_path);
}
+static void annotation_options__default_init_disassemblers(struct annotation_options *options)
+{
+ if (options->disassemblers[0] != PERF_DISASM_UNKNOWN) {
+ /* Already initialized. */
+ return;
+ }
+#ifdef HAVE_LIBLLVM_SUPPORT
+ annotation_options__add_disassembler(options, PERF_DISASM_LLVM);
+#endif
+#ifdef HAVE_LIBCAPSTONE_SUPPORT
+ annotation_options__add_disassembler(options, PERF_DISASM_CAPSTONE);
+#endif
+ annotation_options__add_disassembler(options, PERF_DISASM_OBJDUMP);
+}
+
void annotation_config__init(void)
{
perf_config(annotation__config, &annotate_opts);
+ annotation_options__default_init_disassemblers(&annotate_opts);
}
static unsigned int parse_percent_type(char *str1, char *str2)
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 0ba5846dad4d..98db1b88daf4 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -34,8 +34,13 @@ struct annotated_data_type;
#define ANNOTATION__BR_CNTR_WIDTH 30
#define ANNOTATION_DUMMY_LEN 256
-// llvm, capstone, objdump
-#define MAX_DISASSEMBLERS 3
+enum perf_disassembler {
+ PERF_DISASM_UNKNOWN = 0,
+ PERF_DISASM_LLVM,
+ PERF_DISASM_CAPSTONE,
+ PERF_DISASM_OBJDUMP,
+};
+#define MAX_DISASSEMBLERS (PERF_DISASM_OBJDUMP + 1)
struct annotation_options {
bool hide_src_code,
@@ -52,14 +57,12 @@ struct annotation_options {
annotate_src,
full_addr;
u8 offset_level;
- u8 nr_disassemblers;
+ u8 disassemblers[MAX_DISASSEMBLERS];
int min_pcnt;
int max_lines;
int context;
char *objdump_path;
char *disassembler_style;
- const char *disassemblers_str;
- const char *disassemblers[MAX_DISASSEMBLERS];
const char *prefix;
const char *prefix_strip;
unsigned int percent_type;
@@ -134,6 +137,8 @@ struct disasm_line {
struct annotation_line al;
};
+extern const char * const perf_disassembler__strs[];
+
void annotation_line__add(struct annotation_line *al, struct list_head *head);
static inline double annotation_data__percent(struct annotation_data *data,
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index 4a62ed593e84..e4352881e3fa 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -431,9 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
{
bool augmented, do_output = false;
- int zero = 0, size, aug_size, index,
- value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+ int zero = 0, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */
+ s64 aug_size, size;
unsigned int nr, *beauty_map;
struct beauty_payload_enter *payload;
void *arg, *payload_offset;
@@ -484,14 +484,11 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
} else if (size > 0 && size <= value_size) { /* struct */
if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
augmented = true;
- } else if (size < 0 && size >= -6) { /* buffer */
+ } else if ((int)size < 0 && size >= -6) { /* buffer */
index = -(size + 1);
barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick.
index &= 7; // Satisfy the bounds checking with the verifier in some kernels.
- aug_size = args->args[index];
-
- if (aug_size > TRACE_AUG_MAX_BUF)
- aug_size = TRACE_AUG_MAX_BUF;
+ aug_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
if (aug_size > 0) {
if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 27094211edd8..5c329ad614e9 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -293,7 +293,7 @@ struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
die = cpu__get_die_id(cpu);
/* There is no die_id on legacy system. */
- if (die == -1)
+ if (die < 0)
die = 0;
/*
@@ -322,7 +322,7 @@ struct aggr_cpu_id aggr_cpu_id__cluster(struct perf_cpu cpu, void *data)
struct aggr_cpu_id id;
/* There is no cluster_id on legacy system. */
- if (cluster == -1)
+ if (cluster < 0)
cluster = 0;
id = aggr_cpu_id__die(cpu, data);
diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c
index b7de4d9fd004..50c5c206b70e 100644
--- a/tools/perf/util/disasm.c
+++ b/tools/perf/util/disasm.c
@@ -2216,56 +2216,6 @@ out_free_command:
return err;
}
-static int annotation_options__init_disassemblers(struct annotation_options *options)
-{
- char *disassembler;
-
- if (options->disassemblers_str == NULL) {
- const char *default_disassemblers_str =
-#ifdef HAVE_LIBLLVM_SUPPORT
- "llvm,"
-#endif
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
- "capstone,"
-#endif
- "objdump";
-
- options->disassemblers_str = strdup(default_disassemblers_str);
- if (!options->disassemblers_str)
- goto out_enomem;
- }
-
- disassembler = strdup(options->disassemblers_str);
- if (disassembler == NULL)
- goto out_enomem;
-
- while (1) {
- char *comma = strchr(disassembler, ',');
-
- if (comma != NULL)
- *comma = '\0';
-
- options->disassemblers[options->nr_disassemblers++] = strim(disassembler);
-
- if (comma == NULL)
- break;
-
- disassembler = comma + 1;
-
- if (options->nr_disassemblers >= MAX_DISASSEMBLERS) {
- pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n",
- MAX_DISASSEMBLERS, disassembler);
- break;
- }
- }
-
- return 0;
-
-out_enomem:
- pr_err("Not enough memory for annotate.disassemblers\n");
- return -1;
-}
-
int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
{
struct annotation_options *options = args->options;
@@ -2274,7 +2224,6 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
char symfs_filename[PATH_MAX];
bool delete_extract = false;
struct kcore_extract kce;
- const char *disassembler;
bool decomp = false;
int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
@@ -2334,28 +2283,26 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
}
}
- err = annotation_options__init_disassemblers(options);
- if (err)
- goto out_remove_tmp;
-
err = -1;
+ for (u8 i = 0; i < ARRAY_SIZE(options->disassemblers) && err != 0; i++) {
+ enum perf_disassembler dis = options->disassemblers[i];
- for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) {
- disassembler = options->disassemblers[i];
-
- if (!strcmp(disassembler, "llvm"))
+ switch (dis) {
+ case PERF_DISASM_LLVM:
err = symbol__disassemble_llvm(symfs_filename, sym, args);
- else if (!strcmp(disassembler, "capstone"))
+ break;
+ case PERF_DISASM_CAPSTONE:
err = symbol__disassemble_capstone(symfs_filename, sym, args);
- else if (!strcmp(disassembler, "objdump"))
+ break;
+ case PERF_DISASM_OBJDUMP:
err = symbol__disassemble_objdump(symfs_filename, sym, args);
- else
- pr_debug("Unknown disassembler %s, skipping...\n", disassembler);
- }
-
- if (err == 0) {
- pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n",
- disassembler, options->disassemblers_str);
+ break;
+ case PERF_DISASM_UNKNOWN: /* End of disassemblers. */
+ default:
+ goto out_remove_tmp;
+ }
+ if (err == 0)
+ pr_debug("Disassembled with %s\n", perf_disassembler__strs[dis]);
}
out_remove_tmp:
if (decomp)