Diffstat (limited to 'tools/perf/arch')
-rw-r--r-- | tools/perf/arch/arm/util/pmu.c | 3
-rw-r--r-- | tools/perf/arch/arm64/annotate/instructions.c | 3
-rw-r--r-- | tools/perf/arch/arm64/util/arm-spe.c | 107
-rw-r--r-- | tools/perf/arch/loongarch/annotate/instructions.c | 6
-rw-r--r-- | tools/perf/arch/powerpc/annotate/instructions.c | 254
-rw-r--r-- | tools/perf/arch/powerpc/util/dwarf-regs.c | 53
-rw-r--r-- | tools/perf/arch/s390/annotate/instructions.c | 5
-rw-r--r-- | tools/perf/arch/x86/annotate/instructions.c | 377
-rw-r--r-- | tools/perf/arch/x86/util/event.c | 4
-rw-r--r-- | tools/perf/arch/x86/util/evlist.c | 6
10 files changed, 766 insertions, 52 deletions
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index 1c9541d01722..57dc94a6e38c 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -23,16 +23,19 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
 #ifdef HAVE_AUXTRACE_SUPPORT
 	if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
 		/* add ETM default config here */
+		pmu->auxtrace = true;
 		pmu->selectable = true;
 		pmu->perf_event_attr_init_default = cs_etm_get_default_config;
 #if defined(__aarch64__)
 	} else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
+		pmu->auxtrace = true;
 		pmu->selectable = true;
 		pmu->is_uncore = false;
 		pmu->perf_event_attr_init_default = arm_spe_pmu_default_config;
 		if (strstarts(pmu->name, "arm_spe_"))
 			pmu->mem_events = perf_mem_events_arm;
 	} else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
+		pmu->auxtrace = true;
 		pmu->selectable = true;
 #endif
 	}
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index 4af0c3a0f86e..f86d9f4798bd 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -11,7 +11,8 @@ struct arm64_annotate {
 
 static int arm64_mov__parse(struct arch *arch __maybe_unused,
 			    struct ins_operands *ops,
-			    struct map_symbol *ms __maybe_unused)
+			    struct map_symbol *ms __maybe_unused,
+			    struct disasm_line *dl __maybe_unused)
 {
 	char *s = strchr(ops->raw, ','), *target, *endptr;
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 0b52e67edb3b..d59f6ca499f2 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -8,6 +8,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/string.h>
 #include <linux/zalloc.h>
 #include <time.h>
 
@@ -132,32 +133,66 @@ static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
 	return sample_period;
 }
 
+static void arm_spe_setup_evsel(struct evsel *evsel, struct perf_cpu_map *cpus)
+{
+	u64 bit;
+
+	evsel->core.attr.freq = 0;
+	evsel->core.attr.sample_period = arm_spe_pmu__sample_period(evsel->pmu);
+	evsel->needs_auxtrace_mmap = true;
+
+	/*
+	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
+	 * must come first.
+	 */
+	evlist__to_front(evsel->evlist, evsel);
+
+	/*
+	 * In the case of per-cpu mmaps, sample CPU for AUX event;
+	 * also enable the timestamp tracing for samples correlation.
+	 */
+	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
+		evsel__set_sample_bit(evsel, CPU);
+		evsel__set_config_if_unset(evsel->pmu, evsel, "ts_enable", 1);
+	}
+
+	/*
+	 * Set this only so that perf report knows that SPE generates memory info. It has no effect
+	 * on the opening of the event or the SPE data produced.
+	 */
+	evsel__set_sample_bit(evsel, DATA_SRC);
+
+	/*
+	 * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
+	 * inform that the resulting output's SPE samples contain physical addresses
+	 * where applicable.
+	 */
+	bit = perf_pmu__format_bits(evsel->pmu, "pa_enable");
+	if (evsel->core.attr.config & bit)
+		evsel__set_sample_bit(evsel, PHYS_ADDR);
+}
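The pa_enable test at the end of arm_spe_setup_evsel() is the usual pattern for probing a PMU format field: perf_pmu__format_bits() turns a sysfs format name into a bit mask over perf_event_attr.config, and ANDing that mask with the event's config tells whether the user enabled the field. Below is a standalone sketch of the same pattern (not part of the patch; the bit position is an assumption here, the real one is read from /sys/bus/event_source/devices/arm_spe_*/format/pa_enable):

#include <stdio.h>
#include <stdint.h>

/* Toy stand-in for perf_pmu__format_bits(): a sysfs format spec names a
 * bit range inside perf_event_attr.config; return a mask covering it. */
static uint64_t format_bits(int lo, int hi)
{
	uint64_t mask = 0;

	for (int b = lo; b <= hi; b++)
		mask |= 1ULL << b;
	return mask;
}

int main(void)
{
	uint64_t config = 1ULL << 1;		/* as if the user passed pa_enable=1 */
	uint64_t bit = format_bits(1, 1);	/* assume pa_enable is config:1 */

	/* Same test as the hunk above: only request PERF_SAMPLE_PHYS_ADDR
	 * when physical-address collection was actually enabled. */
	if (config & bit)
		printf("pa_enable set: add PERF_SAMPLE_PHYS_ADDR to sample_type\n");
	return 0;
}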
+
 static int arm_spe_recording_options(struct auxtrace_record *itr,
 				     struct evlist *evlist,
 				     struct record_opts *opts)
 {
 	struct arm_spe_recording *sper =
 			container_of(itr, struct arm_spe_recording, itr);
-	struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
-	struct evsel *evsel, *arm_spe_evsel = NULL;
+	struct evsel *evsel, *tmp;
 	struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;
 	bool privileged = perf_event_paranoid_check(-1);
 	struct evsel *tracking_evsel;
 	int err;
-	u64 bit;
 
 	sper->evlist = evlist;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (evsel->core.attr.type == arm_spe_pmu->type) {
-			if (arm_spe_evsel) {
-				pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
+		if (evsel__is_aux_event(evsel)) {
+			if (!strstarts(evsel->pmu_name, ARM_SPE_PMU_NAME)) {
+				pr_err("Found unexpected auxtrace event: %s\n",
+				       evsel->pmu_name);
 				return -EINVAL;
 			}
-			evsel->core.attr.freq = 0;
-			evsel->core.attr.sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
-			evsel->needs_auxtrace_mmap = true;
-			arm_spe_evsel = evsel;
 			opts->full_auxtrace = true;
 		}
 	}
@@ -222,37 +257,11 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
 	pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
 		  opts->auxtrace_snapshot_size);
 
-	/*
-	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
-	 * must come first.
-	 */
-	evlist__to_front(evlist, arm_spe_evsel);
-
-	/*
-	 * In the case of per-cpu mmaps, sample CPU for AUX event;
-	 * also enable the timestamp tracing for samples correlation.
-	 */
-	if (!perf_cpu_map__is_any_cpu_or_is_empty(cpus)) {
-		evsel__set_sample_bit(arm_spe_evsel, CPU);
-		evsel__set_config_if_unset(arm_spe_pmu, arm_spe_evsel,
-					   "ts_enable", 1);
+	evlist__for_each_entry_safe(evlist, tmp, evsel) {
+		if (evsel__is_aux_event(evsel))
+			arm_spe_setup_evsel(evsel, cpus);
 	}
 
-	/*
-	 * Set this only so that perf report knows that SPE generates memory info. It has no effect
-	 * on the opening of the event or the SPE data produced.
-	 */
-	evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
-
-	/*
-	 * The PHYS_ADDR flag does not affect the driver behaviour, it is used to
-	 * inform that the resulting output's SPE samples contain physical addresses
-	 * where applicable.
-	 */
-	bit = perf_pmu__format_bits(arm_spe_pmu, "pa_enable");
-	if (arm_spe_evsel->core.attr.config & bit)
-		evsel__set_sample_bit(arm_spe_evsel, PHYS_ADDR);
-
 	/* Add dummy event to keep tracking */
 	err = parse_event(evlist, "dummy:u");
 	if (err)
@@ -301,12 +310,16 @@ static int arm_spe_snapshot_start(struct auxtrace_record *itr)
 	struct arm_spe_recording *ptr =
 			container_of(itr, struct arm_spe_recording, itr);
 	struct evsel *evsel;
+	int ret = -EINVAL;
 
 	evlist__for_each_entry(ptr->evlist, evsel) {
-		if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
-			return evsel__disable(evsel);
+		if (evsel__is_aux_event(evsel)) {
+			ret = evsel__disable(evsel);
+			if (ret < 0)
+				return ret;
+		}
 	}
-	return -EINVAL;
+	return ret;
 }
 
 static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
@@ -314,12 +327,16 @@ static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
 	struct arm_spe_recording *ptr =
 			container_of(itr, struct arm_spe_recording, itr);
 	struct evsel *evsel;
+	int ret = -EINVAL;
 
 	evlist__for_each_entry(ptr->evlist, evsel) {
-		if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
-			return evsel__enable(evsel);
+		if (evsel__is_aux_event(evsel)) {
+			ret = evsel__enable(evsel);
+			if (ret < 0)
+				return ret;
+		}
 	}
-	return -EINVAL;
+	return ret;
 }
 
 static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c
index 21cc7e4149f7..ab43b1ab51e3 100644
--- a/tools/perf/arch/loongarch/annotate/instructions.c
+++ b/tools/perf/arch/loongarch/annotate/instructions.c
@@ -5,7 +5,8 @@
  * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
  */
 
-static int loongarch_call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int loongarch_call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+				 struct disasm_line *dl __maybe_unused)
 {
 	char *c, *endptr, *tok, *name;
 	struct map *map = ms->map;
@@ -51,7 +52,8 @@ static struct ins_ops loongarch_call_ops = {
 	.scnprintf = call__scnprintf,
 };
 
-static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+static int loongarch_jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms,
+				 struct disasm_line *dl __maybe_unused)
 {
 	struct map *map = ms->map;
 	struct symbol *sym = ms->sym;
diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c
index a3f423c27cae..ede9eeade0ab 100644
--- a/tools/perf/arch/powerpc/annotate/instructions.c
+++ b/tools/perf/arch/powerpc/annotate/instructions.c
@@ -49,12 +49,266 @@ static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, con
 	return ops;
 }
 
+#define PPC_OP(op)	(((op) >> 26) & 0x3F)
+#define PPC_21_30(R)	(((R) >> 1) & 0x3ff)
+#define PPC_22_30(R)	(((R) >> 1) & 0x1ff)
+
+struct insn_offset {
+	const char	*name;
+	int		value;
+};
+
+/*
+ * There are memory instructions with opcode 31 which are
+ * of X Form, Example:
+ * ldx RT,RA,RB
+ * ______________________________________
+ * | 31 |  RT  |  RA  |  RB  |   21  |/|
+ * --------------------------------------
+ * 0    6     11     16     21      30 31
+ *
+ * But not all instructions with opcode 31 are memory.
+ * Example: add RT,RA,RB
+ *
+ * Use bits 21 to 30 to check memory insns with 31 as opcode.
+ * In ins_array below, for ldx instruction:
+ * name => OP_31_XOP_LDX
+ * value => 21
+ */
+
+static struct insn_offset ins_array[] = {
+	{ .name = "OP_31_XOP_LXSIWZX",	.value = 12, },
+	{ .name = "OP_31_XOP_LWARX",	.value = 20, },
+	{ .name = "OP_31_XOP_LDX",	.value = 21, },
+	{ .name = "OP_31_XOP_LWZX",	.value = 23, },
+	{ .name = "OP_31_XOP_LDUX",	.value = 53, },
+	{ .name = "OP_31_XOP_LWZUX",	.value = 55, },
+	{ .name = "OP_31_XOP_LXSIWAX",	.value = 76, },
+	{ .name = "OP_31_XOP_LDARX",	.value = 84, },
+	{ .name = "OP_31_XOP_LBZX",	.value = 87, },
+	{ .name = "OP_31_XOP_LVX",	.value = 103, },
+	{ .name = "OP_31_XOP_LBZUX",	.value = 119, },
+	{ .name = "OP_31_XOP_STXSIWX",	.value = 140, },
+	{ .name = "OP_31_XOP_STDX",	.value = 149, },
+	{ .name = "OP_31_XOP_STWX",	.value = 151, },
+	{ .name = "OP_31_XOP_STDUX",	.value = 181, },
+	{ .name = "OP_31_XOP_STWUX",	.value = 183, },
+	{ .name = "OP_31_XOP_STBX",	.value = 215, },
+	{ .name = "OP_31_XOP_STVX",	.value = 231, },
+	{ .name = "OP_31_XOP_STBUX",	.value = 247, },
+	{ .name = "OP_31_XOP_LHZX",	.value = 279, },
+	{ .name = "OP_31_XOP_LHZUX",	.value = 311, },
+	{ .name = "OP_31_XOP_LXVDSX",	.value = 332, },
+	{ .name = "OP_31_XOP_LWAX",	.value = 341, },
+	{ .name = "OP_31_XOP_LHAX",	.value = 343, },
+	{ .name = "OP_31_XOP_LWAUX",	.value = 373, },
+	{ .name = "OP_31_XOP_LHAUX",	.value = 375, },
+	{ .name = "OP_31_XOP_STHX",	.value = 407, },
+	{ .name = "OP_31_XOP_STHUX",	.value = 439, },
+	{ .name = "OP_31_XOP_LXSSPX",	.value = 524, },
+	{ .name = "OP_31_XOP_LDBRX",	.value = 532, },
+	{ .name = "OP_31_XOP_LSWX",	.value = 533, },
+	{ .name = "OP_31_XOP_LWBRX",	.value = 534, },
+	{ .name = "OP_31_XOP_LFSUX",	.value = 567, },
+	{ .name = "OP_31_XOP_LXSDX",	.value = 588, },
+	{ .name = "OP_31_XOP_LSWI",	.value = 597, },
+	{ .name = "OP_31_XOP_LFDX",	.value = 599, },
+	{ .name = "OP_31_XOP_LFDUX",	.value = 631, },
+	{ .name = "OP_31_XOP_STXSSPX",	.value = 652, },
+	{ .name = "OP_31_XOP_STDBRX",	.value = 660, },
+	{ .name = "OP_31_XOP_STXWX",	.value = 661, },
+	{ .name = "OP_31_XOP_STWBRX",	.value = 662, },
+	{ .name = "OP_31_XOP_STFSX",	.value = 663, },
+	{ .name = "OP_31_XOP_STFSUX",	.value = 695, },
+	{ .name = "OP_31_XOP_STXSDX",	.value = 716, },
+	{ .name = "OP_31_XOP_STSWI",	.value = 725, },
+	{ .name = "OP_31_XOP_STFDX",	.value = 727, },
+	{ .name = "OP_31_XOP_STFDUX",	.value = 759, },
+	{ .name = "OP_31_XOP_LXVW4X",	.value = 780, },
+	{ .name = "OP_31_XOP_LHBRX",	.value = 790, },
+	{ .name = "OP_31_XOP_LXVD2X",	.value = 844, },
+	{ .name = "OP_31_XOP_LFIWAX",	.value = 855, },
+	{ .name = "OP_31_XOP_LFIWZX",	.value = 887, },
+	{ .name = "OP_31_XOP_STXVW4X",	.value = 908, },
+	{ .name = "OP_31_XOP_STHBRX",	.value = 918, },
+	{ .name = "OP_31_XOP_STXVD2X",	.value = 972, },
+	{ .name = "OP_31_XOP_STFIWX",	.value = 983, },
+};
+
+/*
+ * Arithmetic instructions which have opcode 31.
+ * These instructions are tracked to save the register state
+ * changes. Example:
+ *
+ * lwz	r10,264(r3)
+ * add	r31, r3, r3
+ * lwz	r9, 0(r31)
+ *
+ * Here instruction tracking needs to identify the "add"
+ * instruction and save data type of r3 to r31. If a sample
+ * is hit at next "lwz r9, 0(r31)", by this instruction tracking,
+ * data type of r31 can be resolved.
+ */
+static struct insn_offset arithmetic_ins_op_31[] = {
+	{ .name = "SUB_CARRY_XO_FORM",		.value = 8, },
+	{ .name = "MUL_HDW_XO_FORM1",		.value = 9, },
+	{ .name = "ADD_CARRY_XO_FORM",		.value = 10, },
+	{ .name = "MUL_HW_XO_FORM1",		.value = 11, },
+	{ .name = "SUB_XO_FORM",		.value = 40, },
+	{ .name = "MUL_HDW_XO_FORM",		.value = 73, },
+	{ .name = "MUL_HW_XO_FORM",		.value = 75, },
+	{ .name = "SUB_EXT_XO_FORM",		.value = 136, },
+	{ .name = "ADD_EXT_XO_FORM",		.value = 138, },
+	{ .name = "SUB_ZERO_EXT_XO_FORM",	.value = 200, },
+	{ .name = "ADD_ZERO_EXT_XO_FORM",	.value = 202, },
+	{ .name = "SUB_EXT_XO_FORM2",		.value = 232, },
+	{ .name = "MUL_DW_XO_FORM",		.value = 233, },
+	{ .name = "ADD_EXT_XO_FORM2",		.value = 234, },
+	{ .name = "MUL_W_XO_FORM",		.value = 235, },
+	{ .name = "ADD_XO_FORM",		.value = 266, },
+	{ .name = "DIV_DW_XO_FORM1",		.value = 457, },
+	{ .name = "DIV_W_XO_FORM1",		.value = 459, },
+	{ .name = "DIV_DW_XO_FORM",		.value = 489, },
+	{ .name = "DIV_W_XO_FORM",		.value = 491, },
+};
+
+static struct insn_offset arithmetic_two_ops[] = {
+	{ .name = "mulli",	.value = 7, },
+	{ .name = "subfic",	.value = 8, },
+	{ .name = "addic",	.value = 12, },
+	{ .name = "addic.",	.value = 13, },
+	{ .name = "addi",	.value = 14, },
+	{ .name = "addis",	.value = 15, },
+};
+
+static int cmp_offset(const void *a, const void *b)
+{
+	const struct insn_offset *val1 = a;
+	const struct insn_offset *val2 = b;
+
+	return (val1->value - val2->value);
+}
+
+static struct ins_ops *check_ppc_insn(struct disasm_line *dl)
+{
+	int raw_insn = dl->raw.raw_insn;
+	int opcode = PPC_OP(raw_insn);
+	int mem_insn_31 = PPC_21_30(raw_insn);
+	struct insn_offset *ret;
+	struct insn_offset mem_insns_31_opcode = {
+		"OP_31_INSN",
+		mem_insn_31
+	};
+	char name_insn[32];
+
+	/*
+	 * Instructions with opcode 32 to 63 are memory
+	 * instructions in powerpc
+	 */
+	if ((opcode & 0x20)) {
+		/*
+		 * Set name in case of raw instruction to
+		 * opcode to be used in insn-stat
+		 */
+		if (!strlen(dl->ins.name)) {
+			sprintf(name_insn, "%d", opcode);
+			dl->ins.name = strdup(name_insn);
+		}
+		return &load_store_ops;
+	} else if (opcode == 31) {
+		/* Check for memory instructions with opcode 31 */
+		ret = bsearch(&mem_insns_31_opcode, ins_array, ARRAY_SIZE(ins_array), sizeof(ins_array[0]), cmp_offset);
+		if (ret) {
+			if (!strlen(dl->ins.name))
+				dl->ins.name = strdup(ret->name);
+			return &load_store_ops;
+		} else {
+			mem_insns_31_opcode.value = PPC_22_30(raw_insn);
+			ret = bsearch(&mem_insns_31_opcode, arithmetic_ins_op_31, ARRAY_SIZE(arithmetic_ins_op_31),
+					sizeof(arithmetic_ins_op_31[0]), cmp_offset);
+			if (ret != NULL)
+				return &arithmetic_ops;
+			/* Bits 21 to 30 have value 444 for "mr" insn, i.e. OR X form */
+			if (PPC_21_30(raw_insn) == 444)
+				return &arithmetic_ops;
+		}
+	} else {
+		mem_insns_31_opcode.value = opcode;
+		ret = bsearch(&mem_insns_31_opcode, arithmetic_two_ops, ARRAY_SIZE(arithmetic_two_ops),
+				sizeof(arithmetic_two_ops[0]), cmp_offset);
+		if (ret != NULL)
+			return &arithmetic_ops;
+	}
+
+	return NULL;
+}
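To make the classification in check_ppc_insn() concrete, here is a worked decode (a standalone sketch, not part of the patch) that runs the same field macros and bsearch() over a trimmed table. The raw word 0x7d43202a encodes "ldx r10,r3,r4": PPC_OP() yields 31 and bits 21-30 yield 21, matching OP_31_XOP_LDX and selecting load_store_ops:

#include <stdio.h>
#include <stdlib.h>

#define PPC_OP(op)	(((op) >> 26) & 0x3F)
#define PPC_21_30(R)	(((R) >> 1) & 0x3ff)

struct insn_offset {
	const char *name;
	int value;
};

/* A few entries from ins_array, kept sorted by value for bsearch(). */
static struct insn_offset mini_ins_array[] = {
	{ .name = "OP_31_XOP_LWARX",	.value = 20, },
	{ .name = "OP_31_XOP_LDX",	.value = 21, },
	{ .name = "OP_31_XOP_LWZX",	.value = 23, },
};

static int cmp_offset(const void *a, const void *b)
{
	const struct insn_offset *val1 = a, *val2 = b;

	return val1->value - val2->value;
}

int main(void)
{
	unsigned int raw_insn = 0x7d43202a;	/* "ldx r10,r3,r4" */
	struct insn_offset key = { "OP_31_INSN", PPC_21_30(raw_insn) };
	struct insn_offset *ret;

	/* prints "opcode=31 bits 21-30=21" */
	printf("opcode=%d bits 21-30=%d\n", PPC_OP(raw_insn), PPC_21_30(raw_insn));

	/* opcode is 31, so look up the extended opcode in the sorted table */
	ret = bsearch(&key, mini_ins_array, 3, sizeof(mini_ins_array[0]), cmp_offset);
	if (ret)
		printf("matched %s -> load_store_ops\n", ret->name);
	return 0;
}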
+
+/*
+ * Instruction tracking function to track register state moves.
+ * Example sequence:
+ *	ld	r10,264(r3)
+ *	mr	r31,r3
+ *	<<after some sequence>
+ *	ld	r9,312(r31)
+ *
+ * Previous instruction sequence shows that register state of r3
+ * is moved to r31. update_insn_state_powerpc tracks these state
+ * changes.
+ */
+#ifdef HAVE_DWARF_SUPPORT
+static void update_insn_state_powerpc(struct type_state *state,
+		struct data_loc_info *dloc, Dwarf_Die *cu_die __maybe_unused,
+		struct disasm_line *dl)
+{
+	struct annotated_insn_loc loc;
+	struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
+	struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
+	struct type_state_reg *tsr;
+	u32 insn_offset = dl->al.offset;
+
+	if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
+		return;
+
+	/*
+	 * Value 444 for bits 21:30 is for "mr"
+	 * instruction. "mr" is extended OR. So set the
+	 * source and destination reg correctly
+	 */
+	if (PPC_21_30(dl->raw.raw_insn) == 444) {
+		int src_reg = src->reg1;
+
+		src->reg1 = dst->reg1;
+		dst->reg1 = src_reg;
+	}
+
+	if (!has_reg_type(state, dst->reg1))
+		return;
+
+	tsr = &state->regs[dst->reg1];
+
+	if (!has_reg_type(state, src->reg1) ||
+	    !state->regs[src->reg1].ok) {
+		tsr->ok = false;
+		return;
+	}
+
+	tsr->type = state->regs[src->reg1].type;
+	tsr->kind = state->regs[src->reg1].kind;
+	tsr->ok = true;
+
+	pr_debug_dtp("mov [%x] reg%d -> reg%d",
+		     insn_offset, src->reg1, dst->reg1);
+	pr_debug_type_name(&tsr->type, tsr->kind);
+}
+#endif /* HAVE_DWARF_SUPPORT */
+
 static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
 {
 	if (!arch->initialized) {
 		arch->initialized = true;
 		arch->associate_instruction_ops = powerpc__associate_instruction_ops;
 		arch->objdump.comment_char = '#';
+		annotate_opts.show_asm_raw = true;
 	}
 
 	return 0;
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 0c4f4caf53ac..104c7ae5c433 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -98,3 +98,56 @@ int regs_query_register_offset(const char *name)
 			return roff->ptregs_offset;
 	return -EINVAL;
 }
+
+#define PPC_OP(op)	(((op) >> 26) & 0x3F)
+#define PPC_RA(a)	(((a) >> 16) & 0x1f)
+#define PPC_RT(t)	(((t) >> 21) & 0x1f)
+#define PPC_RB(b)	(((b) >> 11) & 0x1f)
+#define PPC_D(D)	((D) & 0xfffe)
+#define PPC_DS(DS)	((DS) & 0xfffc)
+#define OP_LD	58
+#define OP_STD	62
+
+static int get_source_reg(u32 raw_insn)
+{
+	return PPC_RA(raw_insn);
+}
+
+static int get_target_reg(u32 raw_insn)
+{
+	return PPC_RT(raw_insn);
+}
+
+static int get_offset_opcode(u32 raw_insn)
+{
+	int opcode = PPC_OP(raw_insn);
+
+	/* DS- form */
+	if ((opcode == OP_LD) || (opcode == OP_STD))
+		return PPC_DS(raw_insn);
+	else
+		return PPC_D(raw_insn);
+}
+
+/*
+ * Fills the required fields for op_loc depending on if it
+ * is a source or target.
+ * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT
+ * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT
+ * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT
+ */
+void get_powerpc_regs(u32 raw_insn, int is_source,
+		struct annotated_op_loc *op_loc)
+{
+	if (is_source)
+		op_loc->reg1 = get_source_reg(raw_insn);
+	else
+		op_loc->reg1 = get_target_reg(raw_insn);
+
+	if (op_loc->multi_regs)
+		op_loc->reg2 = PPC_RB(raw_insn);
+
+	/* TODO: Implement offset handling for X Form */
+	if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31))
+		op_loc->offset = get_offset_opcode(raw_insn);
+}
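A worked example of the offset extraction (a standalone sketch, not part of the patch): 0xe93f0138 encodes "ld r9,312(r31)". Opcode 58 (OP_LD) selects the DS-form mask, which clears the two low-order XO bits, leaving RA = 31 as the base register and 312 as the displacement:

#include <stdio.h>
#include <stdint.h>

#define PPC_OP(op)	(((op) >> 26) & 0x3F)
#define PPC_RA(a)	(((a) >> 16) & 0x1f)
#define PPC_RT(t)	(((t) >> 21) & 0x1f)
#define PPC_DS(DS)	((DS) & 0xfffc)
#define OP_LD	58

int main(void)
{
	uint32_t raw_insn = 0xe93f0138;	/* "ld r9,312(r31)", a DS-form load */

	/* opcode 58 -> DS form: mask off the 2-bit XO field to get the offset */
	if (PPC_OP(raw_insn) == OP_LD)
		printf("ld r%d,%d(r%d)\n", PPC_RT(raw_insn),
		       PPC_DS(raw_insn), PPC_RA(raw_insn));	/* ld r9,312(r31) */
	return 0;
}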
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index da5aa3e1f04c..eeac25cca699 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -2,7 +2,7 @@
 #include <linux/compiler.h>
 
 static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
-			    struct map_symbol *ms)
+			    struct map_symbol *ms, struct disasm_line *dl __maybe_unused)
 {
 	char *endptr, *tok, *name;
 	struct map *map = ms->map;
@@ -52,7 +52,8 @@ static struct ins_ops s390_call_ops = {
 
 static int s390_mov__parse(struct arch *arch __maybe_unused,
 			   struct ins_operands *ops,
-			   struct map_symbol *ms __maybe_unused)
+			   struct map_symbol *ms __maybe_unused,
+			   struct disasm_line *dl __maybe_unused)
 {
 	char *s = strchr(ops->raw, ','), *target, *endptr;
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index 5cdf457f5cbe..7b7d462c6c6b 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -206,3 +206,380 @@ static int x86__annotate_init(struct arch *arch, char *cpuid)
 	arch->initialized = true;
 	return err;
 }
+
+#ifdef HAVE_DWARF_SUPPORT
+static void update_insn_state_x86(struct type_state *state,
+				  struct data_loc_info *dloc, Dwarf_Die *cu_die,
+				  struct disasm_line *dl)
+{
+	struct annotated_insn_loc loc;
+	struct annotated_op_loc *src = &loc.ops[INSN_OP_SOURCE];
+	struct annotated_op_loc *dst = &loc.ops[INSN_OP_TARGET];
+	struct type_state_reg *tsr;
+	Dwarf_Die type_die;
+	u32 insn_offset = dl->al.offset;
+	int fbreg = dloc->fbreg;
+	int fboff = 0;
+
+	if (annotate_get_insn_location(dloc->arch, dl, &loc) < 0)
+		return;
+
+	if (ins__is_call(&dl->ins)) {
+		struct symbol *func = dl->ops.target.sym;
+
+		if (func == NULL)
+			return;
+
+		/* __fentry__ will preserve all registers */
+		if (!strcmp(func->name, "__fentry__"))
+			return;
+
+		pr_debug_dtp("call [%x] %s\n", insn_offset, func->name);
+
+		/* Otherwise invalidate caller-saved registers after call */
+		for (unsigned i = 0; i < ARRAY_SIZE(state->regs); i++) {
+			if (state->regs[i].caller_saved)
+				state->regs[i].ok = false;
+		}
+
+		/* Update register with the return type (if any) */
+		if (die_find_func_rettype(cu_die, func->name, &type_die)) {
+			tsr = &state->regs[state->ret_reg];
+			tsr->type = type_die;
+			tsr->kind = TSR_KIND_TYPE;
+			tsr->ok = true;
+
+			pr_debug_dtp("call [%x] return -> reg%d",
+				     insn_offset, state->ret_reg);
+			pr_debug_type_name(&type_die, tsr->kind);
+		}
+		return;
+	}
+
+	if (!strncmp(dl->ins.name, "add", 3)) {
+		u64 imm_value = -1ULL;
+		int offset;
+		const char *var_name = NULL;
+		struct map_symbol *ms = dloc->ms;
+		u64 ip = ms->sym->start + dl->al.offset;
+
+		if (!has_reg_type(state, dst->reg1))
+			return;
+
+		tsr = &state->regs[dst->reg1];
+
+		if (src->imm)
+			imm_value = src->offset;
+		else if (has_reg_type(state, src->reg1) &&
+			 state->regs[src->reg1].kind == TSR_KIND_CONST)
+			imm_value = state->regs[src->reg1].imm_value;
+		else if (src->reg1 == DWARF_REG_PC) {
+			u64 var_addr = annotate_calc_pcrel(dloc->ms, ip,
+							   src->offset, dl);
+
+			if (get_global_var_info(dloc, var_addr,
+						&var_name, &offset) &&
+			    !strcmp(var_name, "this_cpu_off") &&
+			    tsr->kind == TSR_KIND_CONST) {
+				tsr->kind = TSR_KIND_PERCPU_BASE;
+				imm_value = tsr->imm_value;
+			}
+		}
+		else
+			return;
+
+		if (tsr->kind != TSR_KIND_PERCPU_BASE)
+			return;
+
+		if (get_global_var_type(cu_die, dloc, ip, imm_value, &offset,
+					&type_die) && offset == 0) {
+			/*
+			 * This is not a pointer type, but it should be treated
+			 * as a pointer.
+			 */
+			tsr->type = type_die;
+			tsr->kind = TSR_KIND_POINTER;
+			tsr->ok = true;
+
+			pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
+				     insn_offset, imm_value, dst->reg1);
+			pr_debug_type_name(&tsr->type, tsr->kind);
+		}
+		return;
+	}
+
+	if (strncmp(dl->ins.name, "mov", 3))
+		return;
+
+	if (dloc->fb_cfa) {
+		u64 ip = dloc->ms->sym->start + dl->al.offset;
+		u64 pc = map__rip_2objdump(dloc->ms->map, ip);
+
+		if (die_get_cfa(dloc->di->dbg, pc, &fbreg, &fboff) < 0)
+			fbreg = -1;
+	}
+
+	/* Case 1. register to register or segment:offset to register transfers */
+	if (!src->mem_ref && !dst->mem_ref) {
+		if (!has_reg_type(state, dst->reg1))
+			return;
+
+		tsr = &state->regs[dst->reg1];
+		if (dso__kernel(map__dso(dloc->ms->map)) &&
+		    src->segment == INSN_SEG_X86_GS && src->imm) {
+			u64 ip = dloc->ms->sym->start + dl->al.offset;
+			u64 var_addr;
+			int offset;
+
+			/*
+			 * In kernel, %gs points to a per-cpu region for the
+			 * current CPU. Access with a constant offset should
+			 * be treated as a global variable access.
+			 */
+			var_addr = src->offset;
+
+			if (var_addr == 40) {
+				tsr->kind = TSR_KIND_CANARY;
+				tsr->ok = true;
+
+				pr_debug_dtp("mov [%x] stack canary -> reg%d\n",
+					     insn_offset, dst->reg1);
+				return;
+			}
+
+			if (!get_global_var_type(cu_die, dloc, ip, var_addr,
+						 &offset, &type_die) ||
+			    !die_get_member_type(&type_die, offset, &type_die)) {
+				tsr->ok = false;
+				return;
+			}
+
+			tsr->type = type_die;
+			tsr->kind = TSR_KIND_TYPE;
+			tsr->ok = true;
+
+			pr_debug_dtp("mov [%x] this-cpu addr=%#"PRIx64" -> reg%d",
+				     insn_offset, var_addr, dst->reg1);
+			pr_debug_type_name(&tsr->type, tsr->kind);
+			return;
+		}
+
+		if (src->imm) {
+			tsr->kind = TSR_KIND_CONST;
+			tsr->imm_value = src->offset;
+			tsr->ok = true;
+
+			pr_debug_dtp("mov [%x] imm=%#x -> reg%d\n",
+				     insn_offset, tsr->imm_value, dst->reg1);
+			return;
+		}
+
+		if (!has_reg_type(state, src->reg1) ||
+		    !state->regs[src->reg1].ok) {
+			tsr->ok = false;
+			return;
+		}
+
+		tsr->type = state->regs[src->reg1].type;
+		tsr->kind = state->regs[src->reg1].kind;
+		tsr->ok = true;
+
+		pr_debug_dtp("mov [%x] reg%d -> reg%d",
+			     insn_offset, src->reg1, dst->reg1);
+		pr_debug_type_name(&tsr->type, tsr->kind);
+	}
+	/* Case 2. memory to register transfers */
+	if (src->mem_ref && !dst->mem_ref) {
+		int sreg = src->reg1;
+
+		if (!has_reg_type(state, dst->reg1))
+			return;
+
+		tsr = &state->regs[dst->reg1];
+
+retry:
+		/* Check stack variables with offset */
+		if (sreg == fbreg) {
+			struct type_state_stack *stack;
+			int offset = src->offset - fboff;
+
+			stack = find_stack_state(state, offset);
+			if (stack == NULL) {
+				tsr->ok = false;
+				return;
+			} else if (!stack->compound) {
+				tsr->type = stack->type;
+				tsr->kind = stack->kind;
+				tsr->ok = true;
+			} else if (die_get_member_type(&stack->type,
+						       offset - stack->offset,
+						       &type_die)) {
+				tsr->type = type_die;
+				tsr->kind = TSR_KIND_TYPE;
+				tsr->ok = true;
+			} else {
+				tsr->ok = false;
+				return;
+			}
+
+			pr_debug_dtp("mov [%x] -%#x(stack) -> reg%d",
+				     insn_offset, -offset, dst->reg1);
+			pr_debug_type_name(&tsr->type, tsr->kind);
+		}
+		/* And then dereference the pointer if it has one */
+		else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
+			 state->regs[sreg].kind == TSR_KIND_TYPE &&
+			 die_deref_ptr_type(&state->regs[sreg].type,
+					    src->offset, &type_die)) {
+			tsr->type = type_die;
+			tsr->kind = TSR_KIND_TYPE;
+			tsr->ok = true;
+
+			pr_debug_dtp("mov [%x] %#x(reg%d) -> reg%d",
+				     insn_offset, src->offset, sreg, dst->reg1);
+			pr_debug_type_name(&tsr->type, tsr->kind);
+		}
+		/* Or check if it's a global variable */
+		else if (sreg == DWARF_REG_PC) {
+			struct map_symbol *ms = dloc->ms;
+			u64 ip = ms->sym->start + dl->al.offset;
+			u64 addr;
+			int offset;
+
+			addr = annotate_calc_pcrel(ms, ip, src->offset, dl);
+
+			if (!get_global_var_type(cu_die, dloc, ip, addr, &offset,
+						 &type_die) ||
+			    !die_get_member_type(&type_die, offset, &type_die)) {
+				tsr->ok = false;
+				return;
+			}
+
+			tsr->type = type_die;
+			tsr->kind = TSR_KIND_TYPE;
+			tsr->ok = true;
+
+			pr_debug_dtp("mov [%x] global addr=%"PRIx64" -> reg%d",
+				     insn_offset, addr, dst->reg1);
+			pr_debug_type_name(&type_die, tsr->kind);
+		}
+		/* And check percpu access with base register */
+		else if (has_reg_type(state, sreg) &&
+			 state->regs[sreg].kind == TSR_KIND_PERCPU_BASE) {
+			u64 ip = dloc->ms->sym->start + dl->al.offset;
+			u64 var_addr = src->offset;
+			int offset;
+
+			if (src->multi_regs) {
+				int reg2 = (sreg == src->reg1) ? src->reg2 : src->reg1;
+
+				if (has_reg_type(state, reg2) && state->regs[reg2].ok &&
+				    state->regs[reg2].kind == TSR_KIND_CONST)
+					var_addr += state->regs[reg2].imm_value;
+			}
+
+			/*
+			 * In kernel, %gs points to a per-cpu region for the
+			 * current CPU. Access with a constant offset should
+			 * be treated as a global variable access.
+			 */
+			if (get_global_var_type(cu_die, dloc, ip, var_addr,
+						&offset, &type_die) &&
+			    die_get_member_type(&type_die, offset, &type_die)) {
+				tsr->type = type_die;
+				tsr->kind = TSR_KIND_TYPE;
+				tsr->ok = true;
+
+				if (src->multi_regs) {
+					pr_debug_dtp("mov [%x] percpu %#x(reg%d,reg%d) -> reg%d",
+						     insn_offset, src->offset, src->reg1,
+						     src->reg2, dst->reg1);
+				} else {
+					pr_debug_dtp("mov [%x] percpu %#x(reg%d) -> reg%d",
+						     insn_offset, src->offset, sreg, dst->reg1);
+				}
+				pr_debug_type_name(&tsr->type, tsr->kind);
+			} else {
+				tsr->ok = false;
+			}
+		}
+		/* And then dereference the calculated pointer if it has one */
+		else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
+			 state->regs[sreg].kind == TSR_KIND_POINTER &&
+			 die_get_member_type(&state->regs[sreg].type,
+					     src->offset, &type_die)) {
+			tsr->type = type_die;
+			tsr->kind = TSR_KIND_TYPE;
+			tsr->ok = true;
+
+			pr_debug_dtp("mov [%x] pointer %#x(reg%d) -> reg%d",
+				     insn_offset, src->offset, sreg, dst->reg1);
+			pr_debug_type_name(&tsr->type, tsr->kind);
+		}
+		/* Or try another register if any */
+		else if (src->multi_regs && sreg == src->reg1 &&
+			 src->reg1 != src->reg2) {
+			sreg = src->reg2;
+			goto retry;
+		}
+		else {
+			int offset;
+			const char *var_name = NULL;
+
+			/* it might be per-cpu variable (in kernel) access */
+			if (src->offset < 0) {
+				if (get_global_var_info(dloc, (s64)src->offset,
+							&var_name, &offset) &&
+				    !strcmp(var_name, "__per_cpu_offset")) {
+					tsr->kind = TSR_KIND_PERCPU_BASE;
+
+					pr_debug_dtp("mov [%x] percpu base reg%d\n",
+						     insn_offset, dst->reg1);
+				}
+			}
+
+			tsr->ok = false;
+		}
+	}
+	/* Case 3. register to memory transfers */
+	if (!src->mem_ref && dst->mem_ref) {
+		if (!has_reg_type(state, src->reg1) ||
+		    !state->regs[src->reg1].ok)
+			return;
+
+		/* Check stack variables with offset */
+		if (dst->reg1 == fbreg) {
+			struct type_state_stack *stack;
+			int offset = dst->offset - fboff;
+
+			tsr = &state->regs[src->reg1];
+
+			stack = find_stack_state(state, offset);
+			if (stack) {
+				/*
+				 * The source register is likely to hold a type
+				 * of member if it's a compound type. Do not
+				 * update the stack variable type since we can
+				 * get the member type later by using the
+				 * die_get_member_type().
+				 */
+				if (!stack->compound)
+					set_stack_state(stack, offset, tsr->kind,
+							&tsr->type);
+			} else {
+				findnew_stack_state(state, offset, tsr->kind,
+						    &tsr->type);
+			}
+
+			pr_debug_dtp("mov [%x] reg%d -> -%#x(stack)",
+				     insn_offset, src->reg1, -offset);
+			pr_debug_type_name(&tsr->type, tsr->kind);
+		}
+		/*
+		 * Ignore other transfers since it'd set a value in a struct
+		 * and won't change the type.
+		 */
+	}
+	/* Case 4. memory to memory transfers (not handled for now) */
+}
+#endif
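The bulk of update_insn_state_x86() is bookkeeping over a per-register type table: a mov propagates a known type from source to destination, and a call invalidates every caller-saved entry. The toy model below (not perf code; the register numbers and caller-saved set are purely illustrative) shows just that skeleton:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NR_REGS 16

/* Toy stand-in for struct type_state_reg. */
struct toy_reg_state {
	bool ok;		/* do we know the type held here? */
	bool caller_saved;	/* clobbered across a call */
	char type[32];		/* stand-in for a Dwarf_Die type */
};

static struct toy_reg_state regs[NR_REGS];

/* Case 1 above: "mov %src, %dst" copies the known type state. */
static void on_mov_reg_reg(int src, int dst)
{
	if (!regs[src].ok) {
		regs[dst].ok = false;
		return;
	}
	regs[dst].ok = true;
	strcpy(regs[dst].type, regs[src].type);
	printf("mov: reg%d -> reg%d (%s)\n", src, dst, regs[dst].type);
}

/* A call invalidates every caller-saved register's state. */
static void on_call(void)
{
	for (int i = 0; i < NR_REGS; i++)
		if (regs[i].caller_saved)
			regs[i].ok = false;
}

int main(void)
{
	regs[0].caller_saved = true;
	regs[0].ok = true;
	strcpy(regs[0].type, "struct task_struct *");

	on_mov_reg_reg(0, 3);	/* reg3 now holds the same type */
	on_call();		/* reg0 forgotten; reg3 (callee-saved here) survives */
	printf("reg0 ok=%d reg3 ok=%d\n", regs[0].ok, regs[3].ok);
	return 0;
}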
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index e65b7dbe27fb..a0400707180c 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -15,7 +15,7 @@
 #if defined(__x86_64__)
 
 struct perf_event__synthesize_extra_kmaps_cb_args {
-	struct perf_tool *tool;
+	const struct perf_tool *tool;
 	perf_event__handler_t process;
 	struct machine *machine;
 	union perf_event *event;
@@ -65,7 +65,7 @@ static int perf_event__synthesize_extra_kmaps_cb(struct map *map, void *data)
 	return 0;
 }
 
-int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+int perf_event__synthesize_extra_kmaps(const struct perf_tool *tool,
 				       perf_event__handler_t process,
 				       struct machine *machine)
 {
diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c
index b1ce0c52d88d..cebdd483149e 100644
--- a/tools/perf/arch/x86/util/evlist.c
+++ b/tools/perf/arch/x86/util/evlist.c
@@ -89,6 +89,12 @@ int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
 		return 1;
 	}
 
+	/* Retire latency event should not be group leader */
+	if (lhs->retire_lat && !rhs->retire_lat)
+		return 1;
+	if (!lhs->retire_lat && rhs->retire_lat)
+		return -1;
+
 	/* Default ordering by insertion index. */
 	return lhs->core.idx - rhs->core.idx;
 }
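The new comparison follows the existing shape of arch_evlist__cmp(): a three-way comparator that sorts retirement-latency events behind everything else, so one can never end up as group leader, then falls back to insertion order. A standalone sketch of the same ordering rule (toy types, not perf's; the ":R" spelling for a retire-latency modifier is an assumption for illustration):

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Toy stand-in for an evsel: only the fields the comparator looks at. */
struct toy_evsel {
	const char *name;
	bool retire_lat;	/* retirement latency event? */
	int idx;		/* insertion index */
};

/* Mirrors the rule in the hunk above: retire-latency events sort last,
 * everything else keeps its insertion order. */
static int toy_evsel_cmp(const void *a, const void *b)
{
	const struct toy_evsel *lhs = a, *rhs = b;

	if (lhs->retire_lat && !rhs->retire_lat)
		return 1;
	if (!lhs->retire_lat && rhs->retire_lat)
		return -1;
	return lhs->idx - rhs->idx;
}

int main(void)
{
	struct toy_evsel evs[] = {
		{ "cycles:R",		true,	0 },	/* listed first on the cmdline */
		{ "cycles",		false,	1 },
		{ "instructions",	false,	2 },
	};

	qsort(evs, 3, sizeof(evs[0]), toy_evsel_cmp);
	for (int i = 0; i < 3; i++)
		printf("%s\n", evs[i].name);	/* cycles, instructions, cycles:R */
	return 0;
}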