summaryrefslogtreecommitdiff
path: root/tools/perf/util/intel-pt-decoder
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 13:42:32 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-27 13:42:32 -0700
commit7b58b82b86c8b65a2b57a4c6cb96a460654f9e09 (patch)
treea13e19f216389f16f1cb6641d54751f167482515 /tools/perf/util/intel-pt-decoder
parent02f9a04d76b76b80b05ddc33ceabe806b84fda3c (diff)
parentab0809af0bee88b689ba289ec8c40aa2be3a17ec (diff)
Merge tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools updates from Arnaldo Carvalho de Melo: "New features: perf ftrace: - Add -n/--use-nsec option to the 'latency' subcommand. Default: usecs: $ sudo perf ftrace latency -T dput -a sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 2098375 | ############################# | 1 - 2 us | 61 | | 2 - 4 us | 33 | | 4 - 8 us | 13 | | 8 - 16 us | 124 | | 16 - 32 us | 123 | | 32 - 64 us | 1 | | 64 - 128 us | 0 | | 128 - 256 us | 1 | | 256 - 512 us | 0 | | Better granularity with nsec: $ sudo perf ftrace latency -T dput -a -n sleep 1 # DURATION | COUNT | GRAPH | 0 - 1 us | 0 | | 1 - 2 ns | 0 | | 2 - 4 ns | 0 | | 4 - 8 ns | 0 | | 8 - 16 ns | 0 | | 16 - 32 ns | 0 | | 32 - 64 ns | 0 | | 64 - 128 ns | 1163434 | ############## | 128 - 256 ns | 914102 | ############# | 256 - 512 ns | 884 | | 512 - 1024 ns | 613 | | 1 - 2 us | 31 | | 2 - 4 us | 17 | | 4 - 8 us | 7 | | 8 - 16 us | 123 | | 16 - 32 us | 83 | | perf lock: - Add -c/--combine-locks option to merge lock instances in the same class into a single entry. # perf lock report -c Name acquired contended avg wait(ns) total wait(ns) max wait(ns) min wait(ns) rcu_read_lock 251225 0 0 0 0 0 hrtimer_bases.lock 39450 0 0 0 0 0 &sb->s_type->i_l... 10301 1 662 662 662 662 ptlock_ptr(page) 10173 2 701 1402 760 642 &(ei->i_block_re... 8732 0 0 0 0 0 &xa->xa_lock 8088 0 0 0 0 0 &base->lock 6705 0 0 0 0 0 &p->pi_lock 5549 0 0 0 0 0 &dentry->d_lockr... 5010 4 1274 5097 1844 789 &ep->lock 3958 0 0 0 0 0 - Add -F/--field option to customize the list of fields to output: $ perf lock report -F contended,wait_max -k avg_wait Name contended max wait(ns) avg wait(ns) slock-AF_INET6 1 23543 23543 &lruvec->lru_lock 5 18317 11254 slock-AF_INET6 1 10379 10379 rcu_node_1 1 2104 2104 &dentry->d_lockr... 1 1844 1844 &dentry->d_lockr... 1 1672 1672 &newf->file_lock 15 2279 1025 &dentry->d_lockr... 1 792 792 - Add --synth=no option for record, as there is no need to symbolize, lock names comes from the tracepoints. perf record: - Threaded recording, opt-in, via the new --threads command line option. - Improve AMD IBS (Instruction-Based Sampling) error handling messages. perf script: - Add 'brstackinsnlen' field (use it with -F) for branch stacks. - Output branch sample type in 'perf script'. perf report: - Add "addr_from" and "addr_to" sort dimensions. - Print branch stack entry type in 'perf report --dump-raw-trace' - Fix symbolization for chrooted workloads. Hardware tracing: Intel PT: - Add CFE (Control Flow Event) and EVD (Event Data) packets support. - Add MODE.Exec IFLAG bit support. Explanation about these features from the "Intel® 64 and IA-32 architectures software developer’s manual combined volumes: 1, 2A, 2B, 2C, 2D, 3A, 3B, 3C, 3D, and 4" PDF at: https://cdrdv2.intel.com/v1/dl/getContent/671200 At page 3951: "32.2.4 Event Trace is a capability that exposes details about the asynchronous events, when they are generated, and when their corresponding software event handler completes execution. These include: o Interrupts, including NMI and SMI, including the interrupt vector when defined. o Faults, exceptions including the fault vector. - Page faults additionally include the page fault address, when in context. o Event handler returns, including IRET and RSM. o VM exits and VM entries.¹ - VM exits include the values written to the “exit reason” and “exit qualification” VMCS fields. INIT and SIPI events. o TSX aborts, including the abort status returned for the RTM instructions. o Shutdown. Additionally, it provides indication of the status of the Interrupt Flag (IF), to indicate when interrupts are masked" ARM CoreSight: - Use advertised caps/min_interval as default sample_period on ARM spe. - Update deduction of TRCCONFIGR register for branch broadcast on ARM's CoreSight ETM. Vendor Events (JSON): Intel: - Update events and metrics for: Alderlake, Broadwell, Broadwell DE, BroadwellX, CascadelakeX, Elkhartlake, Bonnell, Goldmont, GoldmontPlus, Westmere EP-DP, Haswell, HaswellX, Icelake, IcelakeX, Ivybridge, Ivytown, Jaketown, Knights Landing, Nehalem EP, Sandybridge, Silvermont, Skylake, Skylake Server, SkylakeX, Tigerlake, TremontX, Westmere EP-SP, and Westmere EX. ARM: - Add support for HiSilicon CPA PMU aliasing. perf stat: - Fix forked applications enablement of counters. - The 'slots' should only be printed on a different order than the one specified on the command line when 'topdown' events are present, fix it. Miscellaneous: - Sync msr-index, cpufeatures header files with the kernel sources. - Stop using some deprecated libbpf APIs in 'perf trace'. - Fix some spelling mistakes. - Refactor the maps pointers usage to pave the way for using refcount debugging. - Only offer the --tui option on perf top, report and annotate when perf was built with libslang. - Don't mention --to-ctf in 'perf data --help' when not linking with the required library, libbabeltrace. - Use ARRAY_SIZE() instead of ad hoc equivalent, spotted by array_size.cocci. - Enhance the matching of sub-commands abbreviations: 'perf c2c rec' -> 'perf c2c record' 'perf c2c recport -> error - Set build-id using build-id header on new mmap records. - Fix generation of 'perf --version' string. perf test: - Add test for the arm_spe event. - Add test to check unwinding using fame-pointer (fp) mode on arm64. - Make metric testing more robust in 'perf test'. - Add error message for unsupported branch stack cases. libperf: - Add API for allocating new thread map array. - Fix typo in perf_evlist__open() failure error messages in libperf tests. perf c2c: - Replace bitmap_weight() with bitmap_empty() where appropriate" * tag 'perf-tools-for-v5.18-2022-03-26' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (143 commits) perf evsel: Improve AMD IBS (Instruction-Based Sampling) error handling messages perf python: Add perf_env stubs that will be needed in evsel__open_strerror() perf tools: Enhance the matching of sub-commands abbreviations libperf tests: Fix typo in perf_evlist__open() failure error messages tools arm64: Import cputype.h perf lock: Add -F/--field option to control output perf lock: Extend struct lock_key to have print function perf lock: Add --synth=no option for record tools headers cpufeatures: Sync with the kernel sources tools headers cpufeatures: Sync with the kernel sources perf stat: Fix forked applications enablement of counters tools arch x86: Sync the msr-index.h copy with the kernel sources perf evsel: Make evsel__env() always return a valid env perf build-id: Fix spelling mistake "Cant" -> "Can't" perf header: Fix spelling mistake "could't" -> "couldn't" perf script: Add 'brstackinsnlen' for branch stacks perf parse-events: Move slots only with topdown perf ftrace latency: Update documentation perf ftrace latency: Add -n/--use-nsec option perf tools: Fix version kernel tag ...
Diffstat (limited to 'tools/perf/util/intel-pt-decoder')
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c245
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h21
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c47
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h3
4 files changed, 290 insertions, 26 deletions
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 0e013c2d9eb4..e1d8f7504cbe 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -144,6 +144,8 @@ struct intel_pt_decoder {
bool vm_tm_corr_continuous;
bool nr;
bool next_nr;
+ bool iflag;
+ bool next_iflag;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
@@ -213,6 +215,9 @@ struct intel_pt_decoder {
bool set_fup_pwre;
bool set_fup_exstop;
bool set_fup_bep;
+ bool set_fup_cfe_ip;
+ bool set_fup_cfe;
+ bool set_fup_mode_exec;
bool sample_cyc;
unsigned int fup_tx_flags;
unsigned int tx_flags;
@@ -223,6 +228,7 @@ struct intel_pt_decoder {
uint64_t timestamp_insn_cnt;
uint64_t sample_insn_cnt;
uint64_t stuck_ip;
+ struct intel_pt_pkt fup_cfe_pkt;
int max_loops;
int no_progress;
int stuck_ip_prd;
@@ -231,6 +237,8 @@ struct intel_pt_decoder {
const unsigned char *next_buf;
size_t next_len;
unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
+ int evd_cnt;
+ struct intel_pt_evd evd[INTEL_PT_MAX_EVDS];
};
static uint64_t intel_pt_lower_power_of_2(uint64_t x)
@@ -820,6 +828,9 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
return 0;
case INTEL_PT_MTC:
@@ -1094,6 +1105,52 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
decoder->state.type |= INTEL_PT_INSTRUCTION;
}
+/*
+ * Sample FUP instruction at the same time as reporting the FUP event, so the
+ * instruction sample gets the same flags as the FUP event.
+ */
+static void intel_pt_sample_fup_insn(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ uint64_t max_insn_cnt, insn_cnt = 0;
+ int err;
+
+ decoder->state.insn_op = INTEL_PT_OP_OTHER;
+ decoder->state.insn_len = 0;
+
+ if (!decoder->branch_enable || !decoder->pge || decoder->hop ||
+ decoder->ip != decoder->last_ip)
+ return;
+
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
+ max_insn_cnt = intel_pt_next_sample(decoder);
+ if (max_insn_cnt != 1)
+ return;
+
+ err = decoder->walk_insn(&intel_pt_insn, &insn_cnt, &decoder->ip,
+ 0, max_insn_cnt, decoder->data);
+ /* Ignore error, it will be reported next walk anyway */
+ if (err)
+ return;
+
+ if (intel_pt_insn.branch != INTEL_PT_BR_NO_BRANCH) {
+ intel_pt_log_at("ERROR: Unexpected branch at FUP instruction", decoder->ip);
+ return;
+ }
+
+ decoder->tot_insn_cnt += insn_cnt;
+ decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->sample_insn_cnt += insn_cnt;
+ decoder->period_insn_cnt += insn_cnt;
+
+ intel_pt_sample_insn(decoder);
+
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ decoder->ip += intel_pt_insn.length;
+}
+
static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
struct intel_pt_insn *intel_pt_insn, uint64_t ip)
{
@@ -1203,13 +1260,85 @@ out_no_progress:
return err;
}
-static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+static void intel_pt_mode_exec_status(struct intel_pt_decoder *decoder)
+{
+ bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+ decoder->exec_mode = decoder->packet.payload;
+ decoder->iflag = iflag;
+ decoder->next_iflag = iflag;
+ decoder->state.from_iflag = iflag;
+ decoder->state.to_iflag = iflag;
+}
+
+static void intel_pt_mode_exec(struct intel_pt_decoder *decoder)
+{
+ bool iflag = decoder->packet.count & INTEL_PT_IFLAG;
+
+ decoder->exec_mode = decoder->packet.payload;
+ decoder->next_iflag = iflag;
+}
+
+static void intel_pt_sample_iflag(struct intel_pt_decoder *decoder)
+{
+ decoder->state.type |= INTEL_PT_IFLAG_CHG;
+ decoder->state.from_iflag = decoder->iflag;
+ decoder->state.to_iflag = decoder->next_iflag;
+ decoder->iflag = decoder->next_iflag;
+}
+
+static void intel_pt_sample_iflag_chg(struct intel_pt_decoder *decoder)
+{
+ if (decoder->iflag != decoder->next_iflag)
+ intel_pt_sample_iflag(decoder);
+}
+
+static void intel_pt_clear_fup_event(struct intel_pt_decoder *decoder)
+{
+ decoder->set_fup_tx_flags = false;
+ decoder->set_fup_ptw = false;
+ decoder->set_fup_mwait = false;
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+ decoder->set_fup_bep = false;
+ decoder->set_fup_cfe_ip = false;
+ decoder->set_fup_cfe = false;
+ decoder->evd_cnt = 0;
+ decoder->set_fup_mode_exec = false;
+ decoder->iflag = decoder->next_iflag;
+}
+
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder, bool no_tip)
{
enum intel_pt_sample_type type = decoder->state.type;
+ bool sample_fup_insn = false;
bool ret = false;
decoder->state.type &= ~INTEL_PT_BRANCH;
+ if (decoder->set_fup_cfe_ip || decoder->set_fup_cfe) {
+ bool ip = decoder->set_fup_cfe_ip;
+
+ decoder->set_fup_cfe_ip = false;
+ decoder->set_fup_cfe = false;
+ decoder->state.type |= INTEL_PT_EVT;
+ if (!ip && decoder->pge)
+ decoder->state.type |= INTEL_PT_BRANCH;
+ decoder->state.cfe_type = decoder->fup_cfe_pkt.count;
+ decoder->state.cfe_vector = decoder->fup_cfe_pkt.payload;
+ decoder->state.evd_cnt = decoder->evd_cnt;
+ decoder->state.evd = decoder->evd;
+ decoder->evd_cnt = 0;
+ if (ip || decoder->pge)
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ ret = true;
+ }
+ if (decoder->set_fup_mode_exec) {
+ decoder->set_fup_mode_exec = false;
+ intel_pt_sample_iflag(decoder);
+ sample_fup_insn = no_tip;
+ ret = true;
+ }
if (decoder->set_fup_tx_flags) {
decoder->set_fup_tx_flags = false;
decoder->tx_flags = decoder->fup_tx_flags;
@@ -1266,6 +1395,8 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
if (ret) {
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
+ if (sample_fup_insn)
+ intel_pt_sample_fup_insn(decoder);
} else {
decoder->state.type = type;
}
@@ -1298,7 +1429,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
- if (intel_pt_fup_event(decoder) && no_tip)
+ if (intel_pt_fup_event(decoder, no_tip) && no_tip)
return 0;
return -EAGAIN;
}
@@ -1350,6 +1481,7 @@ static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
return err;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
@@ -1464,6 +1596,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
decoder->state.to_ip = decoder->last_ip;
decoder->ip = decoder->last_ip;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return 0;
}
@@ -1527,6 +1660,19 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
return 0;
}
+static int intel_pt_evd(struct intel_pt_decoder *decoder)
+{
+ if (decoder->evd_cnt >= INTEL_PT_MAX_EVDS) {
+ intel_pt_log_at("ERROR: Too many EVD packets", decoder->pos);
+ return -ENOSYS;
+ }
+ decoder->evd[decoder->evd_cnt++] = (struct intel_pt_evd){
+ .type = decoder->packet.count,
+ .payload = decoder->packet.payload,
+ };
+ return 0;
+}
+
static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
{
timestamp |= (ref_timestamp & (0xffULL << 56));
@@ -1620,12 +1766,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
decoder->state.from_ip = decoder->ip;
decoder->ip = 0;
decoder->pge = false;
- decoder->set_fup_tx_flags = false;
- decoder->set_fup_ptw = false;
- decoder->set_fup_mwait = false;
- decoder->set_fup_pwre = false;
- decoder->set_fup_exstop = false;
- decoder->set_fup_bep = false;
+ intel_pt_clear_fup_event(decoder);
decoder->overflow = true;
return -EOVERFLOW;
}
@@ -1873,6 +2014,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
decoder->have_tma = false;
intel_pt_log("ERROR: Unexpected packet\n");
err = -EAGAIN;
@@ -1895,7 +2039,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_PIP:
@@ -1975,6 +2119,9 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
intel_pt_log("ERROR: Missing TIP after FUP\n");
decoder->pkt_state = INTEL_PT_STATE_ERR3;
decoder->pkt_step = 0;
@@ -2026,6 +2173,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
decoder->state.to_ip = decoder->ip;
}
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return 0;
case INTEL_PT_PIP:
@@ -2043,7 +2191,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec(decoder);
break;
case INTEL_PT_VMCS:
@@ -2134,6 +2282,9 @@ static int intel_pt_vm_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_TIP:
case INTEL_PT_PSB:
case INTEL_PT_TRACESTOP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
return 1;
}
@@ -2653,6 +2804,9 @@ static int intel_pt_vm_time_correlation(struct intel_pt_decoder *decoder)
decoder->blk_type = 0;
break;
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
case INTEL_PT_MODE_EXEC:
case INTEL_PT_MODE_TSX:
case INTEL_PT_MNT:
@@ -2719,6 +2873,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
intel_pt_update_nr(decoder);
+ intel_pt_sample_iflag_chg(decoder);
return HOP_RETURN;
case INTEL_PT_FUP:
@@ -2733,10 +2888,10 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
decoder->state.type = INTEL_PT_INSTRUCTION;
decoder->state.from_ip = decoder->ip;
decoder->state.to_ip = 0;
- intel_pt_fup_event(decoder);
+ intel_pt_fup_event(decoder, *no_tip);
return HOP_RETURN;
}
- intel_pt_fup_event(decoder);
+ intel_pt_fup_event(decoder, *no_tip);
decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH;
*err = intel_pt_walk_fup_tip(decoder);
if (!*err && decoder->state.to_ip)
@@ -2789,6 +2944,9 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
return HOP_PROCESS;
}
@@ -2857,6 +3015,9 @@ static int intel_pt_psb_lookahead_cb(struct intel_pt_pkt_info *pkt_info)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
if (data->after_psbend) {
data->after_psbend -= 1;
if (!data->after_psbend)
@@ -2994,6 +3155,7 @@ next:
decoder->pos);
break;
}
+ intel_pt_sample_iflag_chg(decoder);
intel_pt_set_ip(decoder);
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
@@ -3026,7 +3188,7 @@ next:
intel_pt_set_last_ip(decoder);
if (!decoder->branch_enable || !decoder->pge) {
decoder->ip = decoder->last_ip;
- if (intel_pt_fup_event(decoder))
+ if (intel_pt_fup_event(decoder, no_tip))
return 0;
no_tip = false;
break;
@@ -3108,8 +3270,15 @@ next:
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
- break;
+ intel_pt_mode_exec(decoder);
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_mode_exec = true;
+ no_tip = true;
+ }
+ goto next;
case INTEL_PT_MODE_TSX:
/* MODE_TSX need not be followed by FUP */
@@ -3223,6 +3392,35 @@ next:
}
goto next;
+ case INTEL_PT_CFE:
+ decoder->fup_cfe_pkt = decoder->packet;
+ decoder->set_fup_cfe = true;
+ if (!decoder->pge) {
+ intel_pt_fup_event(decoder, true);
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_CFE_IP:
+ decoder->fup_cfe_pkt = decoder->packet;
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_cfe_ip = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after CFE",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_EVD:
+ err = intel_pt_evd(decoder);
+ if (err)
+ return err;
+ break;
+
default:
return intel_pt_bug(decoder);
}
@@ -3265,6 +3463,9 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
intel_pt_log("ERROR: Unexpected packet\n");
err = -ENOENT;
goto out;
@@ -3307,7 +3508,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_MODE_TSX:
@@ -3426,7 +3627,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
break;
case INTEL_PT_MODE_EXEC:
- decoder->exec_mode = decoder->packet.payload;
+ intel_pt_mode_exec_status(decoder);
break;
case INTEL_PT_MODE_TSX:
@@ -3476,6 +3677,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
case INTEL_PT_BIP:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
default:
break;
}
@@ -3486,12 +3690,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
{
int err;
- decoder->set_fup_tx_flags = false;
- decoder->set_fup_ptw = false;
- decoder->set_fup_mwait = false;
- decoder->set_fup_pwre = false;
- decoder->set_fup_exstop = false;
- decoder->set_fup_bep = false;
+ intel_pt_clear_fup_event(decoder);
decoder->overflow = false;
if (!decoder->branch_enable) {
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 8fd68f7a0963..efb2cb3ae0ca 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -17,6 +17,7 @@
#define INTEL_PT_IN_TX (1 << 0)
#define INTEL_PT_ABORT_TX (1 << 1)
+#define INTEL_PT_IFLAG (1 << 2)
#define INTEL_PT_ASYNC (1 << 2)
#define INTEL_PT_FUP_IP (1 << 3)
#define INTEL_PT_SAMPLE_IPC (1 << 4)
@@ -35,6 +36,8 @@ enum intel_pt_sample_type {
INTEL_PT_TRACE_END = 1 << 10,
INTEL_PT_BLK_ITEMS = 1 << 11,
INTEL_PT_PSB_EVT = 1 << 12,
+ INTEL_PT_EVT = 1 << 13,
+ INTEL_PT_IFLAG_CHG = 1 << 14,
};
enum intel_pt_period_type {
@@ -209,10 +212,24 @@ struct intel_pt_vmcs_info {
bool error_printed;
};
+/*
+ * Maximum number of event trace data in one go, assuming at most 1 per type
+ * and 6-bits of type in the EVD packet.
+ */
+#define INTEL_PT_MAX_EVDS 64
+
+/* Event trace data from EVD packet */
+struct intel_pt_evd {
+ int type;
+ uint64_t payload;
+};
+
struct intel_pt_state {
enum intel_pt_sample_type type;
bool from_nr;
bool to_nr;
+ bool from_iflag;
+ bool to_iflag;
int err;
uint64_t from_ip;
uint64_t to_ip;
@@ -234,6 +251,10 @@ struct intel_pt_state {
int insn_len;
char insn[INTEL_PT_INSN_BUF_SZ];
struct intel_pt_blk_items items;
+ int cfe_type;
+ int cfe_vector;
+ int evd_cnt;
+ struct intel_pt_evd *evd;
};
struct intel_pt_insn;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 4bd154848cad..18f97f43e01a 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -64,6 +64,9 @@ static const char * const packet_name[] = {
[INTEL_PT_BIP] = "BIP",
[INTEL_PT_BEP] = "BEP",
[INTEL_PT_BEP_IP] = "BEP",
+ [INTEL_PT_CFE] = "CFE",
+ [INTEL_PT_CFE_IP] = "CFE",
+ [INTEL_PT_EVD] = "EVD",
};
const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -197,8 +200,7 @@ static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
return INTEL_PT_NEED_MORE_BYTES;
packet->type = INTEL_PT_MNT;
memcpy_le64(&packet->payload, buf + 3, 8);
- return 11
-;
+ return 11;
}
static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
@@ -329,6 +331,29 @@ static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
return 2;
}
+static int intel_pt_get_cfe(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = buf[2] & 0x80 ? INTEL_PT_CFE_IP : INTEL_PT_CFE;
+ packet->count = buf[2] & 0x1f;
+ packet->payload = buf[3];
+ return 4;
+}
+
+static int intel_pt_get_evd(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_EVD;
+ packet->count = buf[2] & 0x3f;
+ packet->payload = buf[3];
+ memcpy_le64(&packet->payload, buf + 3, 8);
+ return 11;
+}
+
static int intel_pt_get_ext(const unsigned char *buf, size_t len,
struct intel_pt_pkt *packet)
{
@@ -375,6 +400,10 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
return intel_pt_get_bep(len, packet);
case 0xb3: /* BEP with IP */
return intel_pt_get_bep_ip(len, packet);
+ case 0x13: /* CFE */
+ return intel_pt_get_cfe(buf, len, packet);
+ case 0x53: /* EVD */
+ return intel_pt_get_evd(buf, len, packet);
default:
return INTEL_PT_BAD_PACKET;
}
@@ -475,6 +504,7 @@ static int intel_pt_get_mode(const unsigned char *buf, size_t len,
switch (buf[1] >> 5) {
case 0:
packet->type = INTEL_PT_MODE_EXEC;
+ packet->count = buf[1];
switch (buf[1] & 3) {
case 0:
packet->payload = 16;
@@ -624,6 +654,9 @@ void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
case INTEL_PT_MWAIT:
case INTEL_PT_BEP:
case INTEL_PT_BEP_IP:
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ case INTEL_PT_EVD:
*ctx = INTEL_PT_NO_CTX;
break;
case INTEL_PT_BBP:
@@ -709,7 +742,8 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
(unsigned)payload, packet->count);
case INTEL_PT_MODE_EXEC:
- return snprintf(buf, buf_len, "%s %lld", name, payload);
+ return snprintf(buf, buf_len, "%s IF:%d %lld",
+ name, !!(packet->count & 4), payload);
case INTEL_PT_MODE_TSX:
return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
name, (unsigned)(payload >> 1) & 1,
@@ -751,6 +785,13 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
case INTEL_PT_BIP:
return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
name, packet->count, payload);
+ case INTEL_PT_CFE:
+ case INTEL_PT_CFE_IP:
+ return snprintf(buf, buf_len, "%s IP:%d Type 0x%02x Vector 0x%llx",
+ name, packet->type == INTEL_PT_CFE_IP, packet->count, payload);
+ case INTEL_PT_EVD:
+ return snprintf(buf, buf_len, "%s Type 0x%02x Payload 0x%llx",
+ name, packet->count, payload);
default:
break;
}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 996090cb84f6..496ba4be875c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -56,6 +56,9 @@ enum intel_pt_pkt_type {
INTEL_PT_BIP,
INTEL_PT_BEP,
INTEL_PT_BEP_IP,
+ INTEL_PT_CFE,
+ INTEL_PT_CFE_IP,
+ INTEL_PT_EVD,
};
struct intel_pt_pkt {