summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorJason Liu <jason.hui.liu@nxp.com>2022-05-21 15:32:59 -0500
committerJason Liu <jason.hui.liu@nxp.com>2022-06-29 11:35:21 -0500
commiteba369f0f66db8e57d52d788f455ebf80b52efa1 (patch)
tree8fbf20f34a2ac5ae285d083dde786560b0969ee9 /tools
parent056b375c471639e87c8f5cb49aeebab87ff8015c (diff)
parent9f43e3ac7e662f352f829077723fa0b92ccaded1 (diff)
Merge tag 'v5.15.41' into lf-5.15.y
This is the 5.15.41 stable release * tag 'v5.15.41': (1977 commits) Linux 5.15.41 usb: gadget: uvc: allow for application to cleanly shutdown usb: gadget: uvc: rename function to be more consistent ... Signed-off-by: Jason Liu <jason.hui.liu@nxp.com> Conflicts: arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi arch/arm64/configs/defconfig drivers/clk/imx/clk-imx8qxp-lpcg.c drivers/dma/imx-sdma.c drivers/gpu/drm/bridge/nwl-dsi.c drivers/mailbox/imx-mailbox.c drivers/net/phy/at803x.c drivers/tty/serial/fsl_lpuart.c security/keys/trusted-keys/trusted_core.c
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/x86/include/asm/msr-index.h4
-rw-r--r--tools/arch/x86/lib/memcpy_64.S12
-rw-r--r--tools/arch/x86/lib/memset_64.S6
-rw-r--r--tools/bpf/bpftool/gen.c2
-rw-r--r--tools/bpf/bpftool/map.c6
-rw-r--r--tools/build/feature/Makefile9
-rw-r--r--tools/include/uapi/linux/bpf.h4
-rw-r--r--tools/lib/bpf/Makefile4
-rw-r--r--tools/lib/bpf/btf_dump.c11
-rw-r--r--tools/lib/bpf/libbpf.c3
-rw-r--r--tools/lib/bpf/netlink.c63
-rw-r--r--tools/lib/bpf/xsk.c11
-rw-r--r--tools/lib/perf/evlist.c3
-rw-r--r--tools/objtool/arch/x86/decode.c13
-rw-r--r--tools/objtool/builtin-check.c3
-rw-r--r--tools/objtool/check.c32
-rw-r--r--tools/objtool/elf.c189
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/builtin.h2
-rw-r--r--tools/objtool/include/objtool/elf.h4
-rw-r--r--tools/perf/Makefile.config6
-rw-r--r--tools/perf/arch/arm64/util/Build1
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c6
-rw-r--r--tools/perf/arch/arm64/util/machine.c28
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/machine.c25
-rw-r--r--tools/perf/arch/s390/util/machine.c16
-rw-r--r--tools/perf/builtin-report.c14
-rw-r--r--tools/perf/builtin-script.c2
-rw-r--r--tools/perf/builtin-stat.c2
-rw-r--r--tools/perf/perf.c2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/cache.json111
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/floating-point.json24
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/frontend.json18
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/memory.json96
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/pipeline.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json461
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json23
-rw-r--r--tools/perf/util/arm-spe.c3
-rw-r--r--tools/perf/util/parse-events.c5
-rw-r--r--tools/perf/util/session.c15
-rw-r--r--tools/perf/util/setup.py8
-rw-r--r--tools/perf/util/symbol-elf.c2
-rw-r--r--tools/perf/util/symbol.c37
-rw-r--r--tools/perf/util/symbol.h3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bind_perm.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_dummy.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/xdping_kern.c4
-rwxr-xr-xtools/testing/selftests/bpf/test_lirc_mode2.sh5
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh10
-rwxr-xr-xtools/testing/selftests/bpf/test_tcp_check_syncookie.sh2
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh4
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh62
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh4
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c19
-rw-r--r--tools/testing/selftests/bpf/xdping.c5
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c2
-rw-r--r--tools/testing/selftests/cgroup/test_core.c165
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh17
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h15
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c192
-rw-r--r--tools/testing/selftests/lkdtm/config1
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c25
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c6
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh3
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh19
-rw-r--r--tools/testing/selftests/net/so_txtime.c4
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_under_vrf.sh8
-rw-r--r--tools/testing/selftests/net/timestamping.c4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh4
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c10
-rw-r--r--tools/testing/selftests/sgx/Makefile2
-rw-r--r--tools/testing/selftests/vm/Makefile22
-rw-r--r--tools/testing/selftests/vm/mremap_test.c136
-rw-r--r--tools/testing/selftests/x86/Makefile6
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh2
-rw-r--r--tools/virtio/virtio_test.c1
94 files changed, 1621 insertions, 546 deletions
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index a7c413432b33..66ae309840fe 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -128,9 +128,9 @@
#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
-/* SRBDS support */
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
-#define RNGDS_MITG_DIS BIT(0)
+#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
+#define RTM_ALLOW BIT(1) /* TSX development mode */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 1cc9da6e29c7..59cf2343f3d9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
rep movsq
movl %edx, %ecx
rep movsb
- ret
+ RET
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
@@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
- ret
+ RET
SYM_FUNC_END(memcpy_erms)
SYM_FUNC_START_LOCAL(memcpy_orig)
@@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
@@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
@@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
- retq
+ RET
.p2align 4
.Lless_3bytes:
subl $1, %edx
@@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movb %cl, (%rdi)
.Lend:
- retq
+ RET
SYM_FUNC_END(memcpy_orig)
.popsection
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index 9827ae267f96..d624f2bc42f1 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -40,7 +40,7 @@ SYM_FUNC_START(__memset)
movl %edx,%ecx
rep stosb
movq %r9,%rax
- ret
+ RET
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
@@ -63,7 +63,7 @@ SYM_FUNC_START_LOCAL(memset_erms)
movq %rdx,%rcx
rep stosb
movq %r9,%rax
- ret
+ RET
SYM_FUNC_END(memset_erms)
SYM_FUNC_START_LOCAL(memset_orig)
@@ -125,7 +125,7 @@ SYM_FUNC_START_LOCAL(memset_orig)
.Lende:
movq %r10,%rax
- ret
+ RET
.Lbad_alignment:
cmpq $7,%rdx
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index d40d92bbf0e4..07fa502a4ac1 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -870,7 +870,6 @@ static int do_skeleton(int argc, char **argv)
s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
if (!s) \n\
goto err; \n\
- obj->skeleton = s; \n\
\n\
s->sz = sizeof(*s); \n\
s->name = \"%1$s\"; \n\
@@ -955,6 +954,7 @@ static int do_skeleton(int argc, char **argv)
\n\
\"; \n\
\n\
+ obj->skeleton = s; \n\
return 0; \n\
err: \n\
bpf_object__destroy_skeleton(s); \n\
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 407071d54ab1..72ef9ddae260 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1042,11 +1042,9 @@ static void print_key_value(struct bpf_map_info *info, void *key,
json_writer_t *btf_wtr;
struct btf *btf;
- btf = btf__load_from_kernel_by_id(info->btf_id);
- if (libbpf_get_error(btf)) {
- p_err("failed to get btf");
+ btf = get_map_kv_btf(info);
+ if (libbpf_get_error(btf))
return;
- }
if (json_output) {
print_entry_json(info, key, value, btf);
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index e1e670014bd0..0e6d685b8617 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -213,9 +213,16 @@ strip-libs = $(filter-out -l%,$(1))
PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
-PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
+PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+ifeq ($(CC_NO_CLANG), 0)
+ PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
+ PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
+ PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+ FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro
+endif
+
$(OUTPUT)test-libperl.bin:
$(BUILD) $(FLAGS_PERL_EMBED)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 791f31dd0abe..e2c8f946c541 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2276,8 +2276,8 @@ union bpf_attr {
* Return
* The return value depends on the result of the test, and can be:
*
- * * 0, if current task belongs to the cgroup2.
- * * 1, if current task does not belong to the cgroup2.
+ * * 1, if current task belongs to the cgroup2.
+ * * 0, if current task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
* long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 74c3b73a5fbe..089b73b3cb37 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -126,7 +126,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
sed 's/\[.*\]//' | \
- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}' | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -195,7 +195,7 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
sed 's/\[.*\]//' | \
- awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND|ABS/ {print $$NF}'| \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
diff -u $(OUTPUT)libbpf_global_syms.tmp \
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 8c9325802793..841cc68e3f42 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -1481,6 +1481,11 @@ static const char *btf_dump_resolve_name(struct btf_dump *d, __u32 id,
if (s->name_resolved)
return *cached_name ? *cached_name : orig_name;
+ if (btf_is_fwd(t) || (btf_is_enum(t) && btf_vlen(t) == 0)) {
+ s->name_resolved = 1;
+ return orig_name;
+ }
+
dup_cnt = btf_dump_name_dups(d, name_map, orig_name);
if (dup_cnt > 1) {
const size_t max_len = 256;
@@ -1829,14 +1834,16 @@ static int btf_dump_array_data(struct btf_dump *d,
{
const struct btf_array *array = btf_array(t);
const struct btf_type *elem_type;
- __u32 i, elem_size = 0, elem_type_id;
+ __u32 i, elem_type_id;
+ __s64 elem_size;
bool is_array_member;
elem_type_id = array->type;
elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL);
elem_size = btf__resolve_size(d->btf, elem_type_id);
if (elem_size <= 0) {
- pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id);
+ pr_warn("unexpected elem size %zd for array type [%u]\n",
+ (ssize_t)elem_size, id);
return -EINVAL;
}
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0ad29203cbfb..693e14799fb9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10809,6 +10809,9 @@ void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
+ if (!s)
+ return;
+
if (s->progs)
bpf_object__detach_skeleton(s);
if (s->obj)
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 39f25e09b51e..fadde7d80a51 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -87,29 +87,75 @@ enum {
NL_DONE,
};
+static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(sock, mhdr, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (len < 0)
+ return -errno;
+ return len;
+}
+
+static int alloc_iov(struct iovec *iov, int len)
+{
+ void *nbuf;
+
+ nbuf = realloc(iov->iov_base, len);
+ if (!nbuf)
+ return -ENOMEM;
+
+ iov->iov_base = nbuf;
+ iov->iov_len = len;
+ return 0;
+}
+
static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq,
__dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn,
void *cookie)
{
+ struct iovec iov = {};
+ struct msghdr mhdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
bool multipart = true;
struct nlmsgerr *err;
struct nlmsghdr *nh;
- char buf[4096];
int len, ret;
+ ret = alloc_iov(&iov, 4096);
+ if (ret)
+ goto done;
+
while (multipart) {
start:
multipart = false;
- len = recv(sock, buf, sizeof(buf), 0);
+ len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
+ if (len < 0) {
+ ret = len;
+ goto done;
+ }
+
+ if (len > iov.iov_len) {
+ ret = alloc_iov(&iov, len);
+ if (ret)
+ goto done;
+ }
+
+ len = netlink_recvmsg(sock, &mhdr, 0);
if (len < 0) {
- ret = -errno;
+ ret = len;
goto done;
}
if (len == 0)
break;
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != nl_pid) {
ret = -LIBBPF_ERRNO__WRNGPID;
@@ -130,7 +176,8 @@ start:
libbpf_nla_dump_errormsg(nh);
goto done;
case NLMSG_DONE:
- return 0;
+ ret = 0;
+ goto done;
default:
break;
}
@@ -142,15 +189,17 @@ start:
case NL_NEXT:
goto start;
case NL_DONE:
- return 0;
+ ret = 0;
+ goto done;
default:
- return ret;
+ goto done;
}
}
}
}
ret = 0;
done:
+ free(iov.iov_base);
return ret;
}
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e9b619aa0cdf..a27b3141463a 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -1210,12 +1210,23 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
int xsk_umem__delete(struct xsk_umem *umem)
{
+ struct xdp_mmap_offsets off;
+ int err;
+
if (!umem)
return 0;
if (umem->refcount)
return -EBUSY;
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (!err && umem->fill_save && umem->comp_save) {
+ munmap(umem->fill_save->ring - off.fr.desc,
+ off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ munmap(umem->comp_save->ring - off.cr.desc,
+ off.cr.desc + umem->config.comp_size * sizeof(__u64));
+ }
+
close(umem->fd);
free(umem);
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index e37dfad31383..5146ff0fa078 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -577,7 +577,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
{
struct perf_evsel *evsel;
const struct perf_cpu_map *cpus = evlist->cpus;
- const struct perf_thread_map *threads = evlist->threads;
if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
@@ -589,7 +588,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
perf_evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
evsel->sample_id == NULL &&
- perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
+ perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
return -ENOMEM;
}
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 77b51600e3e9..63ffbc36dacc 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -529,6 +529,11 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
}
break;
+ case 0xcc:
+ /* int3 */
+ *type = INSN_TRAP;
+ break;
+
case 0xe3:
/* jecxz/jrcxz */
*type = INSN_JUMP_CONDITIONAL;
@@ -665,10 +670,10 @@ const char *arch_ret_insn(int len)
{
static const char ret[5][5] = {
{ BYTE_RET },
- { BYTE_RET, BYTES_NOP1 },
- { BYTE_RET, BYTES_NOP2 },
- { BYTE_RET, BYTES_NOP3 },
- { BYTE_RET, BYTES_NOP4 },
+ { BYTE_RET, 0xcc },
+ { BYTE_RET, 0xcc, BYTES_NOP1 },
+ { BYTE_RET, 0xcc, BYTES_NOP2 },
+ { BYTE_RET, 0xcc, BYTES_NOP3 },
};
if (len < 1 || len > 5) {
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 8b38b5d6fec7..38070f26105b 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -20,7 +20,7 @@
#include <objtool/objtool.h>
bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
- validate_dup, vmlinux, mcount, noinstr, backup;
+ validate_dup, vmlinux, mcount, noinstr, backup, sls;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -45,6 +45,7 @@ const struct option check_options[] = {
OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
+ OPT_BOOLEAN('S', "sls", &sls, "validate straight-line-speculation"),
OPT_END(),
};
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 81982948f981..66c7c13098b3 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -393,12 +393,12 @@ static int add_dead_ends(struct objtool_file *file)
else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
insn = find_last_insn(file, reloc->sym->sec);
if (!insn) {
- WARN("can't find unreachable insn at %s+0x%x",
+ WARN("can't find unreachable insn at %s+0x%lx",
reloc->sym->sec->name, reloc->addend);
return -1;
}
} else {
- WARN("can't find unreachable insn at %s+0x%x",
+ WARN("can't find unreachable insn at %s+0x%lx",
reloc->sym->sec->name, reloc->addend);
return -1;
}
@@ -428,12 +428,12 @@ reachable:
else if (reloc->addend == reloc->sym->sec->sh.sh_size) {
insn = find_last_insn(file, reloc->sym->sec);
if (!insn) {
- WARN("can't find reachable insn at %s+0x%x",
+ WARN("can't find reachable insn at %s+0x%lx",
reloc->sym->sec->name, reloc->addend);
return -1;
}
} else {
- WARN("can't find reachable insn at %s+0x%x",
+ WARN("can't find reachable insn at %s+0x%lx",
reloc->sym->sec->name, reloc->addend);
return -1;
}
@@ -871,6 +871,16 @@ static void add_call_dest(struct objtool_file *file, struct instruction *insn,
: arch_nop_insn(insn->len));
insn->type = sibling ? INSN_RETURN : INSN_NOP;
+
+ if (sibling) {
+ /*
+ * We've replaced the tail-call JMP insn by two new
+ * insn: RET; INT3, except we only have a single struct
+ * insn here. Mark it retpoline_safe to avoid the SLS
+ * warning, instead of adding another insn.
+ */
+ insn->retpoline_safe = true;
+ }
}
if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) {
@@ -2776,6 +2786,12 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
switch (insn->type) {
case INSN_RETURN:
+ if (next_insn && next_insn->type == INSN_TRAP) {
+ next_insn->ignore = true;
+ } else if (sls && !insn->retpoline_safe) {
+ WARN_FUNC("missing int3 after ret",
+ insn->sec, insn->offset);
+ }
return validate_return(func, insn, &state);
case INSN_CALL:
@@ -2819,6 +2835,14 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
break;
case INSN_JUMP_DYNAMIC:
+ if (next_insn && next_insn->type == INSN_TRAP) {
+ next_insn->ignore = true;
+ } else if (sls && !insn->retpoline_safe) {
+ WARN_FUNC("missing int3 after indirect jump",
+ insn->sec, insn->offset);
+ }
+
+ /* fallthrough */
case INSN_JUMP_DYNAMIC_CONDITIONAL:
if (is_sibling_call(insn)) {
ret = validate_sibling_call(insn, &state);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index fee03b744a6e..a3395467c316 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -485,7 +485,7 @@ static struct section *elf_create_reloc_section(struct elf *elf,
int reltype);
int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
- unsigned int type, struct symbol *sym, int addend)
+ unsigned int type, struct symbol *sym, long addend)
{
struct reloc *reloc;
@@ -514,37 +514,180 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
return 0;
}
-int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
- unsigned long offset, unsigned int type,
- struct section *insn_sec, unsigned long insn_off)
+/*
+ * Ensure that any reloc section containing references to @sym is marked
+ * changed such that it will get re-generated in elf_rebuild_reloc_sections()
+ * with the new symbol index.
+ */
+static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
+{
+ struct section *sec;
+
+ list_for_each_entry(sec, &elf->sections, list) {
+ struct reloc *reloc;
+
+ if (sec->changed)
+ continue;
+
+ list_for_each_entry(reloc, &sec->reloc_list, list) {
+ if (reloc->sym == sym) {
+ sec->changed = true;
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * Move the first global symbol, as per sh_info, into a new, higher symbol
+ * index. This fees up the shndx for a new local symbol.
+ */
+static int elf_move_global_symbol(struct elf *elf, struct section *symtab,
+ struct section *symtab_shndx)
{
+ Elf_Data *data, *shndx_data = NULL;
+ Elf32_Word first_non_local;
struct symbol *sym;
- int addend;
+ Elf_Scn *s;
- if (insn_sec->sym) {
- sym = insn_sec->sym;
- addend = insn_off;
+ first_non_local = symtab->sh.sh_info;
- } else {
- /*
- * The Clang assembler strips section symbols, so we have to
- * reference the function symbol instead:
- */
- sym = find_symbol_containing(insn_sec, insn_off);
- if (!sym) {
- /*
- * Hack alert. This happens when we need to reference
- * the NOP pad insn immediately after the function.
- */
- sym = find_symbol_containing(insn_sec, insn_off - 1);
+ sym = find_symbol_by_index(elf, first_non_local);
+ if (!sym) {
+ WARN("no non-local symbols !?");
+ return first_non_local;
+ }
+
+ s = elf_getscn(elf->elf, symtab->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ data = elf_newdata(s);
+ if (!data) {
+ WARN_ELF("elf_newdata");
+ return -1;
+ }
+
+ data->d_buf = &sym->sym;
+ data->d_size = sizeof(sym->sym);
+ data->d_align = 1;
+ data->d_type = ELF_T_SYM;
+
+ sym->idx = symtab->sh.sh_size / sizeof(sym->sym);
+ elf_dirty_reloc_sym(elf, sym);
+
+ symtab->sh.sh_info += 1;
+ symtab->sh.sh_size += data->d_size;
+ symtab->changed = true;
+
+ if (symtab_shndx) {
+ s = elf_getscn(elf->elf, symtab_shndx->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
}
- if (!sym) {
- WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
+ shndx_data = elf_newdata(s);
+ if (!shndx_data) {
+ WARN_ELF("elf_newshndx_data");
return -1;
}
- addend = insn_off - sym->offset;
+ shndx_data->d_buf = &sym->sec->idx;
+ shndx_data->d_size = sizeof(Elf32_Word);
+ shndx_data->d_align = 4;
+ shndx_data->d_type = ELF_T_WORD;
+
+ symtab_shndx->sh.sh_size += 4;
+ symtab_shndx->changed = true;
+ }
+
+ return first_non_local;
+}
+
+static struct symbol *
+elf_create_section_symbol(struct elf *elf, struct section *sec)
+{
+ struct section *symtab, *symtab_shndx;
+ Elf_Data *shndx_data = NULL;
+ struct symbol *sym;
+ Elf32_Word shndx;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (symtab) {
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx)
+ shndx_data = symtab_shndx->data;
+ } else {
+ WARN("no .symtab");
+ return NULL;
+ }
+
+ sym = malloc(sizeof(*sym));
+ if (!sym) {
+ perror("malloc");
+ return NULL;
+ }
+ memset(sym, 0, sizeof(*sym));
+
+ sym->idx = elf_move_global_symbol(elf, symtab, symtab_shndx);
+ if (sym->idx < 0) {
+ WARN("elf_move_global_symbol");
+ return NULL;
+ }
+
+ sym->name = sec->name;
+ sym->sec = sec;
+
+ // st_name 0
+ sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
+ // st_other 0
+ // st_value 0
+ // st_size 0
+ shndx = sec->idx;
+ if (shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) {
+ sym->sym.st_shndx = shndx;
+ if (!shndx_data)
+ shndx = 0;
+ } else {
+ sym->sym.st_shndx = SHN_XINDEX;
+ if (!shndx_data) {
+ WARN("no .symtab_shndx");
+ return NULL;
+ }
+ }
+
+ if (!gelf_update_symshndx(symtab->data, shndx_data, sym->idx, &sym->sym, shndx)) {
+ WARN_ELF("gelf_update_symshndx");
+ return NULL;
+ }
+
+ elf_add_symbol(elf, sym);
+
+ return sym;
+}
+
+int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int type,
+ struct section *insn_sec, unsigned long insn_off)
+{
+ struct symbol *sym = insn_sec->sym;
+ int addend = insn_off;
+
+ if (!sym) {
+ /*
+ * Due to how weak functions work, we must use section based
+ * relocations. Symbol based relocations would result in the
+ * weak and non-weak function annotations being overlaid on the
+ * non-weak function after linking.
+ */
+ sym = elf_create_section_symbol(elf, insn_sec);
+ if (!sym)
+ return -1;
+
+ insn_sec->sym = sym;
}
return elf_add_reloc(elf, sec, offset, type, sym, addend);
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 478e054fcdf7..9ca08d95e78e 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -26,6 +26,7 @@ enum insn_type {
INSN_CLAC,
INSN_STD,
INSN_CLD,
+ INSN_TRAP,
INSN_OTHER,
};
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 15ac0b7d3d6a..89ba869ed08f 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -9,7 +9,7 @@
extern const struct option check_options[];
extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
- validate_dup, vmlinux, mcount, noinstr, backup;
+ validate_dup, vmlinux, mcount, noinstr, backup, sls;
extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 075d8291b854..b4d01f8fd09b 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -69,7 +69,7 @@ struct reloc {
struct symbol *sym;
unsigned long offset;
unsigned int type;
- int addend;
+ long addend;
int idx;
bool jump_table_start;
};
@@ -131,7 +131,7 @@ struct elf *elf_open_read(const char *name, int flags);
struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
- unsigned int type, struct symbol *sym, int addend);
+ unsigned int type, struct symbol *sym, long addend);
int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int type,
struct section *insn_sec, unsigned long insn_off);
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 71772b20ea73..a92f0f025ec7 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -270,6 +270,9 @@ ifdef PYTHON_CONFIG
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+ ifeq ($(CC_NO_CLANG), 0)
+ PYTHON_EMBED_CCOPTS := $(filter-out -ffat-lto-objects, $(PYTHON_EMBED_CCOPTS))
+ endif
endif
FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
@@ -785,6 +788,9 @@ else
LDFLAGS += $(PERL_EMBED_LDFLAGS)
EXTLIBS += $(PERL_EMBED_LIBADD)
CFLAGS += -DHAVE_LIBPERL_SUPPORT
+ ifeq ($(CC_NO_CLANG), 0)
+ CFLAGS += -Wno-compound-token-split-by-macro
+ endif
$(call detected,CONFIG_LIBPERL)
endif
endif
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 9fcb4e68add9..78dfc282e5e2 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,5 +1,4 @@
perf-y += header.o
-perf-y += machine.o
perf-y += perf_regs.o
perf-y += tsc.o
perf-y += pmu.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index a4420d4df503..7d589a705fc8 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -154,6 +154,12 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
arm_spe_set_timestamp(itr, arm_spe_evsel);
}
+ /*
+ * Set this only so that perf report knows that SPE generates memory info. It has no effect
+ * on the opening of the event or the SPE data produced.
+ */
+ evsel__set_sample_bit(arm_spe_evsel, DATA_SRC);
+
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
if (err)
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
deleted file mode 100644
index 7e7714290a87..000000000000
--- a/tools/perf/arch/arm64/util/machine.c
+++ /dev/null
@@ -1,28 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <string.h>
-#include "debug.h"
-#include "symbol.h"
-
-/* On arm64, kernel text segment starts at high memory address,
- * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
- * address, like 0xffff 0000 00ax xxxx. When only small amount of
- * memory is used by modules, gap between end of module's text segment
- * and start of kernel text segment may reach 2G.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-#define SYMBOL_LIMIT (1 << 12) /* 4K */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
- (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
- /* Limit range of last symbol in module and kernel */
- p->end += SYMBOL_LIMIT;
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 8a79c4126e5b..0115f3166568 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,4 @@
perf-y += header.o
-perf-y += machine.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o
diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c
deleted file mode 100644
index e652a1aa8132..000000000000
--- a/tools/perf/arch/powerpc/util/machine.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <string.h>
-#include <internal/lib.h> // page_size
-#include "debug.h"
-#include "symbol.h"
-
-/* On powerpc kernel text segment start at memory addresses, 0xc000000000000000
- * whereas the modules are located at very high memory addresses,
- * for example 0xc00800000xxxxxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very high.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
- /* Limit the range of last kernel symbol */
- p->end += page_size;
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c
index 7644a4f6d4a4..98bc3f39d5f3 100644
--- a/tools/perf/arch/s390/util/machine.c
+++ b/tools/perf/arch/s390/util/machine.c
@@ -35,19 +35,3 @@ int arch__fix_module_text_start(u64 *start, u64 *size, const char *name)
return 0;
}
-
-/* On s390 kernel text segment start is located at very low memory addresses,
- * for example 0x10000. Modules are located at very high memory addresses,
- * for example 0x3ff xxxx xxxx. The gap between end of kernel text segment
- * and beginning of first module's text segment is very big.
- * Therefore do not fill this gap and do not assign it to the kernel dso map.
- */
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
- /* Last kernel symbol mapped to end of page */
- p->end = roundup(p->end, page_size);
- else
- p->end = c->start;
- pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
-}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 997e0a4b0902..6583ad9cc7de 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -349,6 +349,7 @@ static int report__setup_sample_type(struct report *rep)
struct perf_session *session = rep->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);
bool is_pipe = perf_data__is_pipe(session->data);
+ struct evsel *evsel;
if (session->itrace_synth_opts->callchain ||
session->itrace_synth_opts->add_callchain ||
@@ -403,6 +404,19 @@ static int report__setup_sample_type(struct report *rep)
}
if (sort__mode == SORT_MODE__MEMORY) {
+ /*
+ * FIXUP: prior to kernel 5.18, Arm SPE missed to set
+ * PERF_SAMPLE_DATA_SRC bit in sample type. For backward
+ * compatibility, set the bit if it's an old perf data file.
+ */
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (strstr(evsel->name, "arm_spe") &&
+ !(sample_type & PERF_SAMPLE_DATA_SRC)) {
+ evsel->core.attr.sample_type |= PERF_SAMPLE_DATA_SRC;
+ sample_type |= PERF_SAMPLE_DATA_SRC;
+ }
+ }
+
if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) {
ui__error("Selected --mem-mode but no mem data. "
"Did you call perf record without -d?\n");
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 18b56256bb6f..cb3d81adf5ca 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -455,7 +455,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(DATA_SRC) &&
- evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(WEIGHT) &&
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f0ecfda34ece..1a194edb5452 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -956,10 +956,10 @@ try_again_reset:
* Enable counters and exec the command:
*/
if (forks) {
- evlist__start_workload(evsel_list);
err = enable_counters();
if (err)
return -1;
+ evlist__start_workload(evsel_list);
t0 = rdclock();
clock_gettime(CLOCK_MONOTONIC, &ref_time);
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 2f6b67189b42..6aae7b6c376b 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -434,7 +434,7 @@ void pthread__unblock_sigwinch(void)
static int libperf_print(enum libperf_print_level level,
const char *fmt, va_list ap)
{
- return eprintf(level, verbose, fmt, ap);
+ return veprintf(level, verbose, fmt, ap);
}
int main(int argc, const char **argv)
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
index 9ff67206ade4..821d2f2a8f25 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
@@ -315,6 +315,19 @@
"UMask": "0x82"
},
{
+ "BriefDescription": "All retired memory instructions.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xD0",
+ "EventName": "MEM_INST_RETIRED.ANY",
+ "L1_Hit_Indication": "1",
+ "PEBS": "1",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x83"
+ },
+ {
"BriefDescription": "Retired load instructions with locked access.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
@@ -358,6 +371,7 @@
"EventCode": "0xD0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
"PEBS": "1",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).",
"SampleAfterValue": "100003",
"UMask": "0x11"
},
@@ -370,6 +384,7 @@
"EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
"L1_Hit_Indication": "1",
"PEBS": "1",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).",
"SampleAfterValue": "100003",
"UMask": "0x12"
},
@@ -733,7 +748,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010491",
+ "MSRValue": "0x10491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -772,7 +787,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0491",
+ "MSRValue": "0x4003C0491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -785,7 +800,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0491",
+ "MSRValue": "0x1003C0491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -798,7 +813,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0491",
+ "MSRValue": "0x8003C0491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -811,7 +826,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010490",
+ "MSRValue": "0x10490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -850,7 +865,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0490",
+ "MSRValue": "0x4003C0490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -863,7 +878,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0490",
+ "MSRValue": "0x1003C0490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -876,7 +891,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0490",
+ "MSRValue": "0x8003C0490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -889,7 +904,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010120",
+ "MSRValue": "0x10120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -928,7 +943,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0120",
+ "MSRValue": "0x4003C0120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -941,7 +956,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0120",
+ "MSRValue": "0x1003C0120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -954,7 +969,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0120",
+ "MSRValue": "0x8003C0120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -967,7 +982,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010122",
+ "MSRValue": "0x10122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1006,7 +1021,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0122",
+ "MSRValue": "0x4003C0122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1019,7 +1034,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0122",
+ "MSRValue": "0x1003C0122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1032,7 +1047,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0122",
+ "MSRValue": "0x8003C0122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1045,7 +1060,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010004",
+ "MSRValue": "0x10004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1084,7 +1099,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0004",
+ "MSRValue": "0x4003C0004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1097,7 +1112,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0004",
+ "MSRValue": "0x1003C0004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1110,7 +1125,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0004",
+ "MSRValue": "0x8003C0004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1123,7 +1138,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010001",
+ "MSRValue": "0x10001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1162,7 +1177,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0001",
+ "MSRValue": "0x4003C0001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1175,7 +1190,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0001",
+ "MSRValue": "0x1003C0001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1188,7 +1203,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0001",
+ "MSRValue": "0x8003C0001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1201,7 +1216,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010002",
+ "MSRValue": "0x10002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1240,7 +1255,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0002",
+ "MSRValue": "0x4003C0002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1253,7 +1268,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0002",
+ "MSRValue": "0x1003C0002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1266,7 +1281,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0002",
+ "MSRValue": "0x8003C0002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1279,7 +1294,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010400",
+ "MSRValue": "0x10400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1318,7 +1333,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0400",
+ "MSRValue": "0x4003C0400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1331,7 +1346,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0400",
+ "MSRValue": "0x1003C0400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1344,7 +1359,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0400",
+ "MSRValue": "0x8003C0400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1357,7 +1372,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010010",
+ "MSRValue": "0x10010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1396,7 +1411,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0010",
+ "MSRValue": "0x4003C0010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1409,7 +1424,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0010",
+ "MSRValue": "0x1003C0010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1422,7 +1437,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0010",
+ "MSRValue": "0x8003C0010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1435,7 +1450,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010020",
+ "MSRValue": "0x10020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1474,7 +1489,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0020",
+ "MSRValue": "0x4003C0020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1487,7 +1502,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0020",
+ "MSRValue": "0x1003C0020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1500,7 +1515,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0020",
+ "MSRValue": "0x8003C0020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1513,7 +1528,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010080",
+ "MSRValue": "0x10080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1552,7 +1567,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0080",
+ "MSRValue": "0x4003C0080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1565,7 +1580,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0080",
+ "MSRValue": "0x1003C0080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1578,7 +1593,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0080",
+ "MSRValue": "0x8003C0080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1591,7 +1606,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0000010100",
+ "MSRValue": "0x10100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1630,7 +1645,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x04003C0100",
+ "MSRValue": "0x4003C0100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1643,7 +1658,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x01003C0100",
+ "MSRValue": "0x1003C0100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1656,7 +1671,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x08003C0100",
+ "MSRValue": "0x8003C0100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
index 503737ed3a83..9e873ab22450 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
@@ -1,73 +1,81 @@
[
{
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT14 RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "PublicDescription": "Counts once for most SIMD 128-bit packed computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
{
- "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instruction retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "PublicDescription": "Counts once for most SIMD 128-bit packed computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
{
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "PublicDescription": "Counts once for most SIMD 256-bit packed double computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
{
- "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "PublicDescription": "Counts once for most SIMD 256-bit packed single computational precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
{
- "BriefDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 8 calculations per element.",
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
{
- "BriefDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 16 calculations per element.",
+ "BriefDescription": "Counts number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x80"
},
{
- "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "PublicDescription": "Counts once for most SIMD scalar computational double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
{
- "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
+ "BriefDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired. Counts twice for DPP and FM(N)ADD/SUB instructions retired.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3,4,5,6,7",
"EventCode": "0xC7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "PublicDescription": "Counts once for most SIMD scalar computational single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SIMD scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
index 078706a50091..ecce4273ae52 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
@@ -30,7 +30,21 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.",
+ "BriefDescription": "Retired Instructions who experienced DSB miss.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3",
+ "EventCode": "0xC6",
+ "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "MSRValue": "0x1",
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "SampleAfterValue": "100007",
+ "TakenAlone": "1",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
"Counter": "0,1,2,3",
"CounterHTOff": "0,1,2,3",
"EventCode": "0xC6",
@@ -38,7 +52,7 @@
"MSRIndex": "0x3F7",
"MSRValue": "0x11",
"PEBS": "1",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
"SampleAfterValue": "100007",
"TakenAlone": "1",
"UMask": "0x1"
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
index 6f29b02fa320..60c286b4fe54 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
@@ -299,7 +299,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00491",
+ "MSRValue": "0x83FC00491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -312,7 +312,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00491",
+ "MSRValue": "0x63FC00491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -325,7 +325,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000491",
+ "MSRValue": "0x604000491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -338,7 +338,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800491",
+ "MSRValue": "0x63B800491",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -377,7 +377,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00490",
+ "MSRValue": "0x83FC00490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -390,7 +390,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00490",
+ "MSRValue": "0x63FC00490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -403,7 +403,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000490",
+ "MSRValue": "0x604000490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -416,7 +416,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800490",
+ "MSRValue": "0x63B800490",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -455,7 +455,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00120",
+ "MSRValue": "0x83FC00120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -468,7 +468,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00120",
+ "MSRValue": "0x63FC00120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -481,7 +481,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000120",
+ "MSRValue": "0x604000120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -494,7 +494,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800120",
+ "MSRValue": "0x63B800120",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -533,7 +533,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00122",
+ "MSRValue": "0x83FC00122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -546,7 +546,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00122",
+ "MSRValue": "0x63FC00122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -559,7 +559,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000122",
+ "MSRValue": "0x604000122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -572,7 +572,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800122",
+ "MSRValue": "0x63B800122",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -611,7 +611,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00004",
+ "MSRValue": "0x83FC00004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -624,7 +624,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00004",
+ "MSRValue": "0x63FC00004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -637,7 +637,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000004",
+ "MSRValue": "0x604000004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -650,7 +650,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800004",
+ "MSRValue": "0x63B800004",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -689,7 +689,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00001",
+ "MSRValue": "0x83FC00001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -702,7 +702,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00001",
+ "MSRValue": "0x63FC00001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -715,7 +715,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000001",
+ "MSRValue": "0x604000001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -728,7 +728,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800001",
+ "MSRValue": "0x63B800001",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -767,7 +767,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00002",
+ "MSRValue": "0x83FC00002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -780,7 +780,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00002",
+ "MSRValue": "0x63FC00002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -793,7 +793,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000002",
+ "MSRValue": "0x604000002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -806,7 +806,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800002",
+ "MSRValue": "0x63B800002",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -845,7 +845,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00400",
+ "MSRValue": "0x83FC00400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -858,7 +858,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00400",
+ "MSRValue": "0x63FC00400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -871,7 +871,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000400",
+ "MSRValue": "0x604000400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -884,7 +884,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800400",
+ "MSRValue": "0x63B800400",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -923,7 +923,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00010",
+ "MSRValue": "0x83FC00010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -936,7 +936,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00010",
+ "MSRValue": "0x63FC00010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -949,7 +949,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000010",
+ "MSRValue": "0x604000010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -962,7 +962,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800010",
+ "MSRValue": "0x63B800010",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1001,7 +1001,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00020",
+ "MSRValue": "0x83FC00020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1014,7 +1014,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00020",
+ "MSRValue": "0x63FC00020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1027,7 +1027,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000020",
+ "MSRValue": "0x604000020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1040,7 +1040,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800020",
+ "MSRValue": "0x63B800020",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1079,7 +1079,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00080",
+ "MSRValue": "0x83FC00080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1092,7 +1092,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00080",
+ "MSRValue": "0x63FC00080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1105,7 +1105,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000080",
+ "MSRValue": "0x604000080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1118,7 +1118,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800080",
+ "MSRValue": "0x63B800080",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1157,7 +1157,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x083FC00100",
+ "MSRValue": "0x83FC00100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1170,7 +1170,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063FC00100",
+ "MSRValue": "0x63FC00100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1183,7 +1183,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x0604000100",
+ "MSRValue": "0x604000100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
@@ -1196,7 +1196,7 @@
"EventCode": "0xB7, 0xBB",
"EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
"MSRIndex": "0x1a6,0x1a7",
- "MSRValue": "0x063B800100",
+ "MSRValue": "0x63B800100",
"Offcore": "1",
"PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
"SampleAfterValue": "100003",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
index ca5748120666..12eabae3e224 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
@@ -436,6 +436,17 @@
"SampleAfterValue": "2000003"
},
{
+ "BriefDescription": "Number of all retired NOP instructions.",
+ "Counter": "0,1,2,3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Errata": "SKL091, SKL044",
+ "EventCode": "0xC0",
+ "EventName": "INST_RETIRED.NOP",
+ "PEBS": "1",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x2"
+ },
+ {
"BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
"Counter": "1",
"CounterHTOff": "1",
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 863c9e103969..b016f7d1ff3d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -1,26 +1,167 @@
[
{
+ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Frontend_Bound",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Frontend_Bound_SMT",
+ "PublicDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend. Frontend denotes the first part of the processor core responsible to fetch operations that are executed later on by the Backend part. Within the Frontend; a branch predictor predicts the next address to fetch; cache-lines are fetched from the memory subsystem; parsed into instructions; and lastly decoded into micro-operations (uops). Ideally the Frontend can issue Machine_Width uops every cycle to the Backend. Frontend Bound denotes unutilized issue-slots when there is no Backend stall; i.e. bubbles where Frontend delivered no uops while Backend could have accepted them. For example; stalls due to instruction-cache misses would be categorized under Frontend Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Bad_Speculation",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Bad_Speculation_SMT",
+ "PublicDescription": "This category represents fraction of slots wasted due to incorrect speculations. This include slots used to issue uops that do not eventually get retired and slots for which the issue-pipeline was blocked due to recovery from earlier incorrect speculation. For example; wasted work due to miss-predicted branches are categorized under Bad Speculation category. Incorrect data speculation followed by Memory Ordering Nukes is another example. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
+ "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD)",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Backend_Bound",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Backend_Bound_SMT",
+ "PublicDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend. Backend is the portion of the processor core where the out-of-order scheduler dispatches ready uops into their respective execution units; and once completed these uops get retired according to program order. For example; stalls due to data-cache misses or stalls due to the divider unit being overloaded are both categorized under Backend Bound. Backend Bound is further divided into two main categories: Memory Bound and Core Bound. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)",
+ "MetricGroup": "TopdownL1",
+ "MetricName": "Retiring",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. "
+ },
+ {
+ "BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))",
+ "MetricGroup": "TopdownL1_SMT",
+ "MetricName": "Retiring_SMT",
+ "PublicDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired. Ideally; all pipeline slots would be attributed to the Retiring category. Retiring of 100% would indicate the maximum Pipeline_Width throughput was achieved. Maximizing Retiring typically increases the Instructions-per-cycle (see IPC metric). Note that a high Retiring value does not necessary mean there is no room for more performance. For example; Heavy-operations or Microcode Assists are categorized under Retiring. They often indicate suboptimal performance and can often be optimized or avoided. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) )",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
+ "MetricName": "Mispredictions"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricExpr": "100 * ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) )",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts_SMT",
+ "MetricName": "Mispredictions_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (OFFCORE_REQUESTS_BUFFER.SQ_FULL / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
+ "MetricName": "Memory_Bandwidth"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( OFFCORE_REQUESTS_BUFFER.SQ_FULL / 2 ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) ) + ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( ((L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )) * cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ / CPU_CLK_UNHALTED.THREAD) / #(max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) ",
+ "MetricGroup": "Mem;MemoryBW;Offcore_SMT",
+ "MetricName": "Memory_Bandwidth_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) )",
+ "MetricGroup": "Mem;MemoryLat;Offcore",
+ "MetricName": "Memory_Latency"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Latency related bottlenecks (external memory and off-core caches)",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( CPU_CLK_UNHALTED.THREAD , OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD ) / CPU_CLK_UNHALTED.THREAD - (min( CPU_CLK_UNHALTED.THREAD , cpu@OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD\\,cmask\\=4@ ) / CPU_CLK_UNHALTED.THREAD)) / #(CYCLE_ACTIVITY.STALLS_L3_MISS / CPU_CLK_UNHALTED.THREAD + (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD) - (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD))) ) + ( (( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( (20.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) - (3.5 * ((CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC) * msr@tsc@ / 1000000000 / duration_time)) ) * MEM_LOAD_RETIRED.L3_HIT * (1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) / 2) / CPU_CLK_UNHALTED.THREAD) / #(( CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS ) / CPU_CLK_UNHALTED.THREAD) ) + ( (( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) / ( (MEM_LOAD_RETIRED.L2_HIT * ( 1 + (MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) )) + cpu@L1D_PEND_MISS.FB_FULL\\,cmask\\=1@ ) ) * (( CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS ) / CPU_CLK_UNHALTED.THREAD)) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) )",
+ "MetricGroup": "Mem;MemoryLat;Offcore_SMT",
+ "MetricName": "Memory_Latency_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD)) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - ( UOPS_ISSUED.ANY + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / CPU_CLK_UNHALTED.THREAD) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "Memory_Data_TLBs"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricExpr": "100 * ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * ( ( (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) / ((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (min( 9 * cpu@DTLB_LOAD_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_LOAD_MISSES.WALK_ACTIVE , max( CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS , 0 ) ) / CPU_CLK_UNHALTED.THREAD) / (max( ( CYCLE_ACTIVITY.STALLS_MEM_ANY - CYCLE_ACTIVITY.STALLS_L1D_MISS ) / CPU_CLK_UNHALTED.THREAD , 0 )) ) + ( (EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) / #((( CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES ) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + (UOPS_RETIRED.RETIRE_SLOTS / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES)) * (1 - (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - ( UOPS_ISSUED.ANY + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) ) * ( (( 9 * cpu@DTLB_STORE_MISSES.STLB_HIT\\,cmask\\=1@ + DTLB_STORE_MISSES.WALK_ACTIVE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / #(EXE_ACTIVITY.BOUND_ON_STORES / CPU_CLK_UNHALTED.THREAD) ) ) ",
+ "MetricGroup": "Mem;MemoryTLB;_SMT",
+ "MetricName": "Memory_Data_TLBs_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * CPU_CLK_UNHALTED.THREAD))",
+ "MetricGroup": "Ret",
+ "MetricName": "Branching_Overhead"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of branch related instructions (used for program control-flow including function calls)",
+ "MetricExpr": "100 * (( BR_INST_RETIRED.CONDITIONAL + 3 * BR_INST_RETIRED.NEAR_CALL + (BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricGroup": "Ret_SMT",
+ "MetricName": "Branching_Overhead_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))",
+ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB",
+ "MetricName": "Big_Code"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricExpr": "100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))",
+ "MetricGroup": "BigFoot;Fed;Frontend;IcMiss;MemoryTLB_SMT",
+ "MetricName": "Big_Code_SMT"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))",
+ "MetricGroup": "Fed;FetchBW;Frontend",
+ "MetricName": "Instruction_Fetch_BW"
+ },
+ {
+ "BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks",
+ "MetricExpr": "100 * ( (IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) - (100 * (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ( (ICACHE_64B.IFTAG_STALL / CPU_CLK_UNHALTED.THREAD) + (( ICACHE_16B.IFDATA_STALL + 2 * cpu@ICACHE_16B.IFDATA_STALL\\,cmask\\=1\\,edge@ ) / CPU_CLK_UNHALTED.THREAD) + (9 * BACLEARS.ANY / CPU_CLK_UNHALTED.THREAD) ) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))",
+ "MetricGroup": "Fed;FetchBW;Frontend_SMT",
+ "MetricName": "Instruction_Fetch_BW_SMT"
+ },
+ {
"BriefDescription": "Instructions Per Cycle (per Logical Processor)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Summary",
+ "MetricGroup": "Ret;Summary",
"MetricName": "IPC"
},
{
"BriefDescription": "Uops Per Instruction",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
- "MetricGroup": "Pipeline;Retire",
+ "MetricGroup": "Pipeline;Ret;Retire",
"MetricName": "UPI"
},
{
"BriefDescription": "Instruction per taken branch",
- "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;FetchBW;PGO",
- "MetricName": "IpTB"
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;FetchBW",
+ "MetricName": "UpTB"
},
{
"BriefDescription": "Cycles Per Instruction (per Logical Processor)",
"MetricExpr": "1 / (INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD)",
- "MetricGroup": "Pipeline",
+ "MetricGroup": "Pipeline;Mem",
"MetricName": "CPI"
},
{
@@ -30,39 +171,84 @@
"MetricName": "CLKS"
},
{
- "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TmaL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots (per-Physical Core till ICL; per-Logical Processor ICL onward)",
+ "MetricExpr": "4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "TmaL1_SMT",
+ "MetricName": "SLOTS_SMT"
+ },
+ {
+ "BriefDescription": "The ratio of Executed- by Issued-Uops",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / UOPS_ISSUED.ANY",
+ "MetricGroup": "Cor;Pipeline",
+ "MetricName": "Execute_per_Issue",
+ "PublicDescription": "The ratio of Executed- by Issued-Uops. Ratio > 1 suggests high rate of uop micro-fusions. Ratio < 1 suggest high rate of \"execute\" at rename stage."
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "SMT;TmaL1",
+ "MetricGroup": "Ret;SMT;TmaL1",
"MetricName": "CoreIPC"
},
{
- "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "BriefDescription": "Instructions Per Cycle across hyper-threads (per physical core)",
"MetricExpr": "INST_RETIRED.ANY / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
- "MetricGroup": "SMT;TmaL1",
+ "MetricGroup": "Ret;SMT;TmaL1_SMT",
"MetricName": "CoreIPC_SMT"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
"MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / CPU_CLK_UNHALTED.THREAD",
- "MetricGroup": "Flops",
+ "MetricGroup": "Ret;Flops",
"MetricName": "FLOPc"
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
"MetricExpr": "( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
- "MetricGroup": "Flops_SMT",
+ "MetricGroup": "Ret;Flops_SMT",
"MetricName": "FLOPc_SMT"
},
{
+ "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width)",
+ "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
+ "MetricGroup": "Cor;Flops;HPC",
+ "MetricName": "FP_Arith_Utilization",
+ "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting."
+ },
+ {
+ "BriefDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
+ "MetricGroup": "Cor;Flops;HPC_SMT",
+ "MetricName": "FP_Arith_Utilization_SMT",
+ "PublicDescription": "Actual per-core usage of the Floating Point execution units (regardless of the vector width). Values > 1 are possible due to Fused-Multiply Add (FMA) counting. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
"BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
"MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 ) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
- "MetricGroup": "Pipeline;PortsUtil",
+ "MetricGroup": "Backend;Cor;Pipeline;PortsUtil",
"MetricName": "ILP"
},
{
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES ) / (4 * CPU_CLK_UNHALTED.THREAD))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) ) * (4 * CPU_CLK_UNHALTED.THREAD) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts",
+ "MetricName": "Branch_Misprediction_Cost"
+ },
+ {
+ "BriefDescription": "Branch Misprediction Cost: Fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricExpr": " ( ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * (( UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * ( INT_MISC.RECOVERY_CYCLES_ANY / 2 ) ) / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) + (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * ((BR_MISP_RETIRED.ALL_BRANCHES / ( BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT )) * INT_MISC.CLEAR_RESTEER_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) ) * (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )) / BR_MISP_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;BrMispredicts_SMT",
+ "MetricName": "Branch_Misprediction_Cost_SMT"
+ },
+ {
"BriefDescription": "Number of Instructions per non-speculative Branch Misprediction (JEClear)",
"MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
- "MetricGroup": "BrMispredicts",
+ "MetricGroup": "Bad;BadSpec;BrMispredicts",
"MetricName": "IpMispredict"
},
{
@@ -86,122 +272,249 @@
{
"BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
- "MetricGroup": "Branches;InsType",
+ "MetricGroup": "Branches;Fed;InsType",
"MetricName": "IpBranch"
},
{
"BriefDescription": "Instructions per (near) call (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_CALL",
- "MetricGroup": "Branches",
+ "MetricGroup": "Branches;Fed;PGO",
"MetricName": "IpCall"
},
{
+ "BriefDescription": "Instruction per taken branch",
+ "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.NEAR_TAKEN",
+ "MetricGroup": "Branches;Fed;FetchBW;Frontend;PGO",
+ "MetricName": "IpTB"
+ },
+ {
"BriefDescription": "Branch instructions per taken branch. ",
"MetricExpr": "BR_INST_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.NEAR_TAKEN",
- "MetricGroup": "Branches;PGO",
+ "MetricGroup": "Branches;Fed;PGO",
"MetricName": "BpTkBranch"
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
"MetricExpr": "INST_RETIRED.ANY / ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
- "MetricGroup": "Flops;FpArith;InsType",
+ "MetricGroup": "Flops;InsType",
"MetricName": "IpFLOP"
},
{
+ "BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( (FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE) + (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) )",
+ "MetricGroup": "Flops;InsType",
+ "MetricName": "IpArith",
+ "PublicDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate). May undercount due to FMA double counting. Approximated prior to BDW."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "MetricGroup": "Flops;FpScalar;InsType",
+ "MetricName": "IpArith_Scalar_SP",
+ "PublicDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "MetricGroup": "Flops;FpScalar;InsType",
+ "MetricName": "IpArith_Scalar_DP",
+ "PublicDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE )",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "IpArith_AVX128",
+ "PublicDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "IpArith_AVX256",
+ "PublicDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
+ "BriefDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate)",
+ "MetricExpr": "INST_RETIRED.ANY / ( FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE )",
+ "MetricGroup": "Flops;FpVector;InsType",
+ "MetricName": "IpArith_AVX512",
+ "PublicDescription": "Instructions per FP Arithmetic AVX 512-bit instruction (lower number means higher occurrence rate). May undercount due to FMA double counting."
+ },
+ {
"BriefDescription": "Total number of retired Instructions, Sample with: INST_RETIRED.PREC_DIST",
"MetricExpr": "INST_RETIRED.ANY",
"MetricGroup": "Summary;TmaL1",
"MetricName": "Instructions"
},
{
+ "BriefDescription": "Average number of Uops issued by front-end when it issued something",
+ "MetricExpr": "UOPS_ISSUED.ANY / cpu@UOPS_ISSUED.ANY\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchBW",
+ "MetricName": "Fetch_UpC"
+ },
+ {
"BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded ICache; or Uop Cache)",
"MetricExpr": "IDQ.DSB_UOPS / (IDQ.DSB_UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS)",
- "MetricGroup": "DSB;FetchBW",
+ "MetricGroup": "DSB;Fed;FetchBW",
"MetricName": "DSB_Coverage"
},
{
- "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads (in core cycles)",
+ "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.",
+ "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / CPU_CLK_UNHALTED.THREAD / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * CPU_CLK_UNHALTED.THREAD)) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * CPU_CLK_UNHALTED.THREAD)))",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "DSB_Misses_Cost"
+ },
+ {
+ "BriefDescription": "Total penalty related to DSB (uop cache) misses - subset/see of/the Instruction_Fetch_BW Bottleneck.",
+ "MetricExpr": "(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) * (DSB2MITE_SWITCHES.PENALTY_CYCLES / CPU_CLK_UNHALTED.THREAD) / #(4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) + ((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )))) * (( IDQ.ALL_MITE_CYCLES_ANY_UOPS - IDQ.ALL_MITE_CYCLES_4_UOPS ) / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) / 2) / #((IDQ_UOPS_NOT_DELIVERED.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))) - (4 * IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE / (4 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ))))",
+ "MetricGroup": "DSBmiss;Fed_SMT",
+ "MetricName": "DSB_Misses_Cost_SMT"
+ },
+ {
+ "BriefDescription": "Number of Instructions per non-speculative DSB miss",
+ "MetricExpr": "INST_RETIRED.ANY / FRONTEND_RETIRED.ANY_DSB_MISS",
+ "MetricGroup": "DSBmiss;Fed",
+ "MetricName": "IpDSB_Miss_Ret"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are non-taken conditionals",
+ "MetricExpr": "BR_INST_RETIRED.NOT_TAKEN / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "Cond_NT"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are taken conditionals",
+ "MetricExpr": "( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches;CodeGen;PGO",
+ "MetricName": "Cond_TK"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are CALL or RET",
+ "MetricExpr": "( BR_INST_RETIRED.NEAR_CALL + BR_INST_RETIRED.NEAR_RETURN ) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "CallRet"
+ },
+ {
+ "BriefDescription": "Fraction of branches that are unconditional (direct or indirect) jumps",
+ "MetricExpr": "(BR_INST_RETIRED.NEAR_TAKEN - ( BR_INST_RETIRED.CONDITIONAL - BR_INST_RETIRED.NOT_TAKEN ) - 2 * BR_INST_RETIRED.NEAR_CALL) / BR_INST_RETIRED.ALL_BRANCHES",
+ "MetricGroup": "Bad;Branches",
+ "MetricName": "Jump"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles)",
"MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
- "MetricGroup": "MemoryBound;MemoryLat",
- "MetricName": "Load_Miss_Real_Latency"
+ "MetricGroup": "Mem;MemoryBound;MemoryLat",
+ "MetricName": "Load_Miss_Real_Latency",
+ "PublicDescription": "Actual Average Latency for L1 data-cache miss demand load instructions (in core cycles). Latency may be overestimated for multi-load instructions - e.g. repeat strings."
},
{
"BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least one such miss. Per-Logical Processor)",
"MetricExpr": "L1D_PEND_MISS.PENDING / L1D_PEND_MISS.PENDING_CYCLES",
- "MetricGroup": "MemoryBound;MemoryBW",
+ "MetricGroup": "Mem;MemoryBound;MemoryBW",
"MetricName": "MLP"
},
{
- "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
- "MetricConstraint": "NO_NMI_WATCHDOG",
- "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CORE_CLKS )",
- "MetricGroup": "MemoryTLB",
- "MetricName": "Page_Walks_Utilization"
- },
- {
"BriefDescription": "Average data fill bandwidth to the L1 data cache [GB / sec]",
"MetricExpr": "64 * L1D.REPLACEMENT / 1000000000 / duration_time",
- "MetricGroup": "MemoryBW",
+ "MetricGroup": "Mem;MemoryBW",
"MetricName": "L1D_Cache_Fill_BW"
},
{
"BriefDescription": "Average data fill bandwidth to the L2 cache [GB / sec]",
"MetricExpr": "64 * L2_LINES_IN.ALL / 1000000000 / duration_time",
- "MetricGroup": "MemoryBW",
+ "MetricGroup": "Mem;MemoryBW",
"MetricName": "L2_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data fill bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * LONGEST_LAT_CACHE.MISS / 1000000000 / duration_time",
- "MetricGroup": "MemoryBW",
+ "MetricGroup": "Mem;MemoryBW",
"MetricName": "L3_Cache_Fill_BW"
},
{
"BriefDescription": "Average per-core data access bandwidth to the L3 cache [GB / sec]",
"MetricExpr": "64 * OFFCORE_REQUESTS.ALL_REQUESTS / 1000000000 / duration_time",
- "MetricGroup": "MemoryBW;Offcore",
+ "MetricGroup": "Mem;MemoryBW;Offcore",
"MetricName": "L3_Cache_Access_BW"
},
{
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L1_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses",
+ "MetricGroup": "Mem;CacheMisses",
"MetricName": "L1MPKI"
},
{
+ "BriefDescription": "L1 cache true misses per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1000 * L2_RQSTS.ALL_DEMAND_DATA_RD / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L1MPKI_Load"
+ },
+ {
"BriefDescription": "L2 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L2_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses",
+ "MetricGroup": "Mem;Backend;CacheMisses",
"MetricName": "L2MPKI"
},
{
"BriefDescription": "L2 cache misses per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * L2_RQSTS.MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses;Offcore",
+ "MetricGroup": "Mem;CacheMisses;Offcore",
"MetricName": "L2MPKI_All"
},
{
+ "BriefDescription": "L2 cache misses per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_MISS / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L2MPKI_Load"
+ },
+ {
"BriefDescription": "L2 cache hits per kilo instruction for all request types (including speculative)",
"MetricExpr": "1000 * ( L2_RQSTS.REFERENCES - L2_RQSTS.MISS ) / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses",
+ "MetricGroup": "Mem;CacheMisses",
"MetricName": "L2HPKI_All"
},
{
+ "BriefDescription": "L2 cache hits per kilo instruction for all demand loads (including speculative)",
+ "MetricExpr": "1000 * L2_RQSTS.DEMAND_DATA_RD_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "L2HPKI_Load"
+ },
+ {
"BriefDescription": "L3 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1000 * MEM_LOAD_RETIRED.L3_MISS / INST_RETIRED.ANY",
- "MetricGroup": "CacheMisses",
+ "MetricGroup": "Mem;CacheMisses",
"MetricName": "L3MPKI"
},
{
+ "BriefDescription": "Fill Buffer (FB) true hits per kilo instructions for retired demand loads",
+ "MetricExpr": "1000 * MEM_LOAD_RETIRED.FB_HIT / INST_RETIRED.ANY",
+ "MetricGroup": "Mem;CacheMisses",
+ "MetricName": "FB_HPKI"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricConstraint": "NO_NMI_WATCHDOG",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * CPU_CLK_UNHALTED.THREAD )",
+ "MetricGroup": "Mem;MemoryTLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) ) )",
+ "MetricGroup": "Mem;MemoryTLB_SMT",
+ "MetricName": "Page_Walks_Utilization_SMT"
+ },
+ {
"BriefDescription": "Rate of silent evictions from the L2 cache per Kilo instruction where the evicted lines are dropped (no writeback to L3 or memory)",
"MetricExpr": "1000 * L2_LINES_OUT.SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Server",
+ "MetricGroup": "L2Evicts;Mem;Server",
"MetricName": "L2_Evictions_Silent_PKI"
},
{
"BriefDescription": "Rate of non silent evictions from the L2 cache per Kilo instruction",
"MetricExpr": "1000 * L2_LINES_OUT.NON_SILENT / INST_RETIRED.ANY",
- "MetricGroup": "L2Evicts;Server",
+ "MetricGroup": "L2Evicts;Mem;Server",
"MetricName": "L2_Evictions_NonSilent_PKI"
},
{
@@ -219,7 +532,7 @@
{
"BriefDescription": "Giga Floating Point Operations Per Second",
"MetricExpr": "( ( 1 * ( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * ( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8 * ( FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE ) + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE ) / 1000000000 ) / duration_time",
- "MetricGroup": "Flops;HPC",
+ "MetricGroup": "Cor;Flops;HPC",
"MetricName": "GFLOPs"
},
{
@@ -229,6 +542,48 @@
"MetricName": "Turbo_Utilization"
},
{
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0",
+ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Power",
+ "MetricName": "Power_License0_Utilization",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "CORE_POWER.LVL0_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "Power_SMT",
+ "MetricName": "Power_License0_Utilization_SMT",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1",
+ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Power",
+ "MetricName": "Power_License1_Utilization",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "CORE_POWER.LVL1_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "Power_SMT",
+ "MetricName": "Power_License1_Utilization_SMT",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX)",
+ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Power",
+ "MetricName": "Power_License2_Utilization",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions."
+ },
+ {
+ "BriefDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). SMT version; use when SMT is enabled and measuring per logical CPU.",
+ "MetricExpr": "CORE_POWER.LVL2_TURBO_LICENSE / 2 / ( ( CPU_CLK_UNHALTED.THREAD / 2 ) * ( 1 + CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / CPU_CLK_UNHALTED.REF_XCLK ) )",
+ "MetricGroup": "Power_SMT",
+ "MetricName": "Power_License2_Utilization_SMT",
+ "PublicDescription": "Fraction of Core cycles where the core was running with power-delivery for license level 2 (introduced in SKX). This includes high current AVX 512-bit instructions. SMT version; use when SMT is enabled and measuring per logical CPU."
+ },
+ {
"BriefDescription": "Fraction of cycles where both hardware Logical Processors were active",
"MetricExpr": "1 - CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
"MetricGroup": "SMT",
@@ -241,33 +596,45 @@
"MetricName": "Kernel_Utilization"
},
{
+ "BriefDescription": "Cycles Per Instruction for the Operating System (OS) Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD_P:k / INST_RETIRED.ANY_P:k",
+ "MetricGroup": "OS",
+ "MetricName": "Kernel_CPI"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
- "MetricGroup": "HPC;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;SoC",
"MetricName": "DRAM_BW_Use"
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
"MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x35\\,umask\\=0x21\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
- "MetricGroup": "MemoryLat;SoC",
+ "MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "MEM_Read_Latency"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
"MetricExpr": "cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433@ / cha@event\\=0x36\\,umask\\=0x21\\,config\\=0x40433\\,thresh\\=1@",
- "MetricGroup": "MemoryBW;SoC",
+ "MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "MEM_Parallel_Reads"
},
{
+ "BriefDescription": "Average latency of data read request to external DRAM memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
+ "MetricExpr": "1000000000 * ( UNC_M_RPQ_OCCUPANCY / UNC_M_RPQ_INSERTS ) / imc_0@event\\=0x0@",
+ "MetricGroup": "Mem;MemoryLat;SoC;Server",
+ "MetricName": "MEM_DRAM_Read_Latency"
+ },
+ {
"BriefDescription": "Average IO (network or disk) Bandwidth Use for Writes [GB / sec]",
"MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3 ) * 4 / 1000000000 / duration_time",
- "MetricGroup": "IoBW;SoC;Server",
+ "MetricGroup": "IoBW;Mem;SoC;Server",
"MetricName": "IO_Write_BW"
},
{
"BriefDescription": "Average IO (network or disk) Bandwidth Use for Reads [GB / sec]",
"MetricExpr": "( UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3 ) * 4 / 1000000000 / duration_time",
- "MetricGroup": "IoBW;SoC;Server",
+ "MetricGroup": "IoBW;Mem;SoC;Server",
"MetricName": "IO_Read_BW"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
index 6ed92bc5c129..06c5ca26ca3f 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
@@ -538,6 +538,18 @@
"Unit": "IIO"
},
{
+ "BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC2",
+ "EventName": "UNC_IIO_COMP_BUF_INSERTS.CMPD.ALL_PARTS",
+ "FCMask": "0x4",
+ "PerPkg": "1",
+ "PortMask": "0x0f",
+ "PublicDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0-3",
+ "UMask": "0x03",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "PCIe Completion Buffer Inserts of completions with data: Part 0",
"Counter": "0,1,2,3",
"EventCode": "0xC2",
@@ -586,6 +598,17 @@
"Unit": "IIO"
},
{
+ "BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3",
+ "Counter": "2,3",
+ "EventCode": "0xD5",
+ "EventName": "UNC_IIO_COMP_BUF_OCCUPANCY.CMPD.ALL_PARTS",
+ "FCMask": "0x04",
+ "PerPkg": "1",
+ "PublicDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0-3",
+ "UMask": "0x0f",
+ "Unit": "IIO"
+ },
+ {
"BriefDescription": "PCIe Completion Buffer occupancy of completions with data: Part 0",
"Counter": "2,3",
"EventCode": "0xD5",
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 7054f23150e1..235549bb28b9 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -927,7 +927,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.type = PERF_TYPE_HARDWARE;
attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
- PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC;
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_ADDR;
if (spe->timeless_decoding)
attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
else
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 51a2219df601..3bfe099d8643 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1529,7 +1529,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
bool use_uncore_alias;
LIST_HEAD(config_terms);
- if (verbose > 1) {
+ pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
+
+ if (verbose > 1 && !(pmu && pmu->selectable)) {
fprintf(stderr, "Attempting to add event pmu '%s' with '",
name);
if (head_config) {
@@ -1542,7 +1544,6 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
fprintf(stderr, "' that may result in non-fatal errors\n");
}
- pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
if (!pmu) {
char *err_str;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 352f16076e01..562e9b808027 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2076,6 +2076,7 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
bool needs_swap, union perf_event *error)
{
union perf_event *event;
+ u16 event_size;
/*
* Ensure we have enough space remaining to read
@@ -2088,15 +2089,23 @@ prefetch_event(char *buf, u64 head, size_t mmap_size,
if (needs_swap)
perf_event_header__bswap(&event->header);
- if (head + event->header.size <= mmap_size)
+ event_size = event->header.size;
+ if (head + event_size <= mmap_size)
return event;
/* We're not fetching the event so swap back again */
if (needs_swap)
perf_event_header__bswap(&event->header);
- pr_debug("%s: head=%#" PRIx64 " event->header_size=%#x, mmap_size=%#zx:"
- " fuzzed or compressed perf.data?\n",__func__, head, event->header.size, mmap_size);
+ /* Check if the event fits into the next mmapped buf. */
+ if (event_size <= mmap_size - head % page_size) {
+ /* Remap buf and fetch again. */
+ return NULL;
+ }
+
+ /* Invalid input. Event size should never exceed mmap_size. */
+ pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:"
+ " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
return error;
}
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 483f05004e68..c255a2c90cd6 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,12 +1,14 @@
-from os import getenv
+from os import getenv, path
from subprocess import Popen, PIPE
from re import sub
cc = getenv("CC")
cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
+src_feature_tests = getenv('srctree') + '/tools/build/feature'
def clang_has_option(option):
- return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
+ cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
+ return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
if cc_is_clang:
from distutils.sysconfig import get_config_vars
@@ -23,6 +25,8 @@ if cc_is_clang:
vars[var] = sub("-fstack-protector-strong", "", vars[var])
if not clang_has_option("-fno-semantic-interposition"):
vars[var] = sub("-fno-semantic-interposition", "", vars[var])
+ if not clang_has_option("-ffat-lto-objects"):
+ vars[var] = sub("-ffat-lto-objects", "", vars[var])
from distutils.core import setup, Extension
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 31cd59a2b66e..ecd377938eea 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1290,7 +1290,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
* For misannotated, zeroed, ASM function sizes.
*/
if (nr > 0) {
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
if (kmap) {
/*
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 61379ed2b75c..b1e5fd99e38a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -101,11 +101,6 @@ static int prefix_underscores_count(const char *str)
return tail - str;
}
-void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
-{
- p->end = c->start;
-}
-
const char * __weak arch__normalize_symbol_name(const char *name)
{
return name;
@@ -217,7 +212,8 @@ again:
}
}
-void symbols__fixup_end(struct rb_root_cached *symbols)
+/* Update zero-sized symbols using the address of the next symbol */
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms)
{
struct rb_node *nd, *prevnd = rb_first_cached(symbols);
struct symbol *curr, *prev;
@@ -231,8 +227,29 @@ void symbols__fixup_end(struct rb_root_cached *symbols)
prev = curr;
curr = rb_entry(nd, struct symbol, rb_node);
- if (prev->end == prev->start || prev->end != curr->start)
- arch__symbols__fixup_end(prev, curr);
+ /*
+ * On some architecture kernel text segment start is located at
+ * some low memory address, while modules are located at high
+ * memory addresses (or vice versa). The gap between end of
+ * kernel text segment and beginning of first module's text
+ * segment is very big. Therefore do not fill this gap and do
+ * not assign it to the kernel dso map (kallsyms).
+ *
+ * In kallsyms, it determines module symbols using '[' character
+ * like in:
+ * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi]
+ */
+ if (prev->end == prev->start) {
+ /* Last kernel/module symbol mapped to end of page */
+ if (is_kallsyms && (!strchr(prev->name, '[') !=
+ !strchr(curr->name, '[')))
+ prev->end = roundup(prev->end + 4096, 4096);
+ else
+ prev->end = curr->start;
+
+ pr_debug4("%s sym:%s end:%#" PRIx64 "\n",
+ __func__, prev->name, prev->end);
+ }
}
/* Last entry */
@@ -1456,7 +1473,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
if (kallsyms__delta(kmap, filename, &delta))
return -1;
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, true);
symbols__fixup_duplicate(&dso->symbols);
if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
@@ -1648,7 +1665,7 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile)
#undef bfd_asymbol_section
#endif
- symbols__fixup_end(&dso->symbols);
+ symbols__fixup_end(&dso->symbols, false);
symbols__fixup_duplicate(&dso->symbols);
dso->adjust_symbols = 1;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 954d6a049ee2..28721d761d91 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -192,7 +192,7 @@ void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
bool kernel);
void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
void symbols__fixup_duplicate(struct rb_root_cached *symbols);
-void symbols__fixup_end(struct rb_root_cached *symbols);
+void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms);
void maps__fixup_end(struct maps *maps);
typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
@@ -230,7 +230,6 @@ const char *arch__normalize_symbol_name(const char *name);
#define SYMBOL_A 0
#define SYMBOL_B 1
-void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
int arch__compare_symbol_names(const char *namea, const char *nameb);
int arch__compare_symbol_names_n(const char *namea, const char *nameb,
unsigned int n);
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index d0f06e40c16d..eac71fbb24ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -1,13 +1,24 @@
// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include "bind_perm.skel.h"
-
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/capability.h>
+#include "test_progs.h"
+#include "bind_perm.skel.h"
+
static int duration;
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ return 0;
+}
+
void try_bind(int family, int port, int expected_errno)
{
struct sockaddr_storage addr = {};
@@ -75,6 +86,9 @@ void test_bind_perm(void)
struct bind_perm *skel;
int cgroup_fd;
+ if (create_netns())
+ return;
+
cgroup_fd = test__join_cgroup("/bind_perm");
if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index cf3acfa5a91d..69455fe90ac3 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -7,6 +7,7 @@
#include "test_ksyms_btf.skel.h"
#include "test_ksyms_btf_null_check.skel.h"
#include "test_ksyms_weak.skel.h"
+#include "test_ksyms_btf_write_check.skel.h"
static int duration;
@@ -109,6 +110,16 @@ cleanup:
test_ksyms_weak__destroy(skel);
}
+static void test_write_check(void)
+{
+ struct test_ksyms_btf_write_check *skel;
+
+ skel = test_ksyms_btf_write_check__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n");
+
+ test_ksyms_btf_write_check__destroy(skel);
+}
+
void test_ksyms_btf(void)
{
int percpu_datasec;
@@ -136,4 +147,7 @@ void test_ksyms_btf(void)
if (test__start_subtest("weak_ksyms"))
test_weak_syms();
+
+ if (test__start_subtest("write_check"))
+ test_write_check();
}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
new file mode 100644
index 000000000000..2180c41cd890
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ int *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* Kernel memory obtained from bpf_{per,this}_cpu_ptr
+ * is read-only, should _not_ pass verification.
+ */
+ /* WRITE_ONCE */
+ *(volatile int *)active = -1;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index 1cfeb940cf9f..5f0e0bfc151e 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -23,7 +23,7 @@ struct {
__uint(value_size, sizeof(__u32));
} mim_hash SEC(".maps");
-SEC("xdp_mimtest")
+SEC("xdp")
int xdp_mimtest0(struct xdp_md *ctx)
{
int value = 123;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index ac6f7f205e25..cb0aa46b20d1 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -404,8 +404,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from remote_port field. Expect SRC_PORT. */
if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
- LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
- LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
+ LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff))
return SK_DROP;
if (LSW(ctx->remote_port, 0) != SRC_PORT)
return SK_DROP;
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 81b57b9aaaea..7967348b11af 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -113,7 +113,7 @@ static void tpcpy(struct bpf_tcp_sock *dst,
#define RET_LOG() ({ \
linum = __LINE__; \
- bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \
+ bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY); \
return CG_OK; \
})
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
index 47cbe2eeae43..fac7ef99f9a6 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -156,7 +156,7 @@ int check_syncookie_clsact(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("xdp/check_syncookie")
+SEC("xdp")
int check_syncookie_xdp(struct xdp_md *ctx)
{
check_syncookie(ctx, (void *)(long)ctx->data,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 31f9bce37491..e6aa2fc6ce6b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -210,7 +210,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 3d66599eee2e..199c61b7d062 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -2,7 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_adjust_tail_grow")
+SEC("xdp")
int _xdp_adjust_tail_grow(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index 22065a9cfb25..b7448253d135 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -9,9 +9,7 @@
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
-SEC("xdp_adjust_tail_shrink")
+SEC("xdp")
int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
index b360ba2bd441..807bf895f42c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_dm_log")
+SEC("xdp")
int xdpdm_devlog(struct xdp_md *ctx)
{
char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
index eb93ea95d1d8..ee7d6ac0f615 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -5,7 +5,7 @@
char LICENSE[] SEC("license") = "GPL";
-SEC("xdp/handler")
+SEC("xdp")
int xdp_handler(struct xdp_md *xdp)
{
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index fcabcda30ba3..27eb52dda92c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -206,7 +206,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 3a67921f62b5..596c4e71bf3a 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -797,7 +797,7 @@ out:
return XDP_DROP;
}
-SEC("xdp-test-v4")
+SEC("xdp")
int balancer_ingress_v4(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
@@ -816,7 +816,7 @@ int balancer_ingress_v4(struct xdp_md *ctx)
return XDP_DROP;
}
-SEC("xdp-test-v6")
+SEC("xdp")
int balancer_ingress_v6(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
index 59ee4f182ff8..532025057711 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -12,13 +12,13 @@ struct {
__uint(max_entries, 4);
} cpu_map SEC(".maps");
-SEC("xdp_redir")
+SEC("xdp")
int xdp_redir_prog(struct xdp_md *ctx)
{
return bpf_redirect_map(&cpu_map, 1, 0);
}
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index 0ac086497722..1e6b9c38ea6d 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -9,7 +9,7 @@ struct {
__uint(max_entries, 4);
} dm_ports SEC(".maps");
-SEC("xdp_redir")
+SEC("xdp")
int xdp_redir_prog(struct xdp_md *ctx)
{
return bpf_redirect_map(&dm_ports, 1, 0);
@@ -18,7 +18,7 @@ int xdp_redir_prog(struct xdp_md *ctx)
/* invalid program on DEVMAP entry;
* SEC name means expected attach type not set
*/
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c
index ea25e8881992..d988b2e0cee8 100644
--- a/tools/testing/selftests/bpf/progs/xdp_dummy.c
+++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c
@@ -4,7 +4,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 880debcbcd65..8395782b6e0a 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -34,7 +34,7 @@ struct {
__uint(max_entries, 128);
} mac_map SEC(".maps");
-SEC("xdp_redirect_map_multi")
+SEC("xdp")
int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
@@ -63,7 +63,7 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
}
/* The following 2 progs are for 2nd devmap prog testing */
-SEC("xdp_redirect_map_ingress")
+SEC("xdp")
int xdp_redirect_map_all_prog(struct xdp_md *ctx)
{
return bpf_redirect_map(&map_egress, 0,
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 6b9ca40bd1f4..4ad73847b8a5 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -86,7 +86,7 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type)
return XDP_TX;
}
-SEC("xdpclient")
+SEC("xdp")
int xdping_client(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
@@ -150,7 +150,7 @@ int xdping_client(struct xdp_md *ctx)
return XDP_TX;
}
-SEC("xdpserver")
+SEC("xdp")
int xdping_server(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
index ec4e15948e40..5252b91f48a1 100755
--- a/tools/testing/selftests/bpf/test_lirc_mode2.sh
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -3,6 +3,7 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+ret=$ksft_skip
msg="skip all tests:"
if [ $UID != 0 ]; then
@@ -25,7 +26,7 @@ do
fi
done
-if [ -n $LIRCDEV ];
+if [ -n "$LIRCDEV" ];
then
TYPE=lirc_mode2
./test_lirc_mode2_user $LIRCDEV $INPUTDEV
@@ -36,3 +37,5 @@ then
echo -e ${GREEN}"PASS: $TYPE"${NC}
fi
fi
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index b497bb85b667..6c69c42b1d60 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -120,6 +120,14 @@ setup()
ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ # disable IPv6 DAD because it sometimes takes too long and fails tests
+ ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
@@ -289,7 +297,7 @@ test_ping()
ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
RET=$?
elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
+ ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
RET=$?
else
echo " test_ping: unknown PROTO: ${PROTO}"
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
index 9b3617d770a5..fed765157c53 100755
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -77,7 +77,7 @@ TEST_IF=lo
MAX_PING_TRIES=5
BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
CLSACT_SECTION="clsact/check_syncookie"
-XDP_SECTION="xdp/check_syncookie"
+XDP_SECTION="xdp"
BPF_PROG_ID=0
PROG="${DIR}/test_tcp_check_syncookie_user"
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index c033850886f4..57c8db9972a6 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -52,8 +52,8 @@ test_xdp_redirect()
return 0
fi
- ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
- ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+ ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
+ ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
index bedff7aa7023..cc57cb87e65f 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -32,6 +32,11 @@ DRV_MODE="xdpgeneric xdpdrv xdpegress"
PASS=0
FAIL=0
LOG_DIR=$(mktemp -d)
+declare -a NS
+NS[0]="ns0-$(mktemp -u XXXXXX)"
+NS[1]="ns1-$(mktemp -u XXXXXX)"
+NS[2]="ns2-$(mktemp -u XXXXXX)"
+NS[3]="ns3-$(mktemp -u XXXXXX)"
test_pass()
{
@@ -47,11 +52,9 @@ test_fail()
clean_up()
{
- for i in $(seq $NUM); do
- ip link del veth$i 2> /dev/null
- ip netns del ns$i 2> /dev/null
+ for i in $(seq 0 $NUM); do
+ ip netns del ${NS[$i]} 2> /dev/null
done
- ip netns del ns0 2> /dev/null
}
# Kselftest framework requirement - SKIP code is 4.
@@ -79,23 +82,22 @@ setup_ns()
mode="xdpdrv"
fi
- ip netns add ns0
+ ip netns add ${NS[0]}
for i in $(seq $NUM); do
- ip netns add ns$i
- ip -n ns$i link add veth0 index 2 type veth \
- peer name veth$i netns ns0 index $((1 + $i))
- ip -n ns0 link set veth$i up
- ip -n ns$i link set veth0 up
-
- ip -n ns$i addr add 192.0.2.$i/24 dev veth0
- ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+ ip netns add ${NS[$i]}
+ ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
+ ip -n ${NS[$i]} link set veth0 up
+ ip -n ${NS[0]} link set veth$i up
+
+ ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
+ ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
# Add a neigh entry for IPv4 ping test
- ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
- ip -n ns$i link set veth0 $mode obj \
- xdp_dummy.o sec xdp_dummy &> /dev/null || \
+ ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+ ip -n ${NS[$i]} link set veth0 $mode obj \
+ xdp_dummy.o sec xdp &> /dev/null || \
{ test_fail "Unable to load dummy xdp" && exit 1; }
IFACES="$IFACES veth$i"
- veth_mac[$i]=$(ip -n ns0 link show veth$i | awk '/link\/ether/ {print $2}')
+ veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
done
}
@@ -104,10 +106,10 @@ do_egress_tests()
local mode=$1
# mac test
- ip netns exec ns2 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
- ip netns exec ns3 tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
+ ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
+ ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
sleep 0.5
- ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
sleep 0.5
pkill tcpdump
@@ -123,18 +125,18 @@ do_ping_tests()
local mode=$1
# ping6 test: echo request should be redirect back to itself, not others
- ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+ ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
- ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
- ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
- ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
+ ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
+ ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
+ ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
sleep 0.5
# ARP test
- ip netns exec ns1 arping -q -c 2 -I veth0 192.0.2.254
+ ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
# IPv4 test
- ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+ ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
# IPv6 test
- ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+ ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
sleep 0.5
pkill tcpdump
@@ -180,7 +182,7 @@ do_tests()
xdpgeneric) drv_p="-S";;
esac
- ip netns exec ns0 ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
+ ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
xdp_pid=$!
sleep 1
if ! ps -p $xdp_pid > /dev/null; then
@@ -197,10 +199,10 @@ do_tests()
kill $xdp_pid
}
-trap clean_up EXIT
-
check_env
+trap clean_up EXIT
+
for mode in ${DRV_MODE}; do
setup_ns $mode
do_tests $mode
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index 995278e684b6..a3a1eaee26ea 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -107,9 +107,9 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
-ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp
ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
-ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp
trap cleanup EXIT
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 336a749673d1..2e701e7f6968 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -108,6 +108,25 @@
.errstr = "R0 min value is outside of the allowed memory range",
},
{
+ "calls: trigger reg2btf_ids[reg->type] for reg->type > __BPF_REG_TYPE_MAX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_ref_kfunc must point",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_release", 5 },
+ },
+},
+{
"calls: overlapping caller/callee",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 842d9155d36c..79a3453dab25 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -178,9 +178,8 @@ int main(int argc, char **argv)
return 1;
}
- main_prog = bpf_object__find_program_by_title(obj,
- server ? "xdpserver" :
- "xdpclient");
+ main_prog = bpf_object__find_program_by_name(obj,
+ server ? "xdping_server" : "xdping_client");
if (main_prog)
prog_fd = bpf_program__fd(main_prog);
if (!main_prog || prog_fd < 0) {
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 623cec04ad42..0cf7e90c0052 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -221,7 +221,7 @@ int cg_find_unified_root(char *root, size_t len)
int cg_create(const char *cgroup)
{
- return mkdir(cgroup, 0644);
+ return mkdir(cgroup, 0755);
}
int cg_wait_for_proc_count(const char *cgroup, int count)
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 3df648c37876..600123503063 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -1,11 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
#include <linux/limits.h>
+#include <linux/sched.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
@@ -674,6 +677,166 @@ cleanup:
return ret;
}
+/*
+ * cgroup migration permission check should be performed based on the
+ * credentials at the time of open instead of write.
+ */
+static int test_cgcore_lesser_euid_open(const char *root)
+{
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ uid_t saved_uid;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ saved_uid = geteuid();
+ if (seteuid(test_euid))
+ goto cleanup;
+
+ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
+
+ if (seteuid(saved_uid))
+ goto cleanup;
+
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+struct lesser_ns_open_thread_arg {
+ const char *path;
+ int fd;
+ int err;
+};
+
+static int lesser_ns_open_thread_fn(void *arg)
+{
+ struct lesser_ns_open_thread_arg *targ = arg;
+
+ targ->fd = open(targ->path, O_RDWR);
+ targ->err = errno;
+ return 0;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the cgroup
+ * namespace at the time of open instead of write.
+ */
+static int test_cgcore_lesser_ns_open(const char *root)
+{
+ static char stack[65536];
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ struct lesser_ns_open_thread_arg targ = { .fd = -1 };
+ pid_t pid;
+ int status;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_b))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ targ.path = cg_test_b_procs;
+ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
+ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
+ &targ);
+ if (pid < 0)
+ goto cleanup;
+
+ if (waitpid(pid, &status, 0) < 0)
+ goto cleanup;
+
+ if (!WIFEXITED(status))
+ goto cleanup;
+
+ cg_test_b_procs_fd = targ.fd;
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
#define T(x) { x, #x }
struct corecg_test {
int (*fn)(const char *root);
@@ -689,6 +852,8 @@ struct corecg_test {
T(test_cgcore_proc_migration),
T(test_cgcore_thread_migration),
T(test_cgcore_destroy),
+ T(test_cgcore_lesser_euid_open),
+ T(test_cgcore_lesser_ns_open),
};
#undef T
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
index fedcb7b35af9..af5ea50ed5c0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -172,6 +172,17 @@ flooding_filters_add()
local lsb
local i
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
tc qdisc add dev $rp2 clsact
for i in $(eval echo {1..$num_remotes}); do
@@ -194,6 +205,12 @@ flooding_filters_del()
done
tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
}
flooding_check_packets()
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 05e65ca1c30c..23861c8faa61 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -58,6 +58,21 @@
/* CPUID.0x8000_0001.EDX */
#define CPUID_GBPAGES (1ul << 26)
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK BIT_ULL(0)
+#define PTE_WRITABLE_MASK BIT_ULL(1)
+#define PTE_USER_MASK BIT_ULL(2)
+#define PTE_ACCESSED_MASK BIT_ULL(5)
+#define PTE_DIRTY_MASK BIT_ULL(6)
+#define PTE_LARGE_MASK BIT_ULL(7)
+#define PTE_GLOBAL_MASK BIT_ULL(8)
+#define PTE_NX_MASK BIT_ULL(63)
+
+#define PAGE_SHIFT 12
+
+#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
+#define PTE_GET_PFN(pte) (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
u64 rax;
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 36407cb0ec85..f1ddfe4c4a03 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -278,7 +278,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
else
guest_test_phys_mem = p->phys_offset;
#ifdef __s390x__
- alignment = max(0x100000, alignment);
+ alignment = max(0x100000UL, alignment);
#endif
guest_test_phys_mem &= ~(alignment - 1);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index da73b97e1e6d..46057079d8bb 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -19,38 +19,6 @@
vm_vaddr_t exception_handlers;
-/* Virtual translation table structure declarations */
-struct pageUpperEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t pfn:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t reserved_07:1;
- uint64_t global:1;
- uint64_t ignored_11_09:3;
- uint64_t pfn:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
void regs_dump(FILE *stream, struct kvm_regs *regs,
uint8_t indent)
{
@@ -195,23 +163,21 @@ static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
return &page_table[index];
}
-static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
- uint64_t pt_pfn,
- uint64_t vaddr,
- uint64_t paddr,
- int level,
- enum x86_page_size page_size)
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t pt_pfn,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int level,
+ enum x86_page_size page_size)
{
- struct pageUpperEntry *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
-
- if (!pte->present) {
- pte->writable = true;
- pte->present = true;
- pte->page_size = (level == page_size);
- if (pte->page_size)
- pte->pfn = paddr >> vm->page_shift;
+ uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+
+ if (!(*pte & PTE_PRESENT_MASK)) {
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (level == page_size)
+ *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
else
- pte->pfn = vm_alloc_page_table(vm) >> vm->page_shift;
+ *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
} else {
/*
* Entry already present. Assert that the caller doesn't want
@@ -221,7 +187,7 @@ static struct pageUpperEntry *virt_create_upper_pte(struct kvm_vm *vm,
TEST_ASSERT(level != page_size,
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
page_size, vaddr);
- TEST_ASSERT(!pte->page_size,
+ TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
level, vaddr);
}
@@ -232,8 +198,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
enum x86_page_size page_size)
{
const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
"Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -257,24 +223,22 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
*/
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
vaddr, paddr, 3, page_size);
- if (pml4e->page_size)
+ if (*pml4e & PTE_LARGE_MASK)
return;
- pdpe = virt_create_upper_pte(vm, pml4e->pfn, vaddr, paddr, 2, page_size);
- if (pdpe->page_size)
+ pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+ if (*pdpe & PTE_LARGE_MASK)
return;
- pde = virt_create_upper_pte(vm, pdpe->pfn, vaddr, paddr, 1, page_size);
- if (pde->page_size)
+ pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+ if (*pde & PTE_LARGE_MASK)
return;
/* Fill in page table entry. */
- pte = virt_get_pte(vm, pde->pfn, vaddr, 0);
- TEST_ASSERT(!pte->present,
+ pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+ TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
- pte->pfn = paddr >> vm->page_shift;
- pte->writable = true;
- pte->present = 1;
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
}
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -282,12 +246,12 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
__virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
}
-static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
+static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
uint64_t vaddr)
{
uint16_t index[4];
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
struct kvm_cpuid_entry2 *entry;
struct kvm_sregs sregs;
int max_phy_addr;
@@ -329,30 +293,29 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
index[3] = (vaddr >> 39) & 0x1ffu;
pml4e = addr_gpa2hva(vm, vm->pgd);
- TEST_ASSERT(pml4e[index[3]].present,
+ TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
"Expected pml4e to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT((*(uint64_t*)(&pml4e[index[3]]) &
- (rsvd_mask | (1ull << 7))) == 0,
+ TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
"Unexpected reserved bits set.");
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
- TEST_ASSERT(pdpe[index[2]].present,
+ pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+ TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
"Expected pdpe to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT(pdpe[index[2]].page_size == 0,
+ TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
"Expected pdpe to map a pde not a 1-GByte page.");
- TEST_ASSERT((*(uint64_t*)(&pdpe[index[2]]) & rsvd_mask) == 0,
+ TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
"Unexpected reserved bits set.");
- pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
- TEST_ASSERT(pde[index[1]].present,
+ pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+ TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
"Expected pde to be present for gva: 0x%08lx", vaddr);
- TEST_ASSERT(pde[index[1]].page_size == 0,
+ TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
"Expected pde to map a pte not a 2-MByte page.");
- TEST_ASSERT((*(uint64_t*)(&pde[index[1]]) & rsvd_mask) == 0,
+ TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
"Unexpected reserved bits set.");
- pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
- TEST_ASSERT(pte[index[0]].present,
+ pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+ TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
"Expected pte to be present for gva: 0x%08lx", vaddr);
return &pte[index[0]];
@@ -360,7 +323,7 @@ static struct pageTableEntry *_vm_get_page_table_entry(struct kvm_vm *vm, int vc
uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
{
- struct pageTableEntry *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
+ uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
return *(uint64_t *)pte;
}
@@ -368,18 +331,17 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr)
void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr,
uint64_t pte)
{
- struct pageTableEntry *new_pte = _vm_get_page_table_entry(vm, vcpuid,
- vaddr);
+ uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr);
*(uint64_t *)new_pte = pte;
}
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
- struct pageUpperEntry *pml4e, *pml4e_start;
- struct pageUpperEntry *pdpe, *pdpe_start;
- struct pageUpperEntry *pde, *pde_start;
- struct pageTableEntry *pte, *pte_start;
+ uint64_t *pml4e, *pml4e_start;
+ uint64_t *pdpe, *pdpe_start;
+ uint64_t *pde, *pde_start;
+ uint64_t *pte, *pte_start;
if (!vm->pgd_created)
return;
@@ -389,58 +351,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (struct pageUpperEntry *) addr_gpa2hva(vm, vm->pgd);
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
- if (!pml4e->present)
+ if (!(*pml4e & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->pfn,
- pml4e->writable, pml4e->execute_disable);
+ addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+ !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
- pdpe_start = addr_gpa2hva(vm, pml4e->pfn * vm->page_size);
+ pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
- if (!pdpe->present)
+ if (!(*pdpe & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->pfn, pdpe->writable,
- pdpe->execute_disable);
+ PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+ !!(*pdpe & PTE_NX_MASK));
- pde_start = addr_gpa2hva(vm, pdpe->pfn * vm->page_size);
+ pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
- if (!pde->present)
+ if (!(*pde & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u\n",
+ "0x%-12lx 0x%-10llx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- (uint64_t) pde->pfn, pde->writable,
- pde->execute_disable);
+ PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+ !!(*pde & PTE_NX_MASK));
- pte_start = addr_gpa2hva(vm, pde->pfn * vm->page_size);
+ pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
- if (!pte->present)
+ if (!(*pte & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u "
+ "0x%-12lx 0x%-10llx %u %u "
" %u 0x%-10lx\n",
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
- (uint64_t) pte->pfn,
- pte->writable,
- pte->execute_disable,
- pte->dirty,
+ PTE_GET_PFN(*pte),
+ !!(*pte & PTE_WRITABLE_MASK),
+ !!(*pte & PTE_NX_MASK),
+ !!(*pte & PTE_DIRTY_MASK),
((uint64_t) n1 << 27)
| ((uint64_t) n2 << 18)
| ((uint64_t) n3 << 9)
@@ -558,8 +520,8 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
- struct pageUpperEntry *pml4e, *pdpe, *pde;
- struct pageTableEntry *pte;
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -572,22 +534,22 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
if (!vm->pgd_created)
goto unmapped_gva;
pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present)
+ if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].pfn * vm->page_size);
- if (!pdpe[index[2]].present)
+ pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
+ if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pde = addr_gpa2hva(vm, pdpe[index[2]].pfn * vm->page_size);
- if (!pde[index[1]].present)
+ pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
+ if (!(pde[index[1]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- pte = addr_gpa2hva(vm, pde[index[1]].pfn * vm->page_size);
- if (!pte[index[0]].present)
+ pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
+ if (!(pte[index[0]] & PTE_PRESENT_MASK))
goto unmapped_gva;
- return (pte[index[0]].pfn * vm->page_size) + (gva & 0xfffu);
+ return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & 0xfffu);
unmapped_gva:
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 38edea25631b..b642411ceb6c 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -6,5 +6,6 @@ CONFIG_HARDENED_USERCOPY=y
# CONFIG_HARDENED_USERCOPY_FALLBACK is not set
CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_UBSAN=y
CONFIG_UBSAN_BOUNDS=y
CONFIG_UBSAN_TRAP=y
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index b019e0b8221c..84fda3b49073 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no)
if (in_shutdown++)
return;
+ /* Free the cpu_set allocated using CPU_ALLOC in main function */
+ CPU_FREE(cpu_set);
+
for (i = 0; i < num_cpus_to_pin; i++)
if (cpu_threads[i]) {
pthread_kill(cpu_threads[i], SIGUSR1);
@@ -551,6 +554,12 @@ int main(int argc, char *argv[])
perror("sysconf(_SC_NPROCESSORS_ONLN)");
exit(1);
}
+
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
+ "require root in order to modify\nsystem settings. "
+ "Exiting.\n");
+
cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
cpu_set = CPU_ALLOC(cpus_online);
if (cpu_set == NULL) {
@@ -589,7 +598,7 @@ int main(int argc, char *argv[])
cpu_set)) {
fprintf(stderr, "Any given CPU may "
"only be given once.\n");
- exit(1);
+ goto err_code;
} else
CPU_SET_S(cpus_to_pin[cpu],
cpu_set_size, cpu_set);
@@ -607,7 +616,7 @@ int main(int argc, char *argv[])
queue_path = malloc(strlen(option) + 2);
if (!queue_path) {
perror("malloc()");
- exit(1);
+ goto err_code;
}
queue_path[0] = '/';
queue_path[1] = 0;
@@ -622,17 +631,12 @@ int main(int argc, char *argv[])
fprintf(stderr, "Must pass at least one CPU to continuous "
"mode.\n");
poptPrintUsage(popt_context, stderr, 0);
- exit(1);
+ goto err_code;
} else if (!continuous_mode) {
num_cpus_to_pin = 1;
cpus_to_pin[0] = cpus_online - 1;
}
- if (getuid() != 0)
- ksft_exit_skip("Not running as root, but almost all tests "
- "require root in order to modify\nsystem settings. "
- "Exiting.\n");
-
max_msgs = fopen(MAX_MSGS, "r+");
max_msgsize = fopen(MAX_MSGSIZE, "r+");
if (!max_msgs)
@@ -740,4 +744,9 @@ int main(int argc, char *argv[])
sleep(1);
}
shutdown(0, "", 0);
+
+err_code:
+ CPU_FREE(cpu_set);
+ exit(1);
+
}
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index 3dece8b29253..b57e91e1c3f2 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -218,10 +218,10 @@ main(int argc, char **argv)
/* Test 1:
* veriyf that SIGURG is
- * delivered and 63 bytes are
- * read and oob is '@'
+ * delivered, 63 bytes are
+ * read, oob is '@', and POLLPRI works.
*/
- wait_for_data(pfd, POLLIN | POLLPRI);
+ wait_for_data(pfd, POLLPRI);
read_oob(pfd, &oob);
len = read_data(pfd, buf, 1024);
if (!signal_recvd || len != 63 || oob != '@') {
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index a3402cd8d5b6..9ff22f28032d 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -61,9 +61,12 @@ setup_prepare()
vrf_prepare
mirror_gre_topo_create
+ # Avoid changing br1's PVID while it is operational as a L3 interface.
+ ip link set dev br1 down
ip link set dev $swp3 master br1
bridge vlan add dev br1 vid 555 pvid untagged self
+ ip link set dev br1 up
ip address add dev br1 192.0.2.129/28
ip address add dev br1 2001:db8:2::1/64
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 559173a8e387..d75fa97609c1 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -445,6 +445,8 @@ do_transfer()
local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -537,6 +539,23 @@ do_transfer()
fi
fi
+ if $checksum; then
+ local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+ local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
+ if [ $csum_err_s_nr -gt 0 ]; then
+ printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
+ rets=1
+ fi
+
+ local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
+ if [ $csum_err_c_nr -gt 0 ]; then
+ printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
+ retc=1
+ fi
+ fi
+
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
printf "[ OK ]"
fi
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 59067f64b775..2672ac0b6d1f 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -421,7 +421,7 @@ static void usage(const char *progname)
"Options:\n"
" -4 only IPv4\n"
" -6 only IPv6\n"
- " -c <clock> monotonic (default) or tai\n"
+ " -c <clock> monotonic or tai (default)\n"
" -D <addr> destination IP address (server)\n"
" -S <addr> source IP address (client)\n"
" -r run rx mode\n"
@@ -475,7 +475,7 @@ static void parse_opts(int argc, char **argv)
cfg_rx = true;
break;
case 't':
- cfg_start_time_ns = strtol(optarg, NULL, 0);
+ cfg_start_time_ns = strtoll(optarg, NULL, 0);
break;
case 'm':
cfg_mark = strtol(optarg, NULL, 0);
diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
index 534c8b7699ab..6fadc8e2f116 100755
--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -118,11 +118,11 @@ echo "[ OK ]"
# Move the underlay to a non-default VRF
ip -netns hv-1 link set veth0 vrf vrf-underlay
-ip -netns hv-1 link set veth0 down
-ip -netns hv-1 link set veth0 up
+ip -netns hv-1 link set vxlan0 down
+ip -netns hv-1 link set vxlan0 up
ip -netns hv-2 link set veth0 vrf vrf-underlay
-ip -netns hv-2 link set veth0 down
-ip -netns hv-2 link set veth0 up
+ip -netns hv-2 link set vxlan0 down
+ip -netns hv-2 link set vxlan0 up
echo -n "Check VM connectivity through VXLAN (underlay in a VRF) "
ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index aee631c5284e..044bc0e9ed81 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -325,8 +325,8 @@ int main(int argc, char **argv)
struct ifreq device;
struct ifreq hwtstamp;
struct hwtstamp_config hwconfig, hwconfig_requested;
- struct so_timestamping so_timestamping_get = { 0, -1 };
- struct so_timestamping so_timestamping = { 0, -1 };
+ struct so_timestamping so_timestamping_get = { 0, 0 };
+ struct so_timestamping so_timestamping = { 0, 0 };
struct sockaddr_in addr;
struct ip_mreq imr;
struct in_addr iaddr;
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 363f56081eff..66f0f724a1a6 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -71,8 +71,8 @@ usage () {
echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
- echo " --doall"
- echo " --doallmodconfig / --do-no-allmodconfig"
+ echo " --do-all"
+ echo " --do-allmodconfig / --do-no-allmodconfig"
echo " --do-clocksourcewd / --do-no-clocksourcewd"
echo " --do-kasan / --do-no-kasan"
echo " --do-kcsan / --do-no-kcsan"
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 1d64891e6492..34ebd1fe5eed 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -955,7 +955,7 @@ TEST(ERRNO_valid)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(E2BIG, errno);
}
@@ -974,7 +974,7 @@ TEST(ERRNO_zero)
EXPECT_EQ(parent, syscall(__NR_getppid));
/* "errno" of 0 is ok. */
- EXPECT_EQ(0, read(0, NULL, 0));
+ EXPECT_EQ(0, read(-1, NULL, 0));
}
/*
@@ -995,7 +995,7 @@ TEST(ERRNO_capped)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(4095, errno);
}
@@ -1026,7 +1026,7 @@ TEST(ERRNO_order)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(12, errno);
}
@@ -2579,7 +2579,7 @@ void *tsync_sibling(void *data)
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
if (!ret)
return (void *)SIBLING_EXIT_NEWPRIVS;
- read(0, NULL, 0);
+ read(-1, NULL, 0);
return (void *)SIBLING_EXIT_UNKILLED;
}
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 7f12d55b97f8..472b27ccd7dc 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
.PHONY: all clean
-CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh $(CC) \
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \
../x86/trivial_64bit_program.c)
ifndef OBJCOPY
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index d9605bd10f2d..d8ae7cc01274 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm selftests
+LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h
+
include local_config.mk
uname_M := $(shell uname -m 2>/dev/null || echo not)
@@ -48,13 +50,13 @@ TEST_GEN_FILES += split_huge_page_test
TEST_GEN_FILES += ksm_tests
ifeq ($(MACHINE),x86_64)
-CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
-TARGETS := protection_keys
-BINARIES_32 := $(TARGETS:%=%_32)
-BINARIES_64 := $(TARGETS:%=%_64)
+VMTARGETS := protection_keys
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
ifeq ($(CAN_BUILD_WITH_NOPIE),1)
CFLAGS += -no-pie
@@ -107,7 +109,7 @@ $(BINARIES_32): CFLAGS += -m32 -mxsave
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
$(BINARIES_32): $(OUTPUT)/%_32: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
ifeq ($(CAN_BUILD_X86_64),1)
@@ -115,7 +117,7 @@ $(BINARIES_64): CFLAGS += -m64 -mxsave
$(BINARIES_64): LDLIBS += -lrt -ldl
$(BINARIES_64): $(OUTPUT)/%_64: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
endif
# x86_64 users should be encouraged to install 32-bit libraries
@@ -139,10 +141,6 @@ endif
$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
-$(OUTPUT)/gup_test: ../../../../mm/gup_test.h
-
-$(OUTPUT)/hmm-tests: local_config.h
-
# HMM_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
$(OUTPUT)/hmm-tests: LDLIBS += $(HMM_EXTRA_LIBS)
diff --git a/tools/testing/selftests/vm/mremap_test.c b/tools/testing/selftests/vm/mremap_test.c
index 0624d1bd71b5..8f4dbbd60c09 100644
--- a/tools/testing/selftests/vm/mremap_test.c
+++ b/tools/testing/selftests/vm/mremap_test.c
@@ -6,9 +6,11 @@
#include <errno.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
+#include <stdbool.h>
#include "../kselftest.h"
@@ -65,6 +67,112 @@ enum {
}
/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g text, stack) else returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+ void *remap_addr = NULL;
+ bool ret = true;
+
+ /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+ remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+
+ if (remap_addr == MAP_FAILED) {
+ if (errno == EEXIST)
+ ret = false;
+ } else {
+ munmap(remap_addr, size);
+ }
+
+ return ret;
+}
+
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+ FILE *fp;
+ int n_matched;
+ static unsigned long long addr;
+
+ if (addr)
+ return addr;
+
+ fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+ if (fp == NULL) {
+ ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ exit(KSFT_SKIP);
+ }
+
+ n_matched = fscanf(fp, "%llu", &addr);
+ if (n_matched != 1) {
+ ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ fclose(fp);
+ exit(KSFT_SKIP);
+ }
+
+ fclose(fp);
+ return addr;
+}
+
+/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g text, stack) else returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+ void *remap_addr = NULL;
+ bool ret = true;
+
+ /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+ remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+
+ if (remap_addr == MAP_FAILED) {
+ if (errno == EEXIST)
+ ret = false;
+ } else {
+ munmap(remap_addr, size);
+ }
+
+ return ret;
+}
+
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+ FILE *fp;
+ int n_matched;
+ static unsigned long long addr;
+
+ if (addr)
+ return addr;
+
+ fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+ if (fp == NULL) {
+ ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ exit(KSFT_SKIP);
+ }
+
+ n_matched = fscanf(fp, "%llu", &addr);
+ if (n_matched != 1) {
+ ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ fclose(fp);
+ exit(KSFT_SKIP);
+ }
+
+ fclose(fp);
+ return addr;
+}
+
+/*
* Returns the start address of the mapping on success, else returns
* NULL on failure.
*/
@@ -72,11 +180,18 @@ static void *get_source_mapping(struct config c)
{
unsigned long long addr = 0ULL;
void *src_addr = NULL;
+ unsigned long long mmap_min_addr;
+
+ mmap_min_addr = get_mmap_min_addr();
+
retry:
addr += c.src_alignment;
+ if (addr < mmap_min_addr)
+ goto retry;
+
src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
- MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
- -1, 0);
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
if (src_addr == MAP_FAILED) {
if (errno == EPERM || errno == EEXIST)
goto retry;
@@ -91,8 +206,10 @@ retry:
* alignment in the tests.
*/
if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
- !((unsigned long long) src_addr & c.src_alignment))
+ !((unsigned long long) src_addr & c.src_alignment)) {
+ munmap(src_addr, c.region_size);
goto retry;
+ }
if (!src_addr)
goto error;
@@ -141,9 +258,20 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
if (!((unsigned long long) addr & c.dest_alignment))
addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+ /* Don't destroy existing mappings unless expected to overlap */
+ while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
+ /* Check for unsigned overflow */
+ if (addr + c.dest_alignment < addr) {
+ ksft_print_msg("Couldn't find a valid region to remap to\n");
+ ret = -1;
+ goto out;
+ }
+ addr += c.dest_alignment;
+ }
+
clock_gettime(CLOCK_MONOTONIC, &t_start);
dest_addr = mremap(src_addr, c.region_size, c.region_size,
- MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+ MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
clock_gettime(CLOCK_MONOTONIC, &t_end);
if (dest_addr == MAP_FAILED) {
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index b4142cd1c5c2..02a77056bca3 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -6,9 +6,9 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 3e2089c8cf54..8c669c0d662e 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -7,7 +7,7 @@ CC="$1"
TESTPROG="$2"
shift 2
-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
echo 1
else
echo 0
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index cb3f29c09aff..23f142af544a 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -130,6 +130,7 @@ static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
memset(dev, 0, sizeof *dev);
dev->vdev.features = features;
INIT_LIST_HEAD(&dev->vdev.vqs);
+ spin_lock_init(&dev->vdev.vqs_list_lock);
dev->buf_size = 1024;
dev->buf = malloc(dev->buf_size);
assert(dev->buf);