From d78e20c081e744812cba9d12933a0afe5bc09e61 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 19 Nov 2024 10:01:30 -0800 Subject: perf script python: Improve physical mem type resolution Previously system RAM and persistent memory were matched against hard-coded labels; change this so that the label of the memory region is just read from /proc/iomem. This avoids frequent N/A samples. Change the /proc/iomem reading, event processing and output so that nested entries appear and their counts count toward their parent. As labels may be repeated, include the memory ranges in the output to make it clear why, for example, "System RAM" appears twice. Before: Event: mem_inst_retired.all_loads:P Memory type count percentage ---------------------------------------- ---------- ---------- System RAM 9460 96.5% N/A 998 3.5% After: Event: mem_inst_retired.all_loads:P Memory type count percentage ---------------------------------------- ---------- ---------- 100000000-105f7fffff : System RAM 36741 96.5 841400000-8416599ff : Kernel data 89 0.2 840800000-8412a6fff : Kernel rodata 60 0.2 841ebe000-8423fffff : Kernel bss 34 0.1 0-fff : Reserved 1345 3.5 100000-89dd9fff : System RAM 2 0.0 Before: Event: mem_inst_retired.any:P Memory type count percentage ---------------------------------------- ----------- ----------- System RAM 9460 90.5% N/A 998 9.5% After: Event: mem_inst_retired.any:P Memory type count percentage ---------------------------------------- ---------- ---------- 100000000-105f7fffff : System RAM 9460 90.5 841400000-8416599ff : Kernel data 45 0.4 840800000-8412a6fff : Kernel rodata 19 0.2 841ebe000-8423fffff : Kernel bss 12 0.1 0-fff : Reserved 998 9.5 The code has been updated to Python 3 with type hints, and issues reported by mypy and pylint have been resolved. Tabs are swapped to spaces as preferred in PEP8, because most lines of this small file were modified and this makes pylint significantly less noisy. 
Committer testing: root@number:/tmp# grep -m1 "model name" /proc/cpuinfo model name : Intel(R) Core(TM) i7-14700K root@number:/tmp# root@number:/tmp# perf script mem-phys-addr -a find / /bin /lib /lib64 /sbin Warning: 744 out of order events recorded. Event: cpu_core/mem_inst_retired.all_loads/P Memory type count percentage ---------------------------------------- ---------- ---------- 100000000-8bfbfffff : System RAM 364561 76.5 621400000-6223a6fff : Kernel rodata 10474 2.2 622400000-62283d4bf : Kernel data 4828 1.0 623304000-6237fffff : Kernel bss 1063 0.2 620000000-6213fffff : Kernel code 98 0.0 0-fff : Reserved 111480 23.4 100000-2b0ca017 : System RAM 337 0.1 2fbad000-30d92fff : System RAM 44 0.0 2c79d000-2fbabfff : System RAM 30 0.0 30d94000-316d5fff : System RAM 16 0.0 2b131a58-2c71dfff : System RAM 7 0.0 root@number:/tmp# Signed-off-by: Ian Rogers Acked-by: Kan Liang Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20241119180130.19160-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/mem-phys-addr.py | 177 +++++++++++++++++------------ 1 file changed, 102 insertions(+), 75 deletions(-) (limited to 'tools/perf/scripts/python') diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py index 1f332e72b9b0..5e237a5a5f1b 100644 --- a/tools/perf/scripts/python/mem-phys-addr.py +++ b/tools/perf/scripts/python/mem-phys-addr.py @@ -3,98 +3,125 @@ # # Copyright (c) 2018, Intel Corporation. 
-from __future__ import division -from __future__ import print_function - import os import sys -import struct import re import bisect import collections +from dataclasses import dataclass +from typing import (Dict, Optional) sys.path.append(os.environ['PERF_EXEC_PATH'] + \ - '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +@dataclass(frozen=True) +class IomemEntry: + """Read from a line in /proc/iomem""" + begin: int + end: int + indent: int + label: str -#physical address ranges for System RAM -system_ram = [] -#physical address ranges for Persistent Memory -pmem = [] -#file object for proc iomem -f = None -#Count for each type of memory -load_mem_type_cnt = collections.Counter() -#perf event name -event_name = None +# Physical memory layout from /proc/iomem. Key is the indent and then +# a list of ranges. +iomem: Dict[int, list[IomemEntry]] = collections.defaultdict(list) +# Child nodes from the iomem parent. +children: Dict[IomemEntry, set[IomemEntry]] = collections.defaultdict(set) +# Maximum indent seen before an entry in the iomem file. +max_indent: int = 0 +# Count for each range of memory. +load_mem_type_cnt: Dict[IomemEntry, int] = collections.Counter() +# Perf event name set from the first sample in the data. 
+event_name: Optional[str] = None def parse_iomem(): - global f - f = open('/proc/iomem', 'r') - for i, j in enumerate(f): - m = re.split('-|:',j,2) - if m[2].strip() == 'System RAM': - system_ram.append(int(m[0], 16)) - system_ram.append(int(m[1], 16)) - if m[2].strip() == 'Persistent Memory': - pmem.append(int(m[0], 16)) - pmem.append(int(m[1], 16)) + """Populate iomem from /proc/iomem file""" + global iomem + global max_indent + global children + with open('/proc/iomem', 'r', encoding='ascii') as f: + for line in f: + indent = 0 + while line[indent] == ' ': + indent += 1 + if indent > max_indent: + max_indent = indent + m = re.split('-|:', line, 2) + begin = int(m[0], 16) + end = int(m[1], 16) + label = m[2].strip() + entry = IomemEntry(begin, end, indent, label) + # Before adding entry, search for a parent node using its begin. + if indent > 0: + parent = find_memory_type(begin) + assert parent, f"Given indent expected a parent for {label}" + children[parent].add(entry) + iomem[indent].append(entry) -def print_memory_type(): - print("Event: %s" % (event_name)) - print("%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), end='') - print("%-40s %10s %10s\n" % ("----------------------------------------", - "-----------", "-----------"), - end=''); - total = sum(load_mem_type_cnt.values()) - for mem_type, count in sorted(load_mem_type_cnt.most_common(), \ - key = lambda kv: (kv[1], kv[0]), reverse = True): - print("%-40s %10d %10.1f%%\n" % - (mem_type, count, 100 * count / total), - end='') +def find_memory_type(phys_addr) -> Optional[IomemEntry]: + """Search iomem for the range containing phys_addr with the maximum indent""" + for i in range(max_indent, -1, -1): + if i not in iomem: + continue + position = bisect.bisect_right(iomem[i], phys_addr, + key=lambda entry: entry.begin) + if position is None: + continue + iomem_entry = iomem[i][position-1] + if iomem_entry.begin <= phys_addr <= iomem_entry.end: + return iomem_entry + print(f"Didn't find 
{phys_addr}") + return None -def trace_begin(): - parse_iomem() +def print_memory_type(): + print(f"Event: {event_name}") + print(f"{'Memory type':<40} {'count':>10} {'percentage':>10}") + print(f"{'-' * 40:<40} {'-' * 10:>10} {'-' * 10:>10}") + total = sum(load_mem_type_cnt.values()) + # Add count from children into the parent. + for i in range(max_indent, -1, -1): + if i not in iomem: + continue + for entry in iomem[i]: + global children + for child in children[entry]: + if load_mem_type_cnt[child] > 0: + load_mem_type_cnt[entry] += load_mem_type_cnt[child] -def trace_end(): - print_memory_type() - f.close() + def print_entries(entries): + """Print counts from parents down to their children""" + global children + for entry in sorted(entries, + key = lambda entry: load_mem_type_cnt[entry], + reverse = True): + count = load_mem_type_cnt[entry] + if count > 0: + mem_type = ' ' * entry.indent + f"{entry.begin:x}-{entry.end:x} : {entry.label}" + percent = 100 * count / total + print(f"{mem_type:<40} {count:>10} {percent:>10.1f}") + print_entries(children[entry]) -def is_system_ram(phys_addr): - #/proc/iomem is sorted - position = bisect.bisect(system_ram, phys_addr) - if position % 2 == 0: - return False - return True + print_entries(iomem[0]) -def is_persistent_mem(phys_addr): - position = bisect.bisect(pmem, phys_addr) - if position % 2 == 0: - return False - return True +def trace_begin(): + parse_iomem() -def find_memory_type(phys_addr): - if phys_addr == 0: - return "N/A" - if is_system_ram(phys_addr): - return "System RAM" +def trace_end(): + print_memory_type() - if is_persistent_mem(phys_addr): - return "Persistent Memory" +def process_event(param_dict): + if "sample" not in param_dict: + return - #slow path, search all - f.seek(0, 0) - for j in f: - m = re.split('-|:',j,2) - if int(m[0], 16) <= phys_addr <= int(m[1], 16): - return m[2] - return "N/A" + sample = param_dict["sample"] + if "phys_addr" not in sample: + return -def process_event(param_dict): - 
name = param_dict["ev_name"] - sample = param_dict["sample"] - phys_addr = sample["phys_addr"] + phys_addr = sample["phys_addr"] + entry = find_memory_type(phys_addr) + if entry: + load_mem_type_cnt[entry] += 1 - global event_name - if event_name == None: - event_name = name - load_mem_type_cnt[find_memory_type(phys_addr)] += 1 + global event_name + if event_name is None: + event_name = param_dict["ev_name"] -- cgit v1.2.3 From e7e9943c87d857da650f228fdf6cb47b785b3ff9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 18 Nov 2024 17:16:23 -0800 Subject: perf python: Remove python 2 scripting support Python2 was deprecated 4 years ago, remove support and workarounds. Signed-off-by: Ian Rogers Acked-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Colin Ian King Cc: Dapeng Mi Cc: Howard Chu Cc: Ilya Leoshkevich Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Kan Liang Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Cc: Veronika Molnarova Cc: Weilin Wang Link: https://lore.kernel.org/r/20241119011644.971342-2-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/Perf-Trace-Util/Context.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'tools/perf/scripts/python') diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 01f54d6724a5..d742daaa5d5a 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -24,16 +24,6 @@ #include "../../../util/srcline.h" #include "../../../util/srccode.h" -#if PY_MAJOR_VERSION < 3 -#define _PyCapsule_GetPointer(arg1, arg2) \ - PyCObject_AsVoidPtr(arg1) -#define _PyBytes_FromStringAndSize(arg1, arg2) \ - PyString_FromStringAndSize((arg1), (arg2)) -#define _PyUnicode_AsUTF8(arg) \ - PyString_AsString(arg) - -PyMODINIT_FUNC 
initperf_trace_context(void); -#else #define _PyCapsule_GetPointer(arg1, arg2) \ PyCapsule_GetPointer((arg1), (arg2)) #define _PyBytes_FromStringAndSize(arg1, arg2) \ @@ -42,7 +32,6 @@ PyMODINIT_FUNC initperf_trace_context(void); PyUnicode_AsUTF8(arg) PyMODINIT_FUNC PyInit_perf_trace_context(void); -#endif static struct scripting_context *get_args(PyObject *args, const char *name, PyObject **arg2) { @@ -213,12 +202,6 @@ static PyMethodDef ContextMethods[] = { { NULL, NULL, 0, NULL} }; -#if PY_MAJOR_VERSION < 3 -PyMODINIT_FUNC initperf_trace_context(void) -{ - (void) Py_InitModule("perf_trace_context", ContextMethods); -} -#else PyMODINIT_FUNC PyInit_perf_trace_context(void) { static struct PyModuleDef moduledef = { @@ -240,4 +223,3 @@ PyMODINIT_FUNC PyInit_perf_trace_context(void) return mod; } -#endif -- cgit v1.2.3 From 1ff2ca39b39f2ff5718a74bc4c92183b8eb9763f Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Mon, 18 Nov 2024 17:16:32 -0800 Subject: perf script: Move script_fetch_insn to trace-event-scripting.c Add native_arch as a parameter to script_fetch_insn rather than relying on the builtin-script value that won't be initialized for the dlfilter and python Context use cases. Assume both of those cases are running natively. 
Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Acked-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Athira Rajeev Cc: Colin Ian King Cc: Dapeng Mi Cc: Howard Chu Cc: Ilya Leoshkevich Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Josh Poimboeuf Cc: Kan Liang Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Thomas Richter Cc: Veronika Molnarova Cc: Weilin Wang Link: https://lore.kernel.org/r/20241119011644.971342-11-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/Perf-Trace-Util/Context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/scripts/python') diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index d742daaa5d5a..60dcfe56d4d9 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -93,7 +93,7 @@ static PyObject *perf_sample_insn(PyObject *obj, PyObject *args) if (c->sample->ip && !c->sample->insn_len && thread__maps(c->al->thread)) { struct machine *machine = maps__machine(thread__maps(c->al->thread)); - script_fetch_insn(c->sample, c->al->thread, machine); + script_fetch_insn(c->sample, c->al->thread, machine, /*native_arch=*/true); } if (!c->sample->insn_len) Py_RETURN_NONE; /* N.B. This is a return statement */ -- cgit v1.2.3