From 8f09142e4f9bacb7f3b9f41c864ef3eb2cfa27df Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 14 Jul 2025 10:17:08 +0200 Subject: idr test suite: remove usage of the deprecated ida_simple_xx() API Patch series "ida: Remove the ida_simple_xxx() API", v3. These are the final steps in removing the ida_simple_xxx() API. This series was last proposed in August 2024. Since then, some users of the old API have been re-introduced and then removed. A first time in drivers/misc/rpmb-core.c, added in commit 1e9046e3a154 ("rpmb: add Replay Protected Memory Block (RPMB) subsystem") (2024-08-26) and removed in commit dfc881abca42 ("rpmb: Remove usage of the deprecated ida_simple_xx() API") (2024-10-13). A second time in drivers/gpio/gpio-mpsse.c, added in commit c46a74ff05c0 ("gpio: add support for FTDI's MPSSE as GPIO") (2024-10-14) and removed in commit f57c08492866 ("gpio: mpsse: Remove usage of the deprecated ida_simple_xx() API") (2024-11-22). Since then, I've not spotted any new usage. So things being stable now, it's time to end this story once and for all. This patch (of 3): ida_alloc() and ida_free() should be preferred to the deprecated ida_simple_get() and ida_simple_remove(). Note that the upper limit of ida_simple_get() is exclusive, but the one of ida_alloc_range()/ida_alloc_max() is inclusive. But because of the ranges used for the tests, there is no need to adjust them. While at it, remove some useless {}. 
Link: https://lkml.kernel.org/r/cover.1752480043.git.christophe.jaillet@wanadoo.fr Link: https://lkml.kernel.org/r/2904fa2006e4fe58eea63aef87fa7f832c7804a1.1752480043.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Acked-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- tools/testing/radix-tree/idr-test.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 84b8c3c92c79..2f830ff8396c 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -499,19 +499,17 @@ void ida_check_random(void) goto repeat; } -void ida_simple_get_remove_test(void) +void ida_alloc_free_test(void) { DEFINE_IDA(ida); unsigned long i; - for (i = 0; i < 10000; i++) { - assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i); - } - assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0); + for (i = 0; i < 10000; i++) + assert(ida_alloc_max(&ida, 20000, GFP_KERNEL) == i); + assert(ida_alloc_range(&ida, 5, 30, GFP_KERNEL) < 0); - for (i = 0; i < 10000; i++) { - ida_simple_remove(&ida, i); - } + for (i = 0; i < 10000; i++) + ida_free(&ida, i); assert(ida_is_empty(&ida)); ida_destroy(&ida); @@ -524,7 +522,7 @@ void user_ida_checks(void) ida_check_nomem(); ida_check_conv_user(); ida_check_random(); - ida_simple_get_remove_test(); + ida_alloc_free_test(); radix_tree_cpu_dead(1); } -- cgit v1.2.3 From b1e34412998d628dfa8ba3da042bb60dee232b6c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 19 Aug 2025 21:19:17 +0300 Subject: proc: test lseek on /proc/net/dev This line in tools/testing/selftests/proc/read.c was added to catch oopses, not to verify lseek correctness: (void)lseek(fd, 0, SEEK_SET); Oh, well. Prevent more embarrassment with a simple test. 
Link: https://lkml.kernel.org/r/aKTCfMuRXOpjBXxI@p183 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/proc/Makefile | 1 + tools/testing/selftests/proc/proc-net-dev-lseek.c | 68 +++++++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 tools/testing/selftests/proc/proc-net-dev-lseek.c (limited to 'tools') diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore index 19bb333e2485..243f4537a670 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -7,6 +7,7 @@ /proc-loadavg-001 /proc-maps-race /proc-multiple-procfs +/proc-net-dev-lseek /proc-empty-vm /proc-pid-vm /proc-self-map-files-001 diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 50aba102201a..2a9547630115 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -10,6 +10,7 @@ TEST_GEN_PROGS += fd-003-kthread TEST_GEN_PROGS += proc-2-is-kthread TEST_GEN_PROGS += proc-loadavg-001 TEST_GEN_PROGS += proc-maps-race +TEST_GEN_PROGS += proc-net-dev-lseek TEST_GEN_PROGS += proc-empty-vm TEST_GEN_PROGS += proc-pid-vm TEST_GEN_PROGS += proc-self-map-files-001 diff --git a/tools/testing/selftests/proc/proc-net-dev-lseek.c b/tools/testing/selftests/proc/proc-net-dev-lseek.c new file mode 100644 index 000000000000..742a3e804451 --- /dev/null +++ b/tools/testing/selftests/proc/proc-net-dev-lseek.c @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2025 Alexey Dobriyan + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#undef _GNU_SOURCE +#define _GNU_SOURCE +#undef NDEBUG +#include +#include +#include +#include +#include +#include +/* + * Test that lseek("/proc/net/dev/", 0, SEEK_SET) + * a) works, + * b) does what you think it does. + */ +int main(void) +{ + /* /proc/net/dev output is deterministic in fresh netns only. */ + if (unshare(CLONE_NEWNET) == -1) { + if (errno == ENOSYS || errno == EPERM) { + return 4; + } + return 1; + } + + const int fd = open("/proc/net/dev", O_RDONLY); + assert(fd >= 0); + + char buf1[4096]; + const ssize_t rv1 = read(fd, buf1, sizeof(buf1)); + /* + * Not "<=", this file can't be empty: + * there is header, "lo" interface with some zeroes. + */ + assert(0 < rv1); + assert(rv1 <= sizeof(buf1)); + + /* Believe it or not, this line broke one day. */ + assert(lseek(fd, 0, SEEK_SET) == 0); + + char buf2[4096]; + const ssize_t rv2 = read(fd, buf2, sizeof(buf2)); + /* Not "<=", see above. */ + assert(0 < rv2); + assert(rv2 <= sizeof(buf2)); + + /* Test that lseek rewinds to the beginning of the file. */ + assert(rv1 == rv2); + assert(memcmp(buf1, buf2, rv1) == 0); + + /* Contents of the file is not validated: this test is about lseek(). 
*/ + + return 0; +} -- cgit v1.2.3 From 17bdc64c0d419b78bb400c221d36eaa391d16b3a Mon Sep 17 00:00:00 2001 From: Bala-Vignesh-Reddy Date: Wed, 20 Aug 2025 23:26:10 +0530 Subject: selftests: proc: mark vsyscall strings maybe-unused The str_vsyscall_* constants in proc-pid-vm.c triggers -Wunused-const-variable warnings with gcc-13.32 and clang 18.1. Define and apply __maybe_unused locally to suppress the warnings. No functional change Fixes compiler warning: warning: `str_vsyscall_*' defined but not used[-Wunused-const-variable] Link: https://lkml.kernel.org/r/20250820175610.83014-1-reddybalavignesh9979@gmail.com Signed-off-by: Bala-Vignesh-Reddy Cc: Shuah Khan Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- tools/testing/selftests/proc/proc-pid-vm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index d04685771952..978cbcb3eb11 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -47,6 +47,10 @@ #include #include +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + #include "../kselftest.h" static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags) @@ -218,12 +222,12 @@ static int make_exe(const uint8_t *payload, size_t len) * 2: vsyscall VMA is r-xp vsyscall=emulate */ static volatile int g_vsyscall; -static const char *str_vsyscall; +static const char *str_vsyscall __maybe_unused; -static const char str_vsyscall_0[] = ""; -static const char str_vsyscall_1[] = +static const char str_vsyscall_0[] __maybe_unused = ""; +static const char str_vsyscall_1[] __maybe_unused = "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; -static const char str_vsyscall_2[] = +static const char str_vsyscall_2[] __maybe_unused = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 
[vsyscall]\n"; #ifdef __x86_64__ -- cgit v1.2.3 From 0471440c8061e9a7c0fd292c7c6598d6304efd53 Mon Sep 17 00:00:00 2001 From: Fan Yu Date: Sun, 7 Sep 2025 00:12:05 +0800 Subject: tools/delaytop: add flexible sorting by delay field Patch series "tools/delaytop: implement real-time keyboard interaction support", v2. Current Limitations =================== The current delaytop implementation has two main limitations: 1) Static sorting only by CPU delay Forcing users to restart with different parameters to analyze other resource bottlenecks. 2) Memory delay information is always expanded Causing information overload when only high-level memory pressure monitoring is needed. Improvements ============ 1) Implemented dynamic sorting capability - Interactive key 'o' triggers sort mode. - Supports sorting by CPU/IO/Memory/IRQ delays. - Memory subcategories available in verbose mode. * c - CPU delay (default) * i - IO delay * m - Total memory delay * q - IRQ delay * s/r/t/p/w - Memory subcategories (in verbose mode) 2) Added memory display modes - Compact view (default): shows aggregated memory delays. - Verbose view ('M' key): breaks down into memory sub-delays. * SWAP - swapin delays * RCL - freepages reclaim delays * THR - thrashing delays * CMP - compaction delays * WP - write-protect copy delays Practical benefits ================== 1) Dynamic Sorting for Real-Time Bottleneck Detection System administrators can now dynamically change sorting to identify different types of resource bottlenecks without restarting. 2) Enhanced Usability with On-Screen Keybindings More intuitive interactive usage with on-screen keybindings help. Reduced screen clutter when only memory overview is needed. 
Use Case ======== # ./delaytop System Pressure Information: (avg10/avg60/avg300/total) CPU some: 0.0%/ 0.0%/ 0.0%/ 106817(ms) CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms) IO full: 0.0%/ 0.0%/ 0.0%/ 2245(ms) IO some: 0.0%/ 0.0%/ 0.0%/ 2791(ms) IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms) [o]sort [M]memverbose [q]quit Top 20 processes (sorted by cpu delay): PID TGID COMMAND CPU(ms) IO(ms) IRQ(ms) MEM(ms) ------------------------------------------------------------------------ 110 110 kworker/15:0H-s 27.91 0.00 0.00 0.00 57 57 cpuhp/7 3.18 0.00 0.00 0.00 99 99 cpuhp/14 2.97 0.00 0.00 0.00 51 51 cpuhp/6 0.90 0.00 0.00 0.00 44 44 kworker/4:0H-sy 0.80 0.00 0.00 0.00 76 76 idle_inject/10 0.31 0.00 0.00 0.00 100 100 idle_inject/14 0.30 0.00 0.00 0.00 1309 1309 systemsettings 0.29 0.00 0.00 0.00 60 60 ksoftirqd/7 0.28 0.00 0.00 0.00 45 45 cpuhp/5 0.22 0.00 0.00 0.00 63 63 cpuhp/8 0.20 0.00 0.00 0.00 87 87 cpuhp/12 0.18 0.00 0.00 0.00 93 93 cpuhp/13 0.17 0.00 0.00 0.00 1265 1265 acpid 0.17 0.00 0.00 0.00 1552 1552 sshd 0.17 0.00 0.00 0.00 2584 2584 sddm-helper 0.16 0.00 0.00 0.00 1284 1284 rtkit-daemon 0.15 0.00 0.00 0.00 1326 1326 nde-netfilter 0.14 0.00 0.00 0.00 27 27 cpuhp/2 0.13 0.00 0.00 0.00 631 631 kworker/11:2-rc 0.11 0.00 0.00 0.00 # ./delaytop -M System Pressure Information: (avg10/avg60/avg300/total) CPU some: 0.0%/ 0.0%/ 0.0%/ 106827(ms) CPU full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory full: 0.0%/ 0.0%/ 0.0%/ 0(ms) Memory some: 0.0%/ 0.0%/ 0.0%/ 0(ms) IO full: 0.0%/ 0.0%/ 0.0%/ 2245(ms) IO some: 0.0%/ 0.0%/ 0.0%/ 2791(ms) IRQ full: 0.0%/ 0.0%/ 0.0%/ 0(ms) [o]sort [M]memverbose [q]quit Top 20 processes (sorted by mem delay): PID TGID COMMAND MEM(ms) SWAP(ms) RCL(ms) THR(ms) CMP(ms) WP(ms) ------------------------------------------------------------------------------------------ 121732 121732 delaytop 0.01 0.00 0.00 0.00 0.00 0.01 95876 95876 top 0.00 0.00 0.00 0.00 0.00 0.00 121641 121641 systemd-userwor 0.00 0.00 0.00 0.00 
0.00 0.00 121693 121693 systemd-userwor 0.00 0.00 0.00 0.00 0.00 0.00 121661 121661 systemd-userwor 0.00 0.00 0.00 0.00 0.00 0.00 1 1 systemd 0.00 0.00 0.00 0.00 0.00 0.00 2 2 kthreadd 0.00 0.00 0.00 0.00 0.00 0.00 3 3 pool_workqueue_ 0.00 0.00 0.00 0.00 0.00 0.00 4 4 kworker/R-rcu_g 0.00 0.00 0.00 0.00 0.00 0.00 5 5 kworker/R-rcu_p 0.00 0.00 0.00 0.00 0.00 0.00 6 6 kworker/R-slub_ 0.00 0.00 0.00 0.00 0.00 0.00 7 7 kworker/R-netns 0.00 0.00 0.00 0.00 0.00 0.00 9 9 kworker/0:0H-sy 0.00 0.00 0.00 0.00 0.00 0.00 11 11 kworker/u32:0-n 0.00 0.00 0.00 0.00 0.00 0.00 12 12 kworker/R-mm_pe 0.00 0.00 0.00 0.00 0.00 0.00 13 13 rcu_tasks_kthre 0.00 0.00 0.00 0.00 0.00 0.00 14 14 rcu_tasks_rude_ 0.00 0.00 0.00 0.00 0.00 0.00 15 15 rcu_tasks_trace 0.00 0.00 0.00 0.00 0.00 0.00 16 16 ksoftirqd/0 0.00 0.00 0.00 0.00 0.00 0.00 17 17 rcu_preempt 0.00 0.00 0.00 0.00 0.00 0.00 When psi is not enabled: # ./delaytop System Pressure Information: (avg10/avg60/avg300/total) PSI not found: check if psi=1 enabled in cmdline This patch (of 5): The delaytop tool only supported sorting by CPU delay, which limited its usefulness when users needed to identify bottlenecks in other subsystems. Users had no way to sort processes by IO, IRQ, or other delay types to quickly pinpoint specific performance issues. Add a -s/--sort option to allow sorting by different delay types. Users can now quickly identify bottlenecks in specific subsystems by sorting processes by the relevant delay metric. 
Link: https://lkml.kernel.org/r/20250907001101305vrTGnXaRNvtmsGkp-Ljk_@zte.com.cn Link: https://lkml.kernel.org/r/20250907001205573L3XpsQMIQnLgDqiiKYd3H@zte.com.cn Signed-off-by: Fan Yu Reviewed-by: xu xin Cc: Jonathan Corbet Cc: Wang Yaxin Cc: Yang Yang Signed-off-by: Andrew Morton --- tools/accounting/delaytop.c | 153 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 121 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index 9afb1ffc00ba..52718714496b 100644 --- a/tools/accounting/delaytop.c +++ b/tools/accounting/delaytop.c @@ -42,6 +42,7 @@ #include #include #include +#include #define PSI_CPU_SOME "/proc/pressure/cpu" #define PSI_CPU_FULL "/proc/pressure/cpu" @@ -61,6 +62,7 @@ #define TASK_COMM_LEN 16 #define MAX_MSG_SIZE 1024 #define MAX_TASKS 1000 +#define MAX_BUF_LEN 256 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field #define BOOL_FPRINT(stream, fmt, ...) \ ({ \ @@ -68,17 +70,11 @@ ret >= 0; \ }) #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" - -/* Program settings structure */ -struct config { - int delay; /* Update interval in seconds */ - int iterations; /* Number of iterations, 0 == infinite */ - int max_processes; /* Maximum number of processes to show */ - char sort_field; /* Field to sort by */ - int output_one_time; /* Output once and exit */ - int monitor_pid; /* Monitor specific PID */ - char *container_path; /* Path to container cgroup */ -}; +#define SORT_FIELD(name) \ + {#name, \ + offsetof(struct task_info, name##_delay_total), \ + offsetof(struct task_info, name##_count)} +#define END_FIELD {NULL, 0, 0} /* PSI statistics structure */ struct psi_stats { @@ -130,6 +126,24 @@ struct container_stats { int nr_io_wait; /* Number of processes in IO wait */ }; +/* Delay field structure */ +struct field_desc { + const char *name; /* Field name for cmdline argument */ + unsigned long total_offset; /* Offset of total 
delay in task_info */ + unsigned long count_offset; /* Offset of count in task_info */ +}; + +/* Program settings structure */ +struct config { + int delay; /* Update interval in seconds */ + int iterations; /* Number of iterations, 0 == infinite */ + int max_processes; /* Maximum number of processes to show */ + int output_one_time; /* Output once and exit */ + int monitor_pid; /* Monitor specific PID */ + char *container_path; /* Path to container cgroup */ + const struct field_desc *sort_field; /* Current sort field */ +}; + /* Global variables */ static struct config cfg; static struct psi_stats psi; @@ -137,6 +151,17 @@ static struct task_info tasks[MAX_TASKS]; static int task_count; static int running = 1; static struct container_stats container_stats; +static const struct field_desc sort_fields[] = { + SORT_FIELD(cpu), + SORT_FIELD(blkio), + SORT_FIELD(irq), + SORT_FIELD(swapin), + SORT_FIELD(freepages), + SORT_FIELD(thrashing), + SORT_FIELD(compact), + SORT_FIELD(wpcopy), + END_FIELD +}; /* Netlink socket variables */ static int nl_sd = -1; @@ -158,18 +183,59 @@ static void disable_raw_mode(void) tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios); } +/* Find field descriptor by name with string comparison */ +static const struct field_desc *get_field_by_name(const char *name) +{ + const struct field_desc *field; + size_t field_len; + + for (field = sort_fields; field->name != NULL; field++) { + field_len = strlen(field->name); + if (field_len != strlen(name)) + continue; + if (strncmp(field->name, name, field_len) == 0) + return field; + } + + return NULL; +} + +/* Find display name for a field descriptor */ +static const char *get_name_by_field(const struct field_desc *field) +{ + return field ? 
field->name : "UNKNOWN"; +} + +/* Generate string of available field names */ +static void display_available_fields(void) +{ + const struct field_desc *field; + char buf[MAX_BUF_LEN]; + + buf[0] = '\0'; + + for (field = sort_fields; field->name != NULL; field++) { + strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1); + strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1); + buf[MAX_BUF_LEN - 1] = '\0'; + } + + fprintf(stderr, "Available fields: %s\n", buf); +} + /* Display usage information and command line options */ static void usage(void) { printf("Usage: delaytop [Options]\n" "Options:\n" - " -h, --help Show this help message and exit\n" - " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" - " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" - " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" - " -o, --once Display once and exit\n" - " -p, --pid=PID Monitor only the specified PID\n" - " -C, --container=PATH Monitor the container at specified cgroup path\n"); + " -h, --help Show this help message and exit\n" + " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" + " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" + " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" + " -o, --once Display once and exit\n" + " -p, --pid=PID Monitor only the specified PID\n" + " -C, --container=PATH Monitor the container at specified cgroup path\n" + " -s, --sort=FIELD Sort by delay field (default: cpu)\n"); exit(0); } @@ -177,6 +243,7 @@ static void usage(void) static void parse_args(int argc, char **argv) { int c; + const struct field_desc *field; struct option long_options[] = { {"help", no_argument, 0, 'h'}, {"delay", required_argument, 0, 'd'}, @@ -184,6 +251,7 @@ static void parse_args(int argc, char **argv) {"pid", required_argument, 0, 'p'}, {"once", no_argument, 0, 'o'}, {"processes", 
required_argument, 0, 'P'}, + {"sort", required_argument, 0, 's'}, {"container", required_argument, 0, 'C'}, {0, 0, 0, 0} }; @@ -192,7 +260,7 @@ static void parse_args(int argc, char **argv) cfg.delay = 2; cfg.iterations = 0; cfg.max_processes = 20; - cfg.sort_field = 'c'; /* Default sort by CPU delay */ + cfg.sort_field = &sort_fields[0]; /* Default sorted by CPU delay */ cfg.output_one_time = 0; cfg.monitor_pid = 0; /* 0 means monitor all PIDs */ cfg.container_path = NULL; @@ -200,7 +268,7 @@ static void parse_args(int argc, char **argv) while (1) { int option_index = 0; - c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index); + c = getopt_long(argc, argv, "hd:n:p:oP:C:s:", long_options, &option_index); if (c == -1) break; @@ -247,6 +315,22 @@ static void parse_args(int argc, char **argv) case 'C': cfg.container_path = strdup(optarg); break; + case 's': + if (strlen(optarg) == 0) { + fprintf(stderr, "Error: empty sort field\n"); + exit(1); + } + + field = get_field_by_name(optarg); + /* Show available fields if invalid option provided */ + if (!field) { + fprintf(stderr, "Error: invalid sort field '%s'\n", optarg); + display_available_fields(); + exit(1); + } + + cfg.sort_field = field; + break; default: fprintf(stderr, "Try 'delaytop --help' for more information.\n"); exit(1); @@ -587,19 +671,23 @@ static int compare_tasks(const void *a, const void *b) { const struct task_info *t1 = (const struct task_info *)a; const struct task_info *t2 = (const struct task_info *)b; + unsigned long long total1; + unsigned long long total2; + unsigned long count1; + unsigned long count2; double avg1, avg2; - switch (cfg.sort_field) { - case 'c': /* CPU */ - avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count); - avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count); - if (avg1 != avg2) - return avg2 > avg1 ? 1 : -1; - return t2->cpu_delay_total > t1->cpu_delay_total ? 
1 : -1; + total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset); + total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset); + count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset); + count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset); - default: - return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1; - } + avg1 = average_ms(total1, count1); + avg2 = average_ms(total2, count2); + if (avg1 != avg2) + return avg2 > avg1 ? 1 : -1; + + return 0; } /* Sort tasks by selected field */ @@ -738,8 +826,9 @@ static void display_results(void) container_stats.nr_stopped, container_stats.nr_uninterruptible, container_stats.nr_io_wait); } - suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n", - cfg.max_processes); + /* Task delay output */ + suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n", + cfg.max_processes, get_name_by_field(cfg.sort_field)); suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND"); suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n", "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)", -- cgit v1.2.3 From 99d9c55f88e69ebbfc90e05ce7c320bdb3901d03 Mon Sep 17 00:00:00 2001 From: Fan Yu Date: Sun, 7 Sep 2025 00:12:52 +0800 Subject: tools/delaytop: add memory verbose mode support The original delaytop tool always displayed detailed memory subsystem breakdown, which could be overwhelming for users who only need high-level overview. Add flexible display control allowing users to choose their preferred information granularity. 
The new flexibility provides: 1) For quick monitoring: use normal mode to reduce visual clutter 2) For deep analysis: use verbose mode to see all memory subsystem details Link: https://lkml.kernel.org/r/202509070012527934u0ySb3teQ4gOYKnocyNO@zte.com.cn Signed-off-by: Fan Yu Reviewed-by: xu xin Signed-off-by: Andrew Morton --- tools/accounting/delaytop.c | 128 +++++++++++++++++++++++++++++++++----------- 1 file changed, 98 insertions(+), 30 deletions(-) (limited to 'tools') diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index 52718714496b..30dc95fb531a 100644 --- a/tools/accounting/delaytop.c +++ b/tools/accounting/delaytop.c @@ -69,13 +69,22 @@ int ret = fprintf(stream, fmt, ##__VA_ARGS__); \ ret >= 0; \ }) +#define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count) #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" -#define SORT_FIELD(name) \ +#define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n" +#define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n" +#define SORT_FIELD(name, modes) \ {#name, \ offsetof(struct task_info, name##_delay_total), \ - offsetof(struct task_info, name##_count)} + offsetof(struct task_info, name##_count), \ + modes} #define END_FIELD {NULL, 0, 0} +/* Display mode types */ +#define MODE_TYPE_ALL (0xFFFFFFFF) +#define MODE_DEFAULT (1 << 0) +#define MODE_MEMVERBOSE (1 << 1) + /* PSI statistics structure */ struct psi_stats { double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300; @@ -115,6 +124,8 @@ struct task_info { unsigned long long wpcopy_delay_total; unsigned long long irq_count; unsigned long long irq_delay_total; + unsigned long long mem_count; + unsigned long long mem_delay_total; }; /* Container statistics structure */ @@ -131,6 +142,7 @@ struct field_desc { const char *name; /* Field name for cmdline argument */ unsigned long total_offset; /* Offset of total delay in task_info */ unsigned long count_offset; /* Offset of count in task_info */ + 
size_t supported_modes; /* Supported display modes */ }; /* Program settings structure */ @@ -142,6 +154,7 @@ struct config { int monitor_pid; /* Monitor specific PID */ char *container_path; /* Path to container cgroup */ const struct field_desc *sort_field; /* Current sort field */ + size_t display_mode; /* Current display mode */ }; /* Global variables */ @@ -152,14 +165,15 @@ static int task_count; static int running = 1; static struct container_stats container_stats; static const struct field_desc sort_fields[] = { - SORT_FIELD(cpu), - SORT_FIELD(blkio), - SORT_FIELD(irq), - SORT_FIELD(swapin), - SORT_FIELD(freepages), - SORT_FIELD(thrashing), - SORT_FIELD(compact), - SORT_FIELD(wpcopy), + SORT_FIELD(cpu, MODE_DEFAULT), + SORT_FIELD(blkio, MODE_DEFAULT), + SORT_FIELD(irq, MODE_DEFAULT), + SORT_FIELD(mem, MODE_DEFAULT | MODE_MEMVERBOSE), + SORT_FIELD(swapin, MODE_MEMVERBOSE), + SORT_FIELD(freepages, MODE_MEMVERBOSE), + SORT_FIELD(thrashing, MODE_MEMVERBOSE), + SORT_FIELD(compact, MODE_MEMVERBOSE), + SORT_FIELD(wpcopy, MODE_MEMVERBOSE), END_FIELD }; @@ -207,7 +221,7 @@ static const char *get_name_by_field(const struct field_desc *field) } /* Generate string of available field names */ -static void display_available_fields(void) +static void display_available_fields(size_t mode) { const struct field_desc *field; char buf[MAX_BUF_LEN]; @@ -215,6 +229,8 @@ static void display_available_fields(void) buf[0] = '\0'; for (field = sort_fields; field->name != NULL; field++) { + if (!(field->supported_modes & mode)) + continue; strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1); strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1); buf[MAX_BUF_LEN - 1] = '\0'; @@ -235,7 +251,8 @@ static void usage(void) " -o, --once Display once and exit\n" " -p, --pid=PID Monitor only the specified PID\n" " -C, --container=PATH Monitor the container at specified cgroup path\n" - " -s, --sort=FIELD Sort by delay field (default: cpu)\n"); + " -s, --sort=FIELD Sort by delay field 
(default: cpu)\n" + " -M, --memverbose Display memory detailed information\n"); exit(0); } @@ -253,6 +270,7 @@ static void parse_args(int argc, char **argv) {"processes", required_argument, 0, 'P'}, {"sort", required_argument, 0, 's'}, {"container", required_argument, 0, 'C'}, + {"memverbose", no_argument, 0, 'M'}, {0, 0, 0, 0} }; @@ -264,11 +282,12 @@ static void parse_args(int argc, char **argv) cfg.output_one_time = 0; cfg.monitor_pid = 0; /* 0 means monitor all PIDs */ cfg.container_path = NULL; + cfg.display_mode = MODE_DEFAULT; while (1) { int option_index = 0; - c = getopt_long(argc, argv, "hd:n:p:oP:C:s:", long_options, &option_index); + c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index); if (c == -1) break; @@ -325,12 +344,16 @@ static void parse_args(int argc, char **argv) /* Show available fields if invalid option provided */ if (!field) { fprintf(stderr, "Error: invalid sort field '%s'\n", optarg); - display_available_fields(); + display_available_fields(MODE_TYPE_ALL); exit(1); } cfg.sort_field = field; break; + case 'M': + cfg.display_mode = MODE_MEMVERBOSE; + cfg.sort_field = get_field_by_name("mem"); + break; default: fprintf(stderr, "Try 'delaytop --help' for more information.\n"); exit(1); @@ -338,6 +361,25 @@ static void parse_args(int argc, char **argv) } } +/* Calculate average delay in milliseconds for overall memory */ +static void set_mem_delay_total(struct task_info *t) +{ + t->mem_delay_total = t->swapin_delay_total + + t->freepages_delay_total + + t->thrashing_delay_total + + t->compact_delay_total + + t->wpcopy_delay_total; +} + +static void set_mem_count(struct task_info *t) +{ + t->mem_count = t->swapin_count + + t->freepages_count + + t->thrashing_count + + t->compact_count + + t->wpcopy_count; +} + /* Create a raw netlink socket and bind */ static int create_nl_socket(void) { @@ -611,6 +653,8 @@ static void fetch_and_fill_task_info(int pid, const char *comm) SET_TASK_STAT(task_count, wpcopy_delay_total); 
SET_TASK_STAT(task_count, irq_count); SET_TASK_STAT(task_count, irq_delay_total); + set_mem_count(&tasks[task_count]); + set_mem_delay_total(&tasks[task_count]); task_count++; } break; @@ -829,27 +873,44 @@ static void display_results(void) /* Task delay output */ suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n", cfg.max_processes, get_name_by_field(cfg.sort_field)); - suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND"); - suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n", - "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)", - "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)"); - suc &= BOOL_FPRINT(out, "-----------------------------------------------"); - suc &= BOOL_FPRINT(out, "----------------------------------------------\n"); + suc &= BOOL_FPRINT(out, "%8s %8s %-17s", "PID", "TGID", "COMMAND"); + if (cfg.display_mode == MODE_MEMVERBOSE) { + suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n", + "MEM(ms)", "SWAP(ms)", "RCL(ms)", + "THR(ms)", "CMP(ms)", "WP(ms)"); + suc &= BOOL_FPRINT(out, "-----------------------"); + suc &= BOOL_FPRINT(out, "-----------------------"); + suc &= BOOL_FPRINT(out, "-----------------------"); + suc &= BOOL_FPRINT(out, "---------------------\n"); + } else { + suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n", + "CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)"); + suc &= BOOL_FPRINT(out, "-----------------------"); + suc &= BOOL_FPRINT(out, "-----------------------"); + suc &= BOOL_FPRINT(out, "--------------------------\n"); + } + count = task_count < cfg.max_processes ? 
task_count : cfg.max_processes; for (i = 0; i < count; i++) { - suc &= BOOL_FPRINT(out, "%5d %5d %-15s", + suc &= BOOL_FPRINT(out, "%8d %8d %-15s", tasks[i].pid, tasks[i].tgid, tasks[i].command); - suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", - average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count), - average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count), - average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count), - average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count), - average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count), - average_ms(tasks[i].compact_delay_total, tasks[i].compact_count), - average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count), - average_ms(tasks[i].irq_delay_total, tasks[i].irq_count)); + if (cfg.display_mode == MODE_MEMVERBOSE) { + suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE, + TASK_AVG(tasks[i], mem), + TASK_AVG(tasks[i], swapin), + TASK_AVG(tasks[i], freepages), + TASK_AVG(tasks[i], thrashing), + TASK_AVG(tasks[i], compact), + TASK_AVG(tasks[i], wpcopy)); + } else { + suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT, + TASK_AVG(tasks[i], cpu), + TASK_AVG(tasks[i], blkio), + TASK_AVG(tasks[i], irq), + TASK_AVG(tasks[i], mem)); + } } suc &= BOOL_FPRINT(out, "\n"); @@ -891,6 +952,13 @@ int main(int argc, char **argv) /* Main loop */ while (running) { + /* Exit when sort field do not match display mode */ + if (!(cfg.sort_field->supported_modes & cfg.display_mode)) { + fprintf(stderr, "Sort field not supported in this mode\n"); + display_available_fields(cfg.display_mode); + break; + } + /* Read PSI statistics */ read_psi_stats(); -- cgit v1.2.3 From 5e57515d81f9003555b7a4d246e02f1ee9c74ffa Mon Sep 17 00:00:00 2001 From: Fan Yu Date: Sun, 7 Sep 2025 00:13:38 +0800 Subject: tools/delaytop: add interactive mode with keyboard controls The original delaytop only supported static output with limited interaction. 
Users had to restart the tool with different command-line options to change sorting or display modes, which disrupted continuous monitoring and reduced productivity during performance investigations. Adds real-time interactive controls through keyboard input: 1) Add interactive menu system with visual prompts 2) Support dynamic sorting changes without restarting 3) Enable toggle of memory verbose mode with 'M' key The interactive mode transforms delaytop from a static monitoring tool into a dynamic investigation platform, allowing users to adapt the view in real-time based on observed performance patterns. Link: https://lkml.kernel.org/r/20250907001338580EURha20BxWFmBSrUpS8D1@zte.com.cn Signed-off-by: Fan Yu Reviewed-by: xu xin Signed-off-by: Andrew Morton --- tools/accounting/delaytop.c | 166 ++++++++++++++++++++++++++++++++------------ 1 file changed, 121 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index 30dc95fb531a..7bd1a1eeb354 100644 --- a/tools/accounting/delaytop.c +++ b/tools/accounting/delaytop.c @@ -73,8 +73,8 @@ #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" #define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n" #define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n" -#define SORT_FIELD(name, modes) \ - {#name, \ +#define SORT_FIELD(name, cmd, modes) \ + {#name, #cmd, \ offsetof(struct task_info, name##_delay_total), \ offsetof(struct task_info, name##_count), \ modes} @@ -140,6 +140,7 @@ struct container_stats { /* Delay field structure */ struct field_desc { const char *name; /* Field name for cmdline argument */ + const char *cmd_char; /* Interactive command */ unsigned long total_offset; /* Offset of total delay in task_info */ unsigned long count_offset; /* Offset of count in task_info */ size_t supported_modes; /* Supported display modes */ @@ -165,17 +166,18 @@ static int task_count; static int running = 1; static struct container_stats 
container_stats; static const struct field_desc sort_fields[] = { - SORT_FIELD(cpu, MODE_DEFAULT), - SORT_FIELD(blkio, MODE_DEFAULT), - SORT_FIELD(irq, MODE_DEFAULT), - SORT_FIELD(mem, MODE_DEFAULT | MODE_MEMVERBOSE), - SORT_FIELD(swapin, MODE_MEMVERBOSE), - SORT_FIELD(freepages, MODE_MEMVERBOSE), - SORT_FIELD(thrashing, MODE_MEMVERBOSE), - SORT_FIELD(compact, MODE_MEMVERBOSE), - SORT_FIELD(wpcopy, MODE_MEMVERBOSE), + SORT_FIELD(cpu, c, MODE_DEFAULT), + SORT_FIELD(blkio, i, MODE_DEFAULT), + SORT_FIELD(irq, q, MODE_DEFAULT), + SORT_FIELD(mem, m, MODE_DEFAULT | MODE_MEMVERBOSE), + SORT_FIELD(swapin, s, MODE_MEMVERBOSE), + SORT_FIELD(freepages, r, MODE_MEMVERBOSE), + SORT_FIELD(thrashing, t, MODE_MEMVERBOSE), + SORT_FIELD(compact, p, MODE_MEMVERBOSE), + SORT_FIELD(wpcopy, w, MODE_MEMVERBOSE), END_FIELD }; +static int sort_selected; /* Netlink socket variables */ static int nl_sd = -1; @@ -197,6 +199,19 @@ static void disable_raw_mode(void) tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios); } +/* Find field descriptor by interactive command character */ +static const struct field_desc *get_field_by_cmd_char(char ch) +{ + const struct field_desc *field; + + for (field = sort_fields; field->name != NULL; field++) { + if (field->cmd_char[0] == ch) + return field; + } + + return NULL; +} + /* Find field descriptor by name with string comparison */ static const struct field_desc *get_field_by_name(const char *name) { @@ -870,6 +885,18 @@ static void display_results(void) container_stats.nr_stopped, container_stats.nr_uninterruptible, container_stats.nr_io_wait); } + + /* Interactive command */ + suc &= BOOL_FPRINT(out, "[o]sort [M]memverbose [q]quit\n"); + if (sort_selected) { + if (cfg.display_mode == MODE_MEMVERBOSE) + suc &= BOOL_FPRINT(out, + "sort selection: [m]MEM [r]RCL [t]THR [p]CMP [w]WP\n"); + else + suc &= BOOL_FPRINT(out, + "sort selection: [c]CPU [i]IO [m]MEM [q]IRQ\n"); + } + /* Task delay output */ suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n", 
cfg.max_processes, get_name_by_field(cfg.sort_field)); @@ -919,11 +946,78 @@ static void display_results(void) perror("Error writing to output"); } +/* Check for keyboard input with timeout based on cfg.delay */ +static char check_for_keypress(void) +{ + struct timeval tv = {cfg.delay, 0}; + fd_set readfds; + char ch = 0; + + FD_ZERO(&readfds); + FD_SET(STDIN_FILENO, &readfds); + int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv); + + if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { + read(STDIN_FILENO, &ch, 1); + return ch; + } + + return 0; +} + +#define MAX_MODE_SIZE 2 +static void toggle_display_mode(void) +{ + static const size_t modes[MAX_MODE_SIZE] = {MODE_DEFAULT, MODE_MEMVERBOSE}; + static size_t cur_index; + + cur_index = (cur_index + 1) % MAX_MODE_SIZE; + cfg.display_mode = modes[cur_index]; +} + +/* Handle keyboard input: sorting selection, mode toggle, or quit */ +static void handle_keypress(char ch, int *running) +{ + const struct field_desc *field; + + /* Change sort field */ + if (sort_selected) { + field = get_field_by_cmd_char(ch); + if (field && (field->supported_modes & cfg.display_mode)) + cfg.sort_field = field; + + sort_selected = 0; + /* Handle mode changes or quit */ + } else { + switch (ch) { + case 'o': + sort_selected = 1; + break; + case 'M': + toggle_display_mode(); + for (field = sort_fields; field->name != NULL; field++) { + if (field->supported_modes & cfg.display_mode) { + cfg.sort_field = field; + break; + } + } + break; + case 'q': + case 'Q': + *running = 0; + break; + default: + break; + } + } +} + /* Main function */ int main(int argc, char **argv) { + const struct field_desc *field; int iterations = 0; - int use_q_quit = 0; + char keypress; /* Parse command line arguments */ parse_args(argc, argv); @@ -943,20 +1037,20 @@ int main(int argc, char **argv) exit(1); } - if (!cfg.output_one_time) { - use_q_quit = 1; - enable_raw_mode(); - printf("Press 'q' to quit.\n"); - fflush(stdout); - } + /* Set terminal to 
non-canonical mode for interaction */ + enable_raw_mode(); /* Main loop */ while (running) { - /* Exit when sort field do not match display mode */ + /* Auto-switch sort field when not matching display mode */ if (!(cfg.sort_field->supported_modes & cfg.display_mode)) { - fprintf(stderr, "Sort field not supported in this mode\n"); - display_available_fields(cfg.display_mode); - break; + for (field = sort_fields; field->name != NULL; field++) { + if (field->supported_modes & cfg.display_mode) { + cfg.sort_field = field; + printf("Auto-switched sort field to: %s\n", field->name); + break; + } + } } /* Read PSI statistics */ @@ -983,32 +1077,14 @@ int main(int argc, char **argv) if (cfg.output_one_time) break; - /* Check for 'q' key to quit */ - if (use_q_quit) { - struct timeval tv = {cfg.delay, 0}; - fd_set readfds; - - FD_ZERO(&readfds); - FD_SET(STDIN_FILENO, &readfds); - int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv); - - if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { - char ch = 0; - - read(STDIN_FILENO, &ch, 1); - if (ch == 'q' || ch == 'Q') { - running = 0; - break; - } - } - } else { - sleep(cfg.delay); - } + /* Keypress for interactive usage */ + keypress = check_for_keypress(); + if (keypress) + handle_keypress(keypress, &running); } /* Restore terminal mode */ - if (use_q_quit) - disable_raw_mode(); + disable_raw_mode(); /* Cleanup */ close(nl_sd); -- cgit v1.2.3 From 0c10f9cd812f6f32dca928010e132a7d89812666 Mon Sep 17 00:00:00 2001 From: Fan Yu Date: Sun, 7 Sep 2025 00:14:17 +0800 Subject: tools/delaytop: improve error handling for missing PSI support Enhanced display logic to conditionally show PSI information only when successfully read, with helpful guidance for users to enable PSI support (psi=1 cmdline parameter). 
Link: https://lkml.kernel.org/r/20250907001417537vSx6nUsb3ILqI0iQ-WnGp@zte.com.cn Signed-off-by: Fan Yu Reviewed-by: xu xin Signed-off-by: Andrew Morton --- tools/accounting/delaytop.c | 182 ++++++++++++++++++++++++++++---------------- 1 file changed, 116 insertions(+), 66 deletions(-) (limited to 'tools') diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c index 7bd1a1eeb354..72cc500b44b1 100644 --- a/tools/accounting/delaytop.c +++ b/tools/accounting/delaytop.c @@ -44,13 +44,11 @@ #include #include -#define PSI_CPU_SOME "/proc/pressure/cpu" -#define PSI_CPU_FULL "/proc/pressure/cpu" -#define PSI_MEMORY_SOME "/proc/pressure/memory" -#define PSI_MEMORY_FULL "/proc/pressure/memory" -#define PSI_IO_SOME "/proc/pressure/io" -#define PSI_IO_FULL "/proc/pressure/io" -#define PSI_IRQ_FULL "/proc/pressure/irq" +#define PSI_PATH "/proc/pressure" +#define PSI_CPU_PATH "/proc/pressure/cpu" +#define PSI_MEMORY_PATH "/proc/pressure/memory" +#define PSI_IO_PATH "/proc/pressure/io" +#define PSI_IRQ_PATH "/proc/pressure/irq" #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) @@ -499,87 +497,134 @@ static int get_family_id(int sd) return id; } -static void read_psi_stats(void) +static int read_psi_stats(void) { FILE *fp; char line[256]; int ret = 0; + int error_count = 0; + + /* Check if PSI path exists */ + if (access(PSI_PATH, F_OK) != 0) { + fprintf(stderr, "Error: PSI interface not found at %s\n", PSI_PATH); + fprintf(stderr, "Please ensure your kernel supports PSI (Pressure Stall Information)\n"); + return -1; + } + /* Zero all fields */ memset(&psi, 0, sizeof(psi)); + /* CPU pressure */ - fp = fopen(PSI_CPU_SOME, "r"); + fp = fopen(PSI_CPU_PATH, "r"); if (fp) { while (fgets(line, sizeof(line), fp)) { if (strncmp(line, "some", 4) == 0) { ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", &psi.cpu_some_avg10, &psi.cpu_some_avg60, &psi.cpu_some_avg300, 
&psi.cpu_some_total); - if (ret != 4) + if (ret != 4) { fprintf(stderr, "Failed to parse CPU some PSI data\n"); + error_count++; + } } else if (strncmp(line, "full", 4) == 0) { ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", &psi.cpu_full_avg10, &psi.cpu_full_avg60, &psi.cpu_full_avg300, &psi.cpu_full_total); - if (ret != 4) + if (ret != 4) { fprintf(stderr, "Failed to parse CPU full PSI data\n"); + error_count++; + } } } fclose(fp); + } else { + fprintf(stderr, "Warning: Failed to open %s\n", PSI_CPU_PATH); + error_count++; } + /* Memory pressure */ - fp = fopen(PSI_MEMORY_SOME, "r"); + fp = fopen(PSI_MEMORY_PATH, "r"); if (fp) { while (fgets(line, sizeof(line), fp)) { if (strncmp(line, "some", 4) == 0) { ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", &psi.memory_some_avg10, &psi.memory_some_avg60, &psi.memory_some_avg300, &psi.memory_some_total); - if (ret != 4) + if (ret != 4) { fprintf(stderr, "Failed to parse Memory some PSI data\n"); + error_count++; + } } else if (strncmp(line, "full", 4) == 0) { ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", &psi.memory_full_avg10, &psi.memory_full_avg60, &psi.memory_full_avg300, &psi.memory_full_total); - } - if (ret != 4) + if (ret != 4) { fprintf(stderr, "Failed to parse Memory full PSI data\n"); + error_count++; + } + } } fclose(fp); + } else { + fprintf(stderr, "Warning: Failed to open %s\n", PSI_MEMORY_PATH); + error_count++; } + /* IO pressure */ - fp = fopen(PSI_IO_SOME, "r"); + fp = fopen(PSI_IO_PATH, "r"); if (fp) { while (fgets(line, sizeof(line), fp)) { if (strncmp(line, "some", 4) == 0) { ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", &psi.io_some_avg10, &psi.io_some_avg60, &psi.io_some_avg300, &psi.io_some_total); - if (ret != 4) + if (ret != 4) { fprintf(stderr, "Failed to parse IO some PSI data\n"); + error_count++; + } } else if (strncmp(line, "full", 4) == 0) { ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf 
total=%llu", &psi.io_full_avg10, &psi.io_full_avg60, &psi.io_full_avg300, &psi.io_full_total); - if (ret != 4) + if (ret != 4) { fprintf(stderr, "Failed to parse IO full PSI data\n"); + error_count++; + } } } fclose(fp); + } else { + fprintf(stderr, "Warning: Failed to open %s\n", PSI_IO_PATH); + error_count++; } + /* IRQ pressure (only full) */ - fp = fopen(PSI_IRQ_FULL, "r"); + fp = fopen(PSI_IRQ_PATH, "r"); if (fp) { while (fgets(line, sizeof(line), fp)) { if (strncmp(line, "full", 4) == 0) { ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", &psi.irq_full_avg10, &psi.irq_full_avg60, &psi.irq_full_avg300, &psi.irq_full_total); - if (ret != 4) + if (ret != 4) { fprintf(stderr, "Failed to parse IRQ full PSI data\n"); + error_count++; + } } } fclose(fp); + } else { + fprintf(stderr, "Warning: Failed to open %s\n", PSI_IRQ_PATH); + error_count++; } + + /* Return error count: 0 means success, >0 means warnings, -1 means fatal error */ + if (error_count > 0) { + fprintf(stderr, "PSI stats reading completed with %d warnings\n", error_count); + return error_count; + } + + return 0; } static int read_comm(int pid, char *comm_buf, size_t buf_size) @@ -820,7 +865,7 @@ static void get_container_stats(void) } /* Display results to stdout or log file */ -static void display_results(void) +static void display_results(int psi_ret) { time_t now = time(NULL); struct tm *tm_now = localtime(&now); @@ -833,49 +878,53 @@ static void display_results(void) suc &= BOOL_FPRINT(out, "\033[H\033[J"); /* PSI output (one-line, no cat style) */ - suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n"); - suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, - "CPU some:", - psi.cpu_some_avg10, - psi.cpu_some_avg60, - psi.cpu_some_avg300, - psi.cpu_some_total / 1000); - suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, - "CPU full:", - psi.cpu_full_avg10, - psi.cpu_full_avg60, - psi.cpu_full_avg300, - psi.cpu_full_total / 1000); - suc &= BOOL_FPRINT(out, 
PSI_LINE_FORMAT, - "Memory full:", - psi.memory_full_avg10, - psi.memory_full_avg60, - psi.memory_full_avg300, - psi.memory_full_total / 1000); - suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, - "Memory some:", - psi.memory_some_avg10, - psi.memory_some_avg60, - psi.memory_some_avg300, - psi.memory_some_total / 1000); - suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, - "IO full:", - psi.io_full_avg10, - psi.io_full_avg60, - psi.io_full_avg300, - psi.io_full_total / 1000); - suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, - "IO some:", - psi.io_some_avg10, - psi.io_some_avg60, - psi.io_some_avg300, - psi.io_some_total / 1000); - suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, - "IRQ full:", - psi.irq_full_avg10, - psi.irq_full_avg60, - psi.irq_full_avg300, - psi.irq_full_total / 1000); + suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60vg300/total)\n"); + if (psi_ret) { + suc &= BOOL_FPRINT(out, " PSI not found: check if psi=1 enabled in cmdline\n"); + } else { + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "CPU some:", + psi.cpu_some_avg10, + psi.cpu_some_avg60, + psi.cpu_some_avg300, + psi.cpu_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "CPU full:", + psi.cpu_full_avg10, + psi.cpu_full_avg60, + psi.cpu_full_avg300, + psi.cpu_full_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "Memory full:", + psi.memory_full_avg10, + psi.memory_full_avg60, + psi.memory_full_avg300, + psi.memory_full_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "Memory some:", + psi.memory_some_avg10, + psi.memory_some_avg60, + psi.memory_some_avg300, + psi.memory_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IO full:", + psi.io_full_avg10, + psi.io_full_avg60, + psi.io_full_avg300, + psi.io_full_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IO some:", + psi.io_some_avg10, + psi.io_some_avg60, + psi.io_some_avg300, + psi.io_some_total / 1000); + suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, + "IRQ full:", + psi.irq_full_avg10, + 
psi.irq_full_avg60, + psi.irq_full_avg300, + psi.irq_full_total / 1000); + } if (cfg.container_path) { suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path); @@ -1017,6 +1066,7 @@ int main(int argc, char **argv) { const struct field_desc *field; int iterations = 0; + int psi_ret = 0; char keypress; /* Parse command line arguments */ @@ -1054,7 +1104,7 @@ int main(int argc, char **argv) } /* Read PSI statistics */ - read_psi_stats(); + psi_ret = read_psi_stats(); /* Get container stats if container path provided */ if (cfg.container_path) @@ -1067,7 +1117,7 @@ int main(int argc, char **argv) sort_tasks(); /* Display results to stdout or log file */ - display_results(); + display_results(psi_ret); /* Check for iterations */ if (cfg.iterations > 0 && ++iterations >= cfg.iterations) -- cgit v1.2.3