From 0e0af57e0e91b304f36b7d1dba859e3c04094273 Mon Sep 17 00:00:00 2001 From: "Dr. Thomas Orgis" Date: Fri, 29 Apr 2022 14:38:03 -0700 Subject: taskstats: version 12 with thread group and exe info The task exit struct needs some crucial information to be able to provide an enhanced version of process and thread accounting. This change provides: 1. ac_tgid in addition to ac_pid 2. thread group execution walltime in ac_tgetime 3. flag AGROUP in ac_flag to indicate the last task in a thread group / process 4. device ID and inode of task's /proc/self/exe in ac_exe_dev and ac_exe_inode 5. tools/accounting/procacct as demonstrator When a task exits, taskstats are reported to userspace including the task's pid and ppid, but without the id of the thread group this task is part of. Without the tgid, the stats of single tasks cannot be correlated to each other as a thread group (process). The taskstats documentation suggests that on process exit a data set consisting of accumulated stats for the whole group is produced. But such an additional set of stats is only produced for actually multithreaded processes, not groups that had only one thread, and also those stats only contain data about delay accounting and not the more basic information about CPU and memory resource usage. Adding the AGROUP flag to be set when the last task of a group exited enables determination of process end also for single-threaded processes. My application basically does enhanced process accounting with summed cputime, biggest maxrss, tasks per process. The data is not available with the traditional BSD process accounting (which is not designed to be extensible) and the taskstats interface allows more efficient on-the-fly grouping and summing of the stats, anyway, without intermediate disk writes. 
Furthermore, I do carry statistics on which exact program binary is used how often with associated resources, getting a picture on how important which parts of a collection of installed scientific software in different versions are, and how well they put load on the machine. This is enabled by providing information on /proc/self/exe for each task. I assume the two 64-bit fields for device ID and inode are more appropriate than the possibly large resolved path to keep the data volume down. Add the tgid to the stats to complete task identification, the flag AGROUP to mark the last task of a group, the group wallclock time, and inode-based identification of the associated executable file. Add tools/accounting/procacct.c as a simplified fork of getdelays.c to demonstrate process and thread accounting. [thomas.orgis@uni-hamburg.de: fix version number in comment] Link: https://lkml.kernel.org/r/20220405003601.7a5f6008@plasteblaster Link: https://lkml.kernel.org/r/20220331004106.64e5616b@plasteblaster Signed-off-by: Dr. Thomas Orgis Reviewed-by: Ismael Luceno Cc: Balbir Singh Cc: Eric W. Biederman Cc: xu xin Cc: Yang Yang Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux/taskstats.h') diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 12327d32378f..736154171489 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 11 +#define TASKSTATS_VERSION 12 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -48,7 +48,8 @@ struct taskstats { __u32 ac_exitcode; /* Exit status */ /* The accounting flags of a task as defined in <linux/acct.h> - * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG. + * Defined values are AFORK, ASU, ACOMPAT, ACORE, AXSIG, and AGROUP. + * (AGROUP since version 12). 
*/ __u8 ac_flag; /* Record flags */ __u8 ac_nice; /* task_nice */ @@ -173,9 +174,26 @@ struct taskstats { /* v10: 64-bit btime to avoid overflow */ __u64 ac_btime64; /* 64-bit begin time */ - /* Delay waiting for memory compact */ + /* v11: Delay waiting for memory compact */ __u64 compact_count; __u64 compact_delay_total; + + /* v12 begin */ + __u32 ac_tgid; /* thread group ID */ + /* Thread group walltime up to now. This is total process walltime if + * AGROUP flag is set. + */ + __u64 ac_tgetime __attribute__((aligned(8))); + /* Lightweight information to identify process binary files. + * This leaves userspace to match this to a file system path, using + * MAJOR() and MINOR() macros to identify a device and mount point, + * the inode to identify the executable file. This is /proc/self/exe + * at the end, so matching the most recent exec(). Values are zero + * for kernel threads. + */ + __u64 ac_exe_dev; /* program binary device ID */ + __u64 ac_exe_inode; /* program binary inode number */ + /* v12 end */ }; -- cgit v1.2.3 From 662ce1dc9caf493c309200edbe38d186f1ea20d0 Mon Sep 17 00:00:00 2001 From: Yang Yang Date: Wed, 1 Jun 2022 15:55:25 -0700 Subject: delayacct: track delays from write-protect copy Delay accounting does not track the delay of write-protect copy. When tasks trigger many write-protect copies (including COW and unsharing of anonymous pages[1]), they may spend an amount of time waiting for them. Getting the delay of tasks in write-protect copy could help users to evaluate the impact of using KSM or fork() or GUP. 
Also update tools/accounting/getdelays.c: / # ./getdelays -dl -p 231 print delayacct stats ON listen forever PID 231 CPU count real total virtual total delay total delay average 6247 1859000000 2154070021 1674255063 0.268ms IO count delay total delay average 0 0 0ms SWAP count delay total delay average 0 0 0ms RECLAIM count delay total delay average 0 0 0ms THRASHING count delay total delay average 0 0 0ms COMPACT count delay total delay average 3 72758 0ms WPCOPY count delay total delay average 3635 271567604 0ms [1] commit 31cc5bc4af70("mm: support GUP-triggered unsharing of anonymous pages") Link: https://lkml.kernel.org/r/20220409014342.2505532-1-yang.yang29@zte.com.cn Signed-off-by: Yang Yang Reviewed-by: David Hildenbrand Reviewed-by: Jiang Xuexin Reviewed-by: Ran Xiaokai Reviewed-by: wangyong Cc: Jonathan Corbet Cc: Balbir Singh Cc: Mike Kravetz Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- include/uapi/linux/taskstats.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux/taskstats.h') diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h index 736154171489..a7f5b11a8f1b 100644 --- a/include/uapi/linux/taskstats.h +++ b/include/uapi/linux/taskstats.h @@ -34,7 +34,7 @@ */ -#define TASKSTATS_VERSION 12 +#define TASKSTATS_VERSION 13 #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN * in linux/sched.h */ @@ -194,6 +194,10 @@ struct taskstats { __u64 ac_exe_dev; /* program binary device ID */ __u64 ac_exe_inode; /* program binary inode number */ /* v12 end */ + + /* v13: Delay waiting for write-protect copy */ + __u64 wpcopy_count; + __u64 wpcopy_delay_total; }; -- cgit v1.2.3