From c757249af152c59fd74b85e52e8c090acb33d9c0 Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Fri, 14 Jul 2006 00:24:40 -0700 Subject: [PATCH] per-task-delay-accounting: taskstats interface Create a "taskstats" interface based on generic netlink (NETLINK_GENERIC family), for getting statistics of tasks and thread groups during their lifetime and when they exit. The interface is intended for use by multiple accounting packages though it is being created in the context of delay accounting. This patch creates the interface without populating the fields of the data that is sent to the user in response to a command or upon the exit of a task. Each accounting package interested in using taskstats has to provide an additional patch to add its stats to the common structure. [akpm@osdl.org: cleanups, Kconfig fix] Signed-off-by: Shailabh Nagar Signed-off-by: Balbir Singh Cc: Jes Sorensen Cc: Peter Chubb Cc: Erich Focht Cc: Levent Serinol Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 include/linux/taskstats.h (limited to 'include/linux/taskstats.h') diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h new file mode 100644 index 000000000000..51f62759bea9 --- /dev/null +++ b/include/linux/taskstats.h @@ -0,0 +1,84 @@ +/* taskstats.h - exporting per-task statistics + * + * Copyright (C) Shailabh Nagar, IBM Corp. 2006 + * (C) Balbir Singh, IBM Corp. 2006 + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + */ + +#ifndef _LINUX_TASKSTATS_H +#define _LINUX_TASKSTATS_H + +/* Format for per-task data returned to userland when + * - a task exits + * - listener requests stats for a task + * + * The struct is versioned. Newer versions should only add fields to + * the bottom of the struct to maintain backward compatibility. + * + * + * To add new fields + * a) bump up TASKSTATS_VERSION + * b) add comment indicating new version number at end of struct + * c) add new fields after version comment; maintain 64-bit alignment + */ + +#define TASKSTATS_VERSION 1 + +struct taskstats { + + /* Version 1 */ + __u64 version; +}; + + +#define TASKSTATS_LISTEN_GROUP 0x1 + +/* + * Commands sent from userspace + * Not versioned. New commands should only be inserted at the enum's end + * prior to __TASKSTATS_CMD_MAX + */ + +enum { + TASKSTATS_CMD_UNSPEC = 0, /* Reserved */ + TASKSTATS_CMD_GET, /* user->kernel request/get-response */ + TASKSTATS_CMD_NEW, /* kernel->user event */ + __TASKSTATS_CMD_MAX, +}; + +#define TASKSTATS_CMD_MAX (__TASKSTATS_CMD_MAX - 1) + +enum { + TASKSTATS_TYPE_UNSPEC = 0, /* Reserved */ + TASKSTATS_TYPE_PID, /* Process id */ + TASKSTATS_TYPE_TGID, /* Thread group id */ + TASKSTATS_TYPE_STATS, /* taskstats structure */ + TASKSTATS_TYPE_AGGR_PID, /* contains pid + stats */ + TASKSTATS_TYPE_AGGR_TGID, /* contains tgid + stats */ + __TASKSTATS_TYPE_MAX, +}; + +#define TASKSTATS_TYPE_MAX (__TASKSTATS_TYPE_MAX - 1) + +enum { + TASKSTATS_CMD_ATTR_UNSPEC = 0, + TASKSTATS_CMD_ATTR_PID, + TASKSTATS_CMD_ATTR_TGID, + __TASKSTATS_CMD_ATTR_MAX, +}; + +#define TASKSTATS_CMD_ATTR_MAX (__TASKSTATS_CMD_ATTR_MAX - 1) + +/* NETLINK_GENERIC related info */ + +#define TASKSTATS_GENL_NAME "TASKSTATS" +#define TASKSTATS_GENL_VERSION 0x1 + +#endif /* _LINUX_TASKSTATS_H */ -- cgit v1.2.3 From 6f44993fe1d7b2b097f6ac60cd5835c6f5ca0874 Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Fri, 14 Jul 2006 00:24:41 -0700 Subject: [PATCH] per-task-delay-accounting: delay accounting usage of taskstats interface Usage of taskstats interface by delay accounting. Signed-off-by: Shailabh Nagar Signed-off-by: Balbir Singh Cc: Jes Sorensen Cc: Peter Chubb Cc: Erich Focht Cc: Levent Serinol Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'include/linux/taskstats.h') diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index 51f62759bea9..c6aeca32348e 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -34,7 +34,60 @@ struct taskstats { /* Version 1 */ - __u64 version; + __u16 version; + __u16 padding[3]; /* Userspace should not interpret the padding + * field which can be replaced by useful + * fields if struct taskstats is extended. + */ + + /* Delay accounting fields start + * + * All values, until comment "Delay accounting fields end" are + * available only if delay accounting is enabled, even though the last + * few fields are not delays + * + * xxx_count is the number of delay values recorded + * xxx_delay_total is the corresponding cumulative delay in nanoseconds + * + * xxx_delay_total wraps around to zero on overflow + * xxx_count incremented regardless of overflow + */ + + /* Delay waiting for cpu, while runnable + * count, delay_total NOT updated atomically + */ + __u64 cpu_count; + __u64 cpu_delay_total; + + /* Following four fields atomically updated using task->delays->lock */ + + /* Delay waiting for synchronous block I/O to complete + * does not account for delays in I/O submission + */ + __u64 blkio_count; + __u64 blkio_delay_total; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen + * from the kernel in involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_real_total; + + /* cpu "virtual" running time + * Uses time intervals seen by the kernel i.e. no adjustment + * for kernel's involuntary waits due to virtualization. + * Value is cumulative, in nanoseconds, without a corresponding count + * and wraps around to zero silently on overflow + */ + __u64 cpu_run_virtual_total; + /* Delay accounting fields end */ + /* version 1 ends here */ }; -- cgit v1.2.3 From f9fd8914c1acca0d98b69d831b128d5b52f03c51 Mon Sep 17 00:00:00 2001 From: Shailabh Nagar Date: Fri, 14 Jul 2006 00:24:47 -0700 Subject: [PATCH] per-task delay accounting taskstats interface: control exit data through cpumasks On systems with a large number of cpus, with even a modest rate of tasks exiting per cpu, the volume of taskstats data sent on thread exit can overflow a userspace listener's buffers. One approach to avoiding overflow is to allow listeners to get data for a limited and specific set of cpus. By scaling the number of listeners and/or the cpus they monitor, userspace can handle the statistical data overload more gracefully. In this patch, each listener registers to listen to a specific set of cpus by specifying a cpumask. The interest is recorded per-cpu. When a task exits on a cpu, its taskstats data is unicast to each listener interested in that cpu. Thanks to Andrew Morton for pointing out the various scalability and general concerns of previous attempts and for suggesting this design. [akpm@osdl.org: build fix] Signed-off-by: Shailabh Nagar Signed-off-by: Balbir Singh Signed-off-by: Chandra Seetharaman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/taskstats.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/taskstats.h') diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h index c6aeca32348e..f1cb6cddd19d 100644 --- a/include/linux/taskstats.h +++ b/include/linux/taskstats.h @@ -91,8 +91,6 @@ struct taskstats { }; -#define TASKSTATS_LISTEN_GROUP 0x1 - /* * Commands sent from userspace * Not versioned. New commands should only be inserted at the enum's end @@ -124,6 +122,8 @@ enum { TASKSTATS_CMD_ATTR_UNSPEC = 0, TASKSTATS_CMD_ATTR_PID, TASKSTATS_CMD_ATTR_TGID, + TASKSTATS_CMD_ATTR_REGISTER_CPUMASK, + TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK, __TASKSTATS_CMD_ATTR_MAX, }; -- cgit v1.2.3