diff options
author | Nathan Zimmer <nzimmer@sgi.com> | 2013-02-21 15:15:08 -0800 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2013-02-22 10:27:24 +0100 |
commit | cb152ff26717961b10d0888cd983ba284cb99cd1 (patch) | |
tree | fa23de9f176321a54e60529e21389f1ac930bc30 /kernel | |
parent | 1c3e826482ab698e418c7a894440e62c76aac893 (diff) |
sched: Fix /proc/sched_stat failure on very very large systems
On systems with 4096 cores, running "cat /proc/schedstat" fails
because we try to push all of the data into a single kmalloc
buffer.
The issue is that on these very large machines the data will not
all fit in 4 MB.
A better solution is to not use the single_open() mechanism but
to provide our own seq_operations.
The output should be identical to the previous version, and thus
the version number does not need to change.
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Nathan Zimmer <nzimmer@sgi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
[ Fix memleak]
[ Fix spello in comment]
[ Fix warnings]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched/stats.c | 79 |
1 files changed, 59 insertions, 20 deletions
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 903ffa9e8872..e036eda1a9c9 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -21,14 +21,17 @@ static int show_schedstat(struct seq_file *seq, void *v) if (mask_str == NULL) return -ENOMEM; - seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); - seq_printf(seq, "timestamp %lu\n", jiffies); - for_each_online_cpu(cpu) { - struct rq *rq = cpu_rq(cpu); + if (v == (void *)1) { + seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); + seq_printf(seq, "timestamp %lu\n", jiffies); + } else { + struct rq *rq; #ifdef CONFIG_SMP struct sched_domain *sd; int dcount = 0; #endif + cpu = (unsigned long)(v - 2); + rq = cpu_rq(cpu); /* runqueue-specific stats */ seq_printf(seq, @@ -77,30 +80,66 @@ static int show_schedstat(struct seq_file *seq, void *v) return 0; } -static int schedstat_open(struct inode *inode, struct file *file) +/* + * This itererator needs some explanation. + * It returns 1 for the header position. + * This means 2 is cpu 0. + * In a hotplugged system some cpus, including cpu 0, may be missing so we have + * to use cpumask_* to iterate over the cpus. 
+ */ +static void *schedstat_start(struct seq_file *file, loff_t *offset) { - unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); - char *buf = kmalloc(size, GFP_KERNEL); - struct seq_file *m; - int res; + unsigned long n = *offset; - if (!buf) - return -ENOMEM; - res = single_open(file, show_schedstat, NULL); - if (!res) { - m = file->private_data; - m->buf = buf; - m->size = size; - } else - kfree(buf); - return res; + if (n == 0) + return (void *) 1; + + n--; + + if (n > 0) + n = cpumask_next(n - 1, cpu_online_mask); + else + n = cpumask_first(cpu_online_mask); + + *offset = n + 1; + + if (n < nr_cpu_ids) + return (void *)(unsigned long)(n + 2); + return NULL; +} + +static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset) +{ + (*offset)++; + return schedstat_start(file, offset); +} + +static void schedstat_stop(struct seq_file *file, void *data) +{ +} + +static const struct seq_operations schedstat_sops = { + .start = schedstat_start, + .next = schedstat_next, + .stop = schedstat_stop, + .show = show_schedstat, +}; + +static int schedstat_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &schedstat_sops); } +static int schedstat_release(struct inode *inode, struct file *file) +{ + return 0; +}; + static const struct file_operations proc_schedstat_operations = { .open = schedstat_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = schedstat_release, }; static int __init proc_schedstat_init(void) |