From a4b29ba2f72673aaa60ba11ced74d579771dd578 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:02 +0200 Subject: sched: small sched_debug cleanup small kernel/sched_debug.c cleanup - break up multi-variable assignment. no code changed: text data bss dec hex filename 38869 3550 24 42443 a5cb sched.o.before 38869 3550 24 42443 a5cb sched.o.after Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index c3ee38bd3426..94915f1fd9de 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -279,9 +279,13 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) void proc_sched_set_task(struct task_struct *p) { #ifdef CONFIG_SCHEDSTATS - p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0; - p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0; + p->se.sleep_max = 0; + p->se.block_max = 0; + p->se.exec_max = 0; + p->se.wait_max = 0; + p->se.wait_runtime_overruns = 0; + p->se.wait_runtime_underruns = 0; #endif - p->se.sum_exec_runtime = 0; + p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; } -- cgit v1.2.3 From eba1ed4b7e52720e3099325874811c38a5ec1562 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:02 +0200 Subject: sched: debug: track maximum 'slice' track the maximum amount of time a task has executed while the CPU load was at least 2x. (i.e. at least two nice-0 tasks were runnable) Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 94915f1fd9de..fd080f686f18 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -254,6 +254,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.sleep_max); P(se.block_max); P(se.exec_max); + P(se.slice_max); P(se.wait_max); P(se.wait_runtime_overruns); P(se.wait_runtime_underruns); @@ -282,6 +283,7 @@ void proc_sched_set_task(struct task_struct *p) p->se.sleep_max = 0; p->se.block_max = 0; p->se.exec_max = 0; + p->se.slice_max = 0; p->se.wait_max = 0; p->se.wait_runtime_overruns = 0; p->se.wait_runtime_underruns = 0; -- cgit v1.2.3 From a25707f3aef9cf68c341eba5960d580f364e4e6f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:03 +0200 Subject: sched: remove precise CPU load CPU load calculations are statistical anyway, and there's little benefit from having it calculated on every scheduling event. So remove this code, it gets rid of a divide from the scheduler wakeup and context-switch fastpath. 
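[ annotation, not part of the patch: the "statistical" load tracking that remains after this change is the per-tick decay of rq->cpu_load[]. The sketch below is a simplified rendition of update_cpu_load() from kernel/sched.c of this era; the rounding of new_load and the exact index count are elided, so treat it as illustrative rather than verbatim kernel code. ]

static void update_cpu_load_sketch(struct rq *this_rq)
{
	unsigned long this_load = this_rq->load.weight;
	int i, scale;

	this_rq->nr_load_updates++;

	/*
	 * cpu_load[0] tracks the instantaneous load; each higher index
	 * decays more slowly (scale == 2^i), giving a smoother average:
	 */
	for (i = 0, scale = 1; i < 5; i++, scale += scale) {
		unsigned long old_load = this_rq->cpu_load[i];

		this_rq->cpu_load[i] = (old_load * (scale - 1) + this_load) >> i;
	}
}

Because this runs once per scheduler tick instead of on every wakeup and context switch, the divide this patch removes disappears from the fast path entirely.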
Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index fd080f686f18..6b789dae7fdf 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -145,8 +145,6 @@ static void print_cpu(struct seq_file *m, int cpu) P(nr_running); SEQ_printf(m, " .%-30s: %lu\n", "load", rq->ls.load.weight); - P(ls.delta_fair); - P(ls.delta_exec); P(nr_switches); P(nr_load_updates); P(nr_uninterruptible); -- cgit v1.2.3 From 67e12eac328b276dca7e61640632ed996ff1a93a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:05 +0200 Subject: sched: add se->vruntime debugging debug se->vruntime fields. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith --- kernel/sched_debug.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 6b789dae7fdf..75ccf7aa98f3 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -44,7 +44,8 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n", + SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld %15Ld\n", + (long long)p->se.vruntime, (long long)p->se.sum_exec_runtime, (long long)p->se.sum_wait_runtime, (long long)p->se.sum_sleep_runtime, @@ -64,10 +65,10 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) "\nrunnable tasks:\n" " task PID tree-key delta waiting" " switches prio" - " sum-exec sum-wait sum-sleep" + " exec-runtime sum-exec sum-wait sum-sleep" " wait-overrun wait-underrun\n" "------------------------------------------------------------------" - "----------------" + "--------------------------------" "------------------------------------------------" "--------------------------------\n"); @@ -108,6 +109,11 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { + s64 MIN_vruntime = -1, max_vruntime = -1, spread; + struct rq *rq = &per_cpu(runqueues, cpu); + struct sched_entity *last; + unsigned long flags; + SEQ_printf(m, "\ncfs_rq\n"); #define P(x) \ @@ -115,6 +121,23 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) P(fair_clock); P(exec_clock); + P(min_vruntime); + + spin_lock_irqsave(&rq->lock, flags); + if (cfs_rq->rb_leftmost) + MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; + last = __pick_last_entity(cfs_rq); + if (last) + max_vruntime = last->vruntime; + spin_unlock_irqrestore(&rq->lock, flags); + SEQ_printf(m, " .%-30s: %Ld\n", "MIN_vruntime", + (long long)MIN_vruntime); + SEQ_printf(m, " .%-30s: %Ld\n", "max_vruntime", + (long long)max_vruntime); + spread = max_vruntime - MIN_vruntime; + SEQ_printf(m, " .%-30s: %Ld\n", "spread", + (long long)spread); + P(wait_runtime); P(wait_runtime_overruns); P(wait_runtime_underruns); @@ -243,6 +266,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.wait_start_fair); P(se.exec_start); P(se.sleep_start_fair); + P(se.vruntime); P(se.sum_exec_runtime); #ifdef CONFIG_SCHEDSTATS -- cgit v1.2.3 From 86d9560cb6bd85986e98b4c63705daec94406bd4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:06 +0200 
Subject: sched: add more vruntime statistics add more vruntime statistics. Signed-off-by: Ingo Molnar Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 75ccf7aa98f3..7a61706637c7 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -109,7 +109,8 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { - s64 MIN_vruntime = -1, max_vruntime = -1, spread; + s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, + spread, rq0_min_vruntime, spread0; struct rq *rq = &per_cpu(runqueues, cpu); struct sched_entity *last; unsigned long flags; @@ -121,7 +122,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) P(fair_clock); P(exec_clock); - P(min_vruntime); spin_lock_irqsave(&rq->lock, flags); if (cfs_rq->rb_leftmost) @@ -129,14 +129,21 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) last = __pick_last_entity(cfs_rq); if (last) max_vruntime = last->vruntime; + min_vruntime = rq->cfs.min_vruntime; + rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; spin_unlock_irqrestore(&rq->lock, flags); SEQ_printf(m, " .%-30s: %Ld\n", "MIN_vruntime", (long long)MIN_vruntime); + SEQ_printf(m, " .%-30s: %Ld\n", "min_vruntime", + (long long)min_vruntime); SEQ_printf(m, " .%-30s: %Ld\n", "max_vruntime", (long long)max_vruntime); spread = max_vruntime - MIN_vruntime; SEQ_printf(m, " .%-30s: %Ld\n", "spread", (long long)spread); + spread0 = min_vruntime - rq0_min_vruntime; + SEQ_printf(m, " .%-30s: %Ld\n", "spread0", + (long long)spread0); P(wait_runtime); P(wait_runtime_overruns); -- cgit v1.2.3 From 495eca494aa6006df55e3a04e105462c5940ca17 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Mon, 15 Oct 2007 17:00:06 +0200 Subject: sched: clean up struct load_stat 'struct load_stat' is redundant now so let's get rid of it. Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 7a61706637c7..62965f0ae37c 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -174,7 +174,7 @@ static void print_cpu(struct seq_file *m, int cpu) P(nr_running); SEQ_printf(m, " .%-30s: %lu\n", "load", - rq->ls.load.weight); + rq->load.weight); P(nr_switches); P(nr_load_updates); P(nr_uninterruptible); -- cgit v1.2.3 From e22f5bbf86d8cce710d5c8ba5bf57832e73aab8c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:06 +0200 Subject: sched: remove wait_runtime limit remove the wait_runtime-limit fields and the code depending on it, now that the math has been changed over to rely on the vruntime metric. 
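[ annotation, not part of the patch: the vruntime metric that replaces the wait_runtime machinery is just weight-scaled execution time. A minimal sketch of the idea, assuming NICE_0_LOAD == 1024 and ignoring the fixed-point helpers (calc_delta_fair() and friends) the kernel actually uses: ]

/*
 * A nice-0 task accrues vruntime at wall-clock rate; heavier (lower-nice)
 * tasks accrue it more slowly and lighter tasks faster. CFS always runs
 * the leftmost entity in the rbtree, i.e. the one with the smallest
 * vruntime, so no explicit wait_runtime limit is needed anymore.
 */
static inline u64 vruntime_delta(u64 delta_exec, unsigned long weight)
{
	return delta_exec * 1024 / weight;	/* 1024 == NICE_0_LOAD */
}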
Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 62965f0ae37c..3350169a7d2a 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -148,7 +148,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) P(wait_runtime); P(wait_runtime_overruns); P(wait_runtime_underruns); - P(sleeper_bonus); #undef P print_cfs_rq_runtime_sum(m, cpu, cfs_rq); @@ -272,7 +271,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.wait_runtime); P(se.wait_start_fair); P(se.exec_start); - P(se.sleep_start_fair); P(se.vruntime); P(se.sum_exec_runtime); -- cgit v1.2.3 From bbdba7c0e1161934ae881ad00e4db49830f5ef59 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:06 +0200 Subject: sched: remove wait_runtime fields and features remove wait_runtime based fields and features, now that the CFS math has been changed over to the vruntime metric. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 54 +++++----------------------------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 3350169a7d2a..e3b62324ac31 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -36,21 +36,16 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) else SEQ_printf(m, " "); - SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d ", + SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ", p->comm, p->pid, (long long)p->se.fair_key, - (long long)(p->se.fair_key - rq->cfs.fair_clock), - (long long)p->se.wait_runtime, (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld %15Ld\n", + SEQ_printf(m, "%15Ld %15Ld %15Ld\n", (long long)p->se.vruntime, (long long)p->se.sum_exec_runtime, - (long long)p->se.sum_wait_runtime, - (long long)p->se.sum_sleep_runtime, - (long long)p->se.wait_runtime_overruns, - (long long)p->se.wait_runtime_underruns); + (long long)p->se.sum_sleep_runtime); #else SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n", 0LL, 0LL, 0LL, 0LL, 0LL); @@ -63,10 +58,8 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) SEQ_printf(m, "\nrunnable tasks:\n" - " task PID tree-key delta waiting" - " switches prio" - " exec-runtime sum-exec sum-wait sum-sleep" - " wait-overrun wait-underrun\n" + " task PID tree-key switches prio" + " exec-runtime sum-exec sum-sleep\n" "------------------------------------------------------------------" "--------------------------------" "------------------------------------------------" @@ -84,29 +77,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_unlock_irq(&tasklist_lock); } -static void -print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) -{ - s64 wait_runtime_rq_sum = 0; - struct task_struct *p; - struct rb_node *curr; - unsigned long flags; - struct rq *rq = &per_cpu(runqueues, cpu); - - spin_lock_irqsave(&rq->lock, flags); - curr = first_fair(cfs_rq); - while (curr) { - p = rb_entry(curr, struct task_struct, se.run_node); - wait_runtime_rq_sum += p->se.wait_runtime; - - curr = rb_next(curr); - } - spin_unlock_irqrestore(&rq->lock, flags); - - SEQ_printf(m, " 
.%-30s: %Ld\n", "wait_runtime_rq_sum", - (long long)wait_runtime_rq_sum); -} - void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, @@ -120,7 +90,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x)) - P(fair_clock); P(exec_clock); spin_lock_irqsave(&rq->lock, flags); @@ -144,13 +113,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) spread0 = min_vruntime - rq0_min_vruntime; SEQ_printf(m, " .%-30s: %Ld\n", "spread0", (long long)spread0); - - P(wait_runtime); - P(wait_runtime_overruns); - P(wait_runtime_underruns); #undef P - - print_cfs_rq_runtime_sum(m, cpu, cfs_rq); } static void print_cpu(struct seq_file *m, int cpu) @@ -268,8 +231,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) #define P(F) \ SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) - P(se.wait_runtime); - P(se.wait_start_fair); P(se.exec_start); P(se.vruntime); P(se.sum_exec_runtime); @@ -283,9 +244,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.exec_max); P(se.slice_max); P(se.wait_max); - P(se.wait_runtime_overruns); - P(se.wait_runtime_underruns); - P(se.sum_wait_runtime); #endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); @@ -312,8 +270,6 @@ void proc_sched_set_task(struct task_struct *p) p->se.exec_max = 0; p->se.slice_max = 0; p->se.wait_max = 0; - p->se.wait_runtime_overruns = 0; - p->se.wait_runtime_underruns = 0; #endif p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; -- cgit v1.2.3 From 30cfdcfc5f180fc21a3dad6ae3b7b2a9ee112186 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Mon, 15 Oct 2007 17:00:07 +0200 Subject: sched: do not keep current in the tree and get rid of sched_entity::fair_key Get rid of 'sched_entity::fair_key'. As a side effect, 'current' is not kept within the tree for SCHED_NORMAL/BATCH tasks anymore. This simplifies some parts of code (e.g. entity_tick() and yield_task_fair()) and also somewhat optimizes them (e.g. a single update_curr() now vs. dequeue/enqueue() before in entity_tick()). Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index e3b62324ac31..bb34b8188f61 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -38,7 +38,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ", p->comm, p->pid, - (long long)p->se.fair_key, + (long long)p->se.vruntime, (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS -- cgit v1.2.3 From 1a75b94f7bda591f4c53af86baa50e1eaee35927 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:08 +0200 Subject: sched: prettify /proc/sched_debug output print the correct amount of dashes in /proc/sched_debug. 
Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index bb34b8188f61..22cf74c1dc03 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -60,10 +60,8 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) "\nrunnable tasks:\n" " task PID tree-key switches prio" " exec-runtime sum-exec sum-sleep\n" - "------------------------------------------------------------------" - "--------------------------------" - "------------------------------------------------" - "--------------------------------\n"); + "------------------------------------------------------" + "------------------------------------------------"); read_lock_irq(&tasklist_lock); -- cgit v1.2.3 From ef83a5714d9a817b2e9b97f04a6d070fbd6ecf80 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:08 +0200 Subject: sched: enhance debug output enhance debug output by changing 12345678 nsecs to 12.345678 output, this is more human-readable. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 108 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 40 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 22cf74c1dc03..e2c1e0dfdf50 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -28,6 +28,31 @@ printk(x); \ } while (0) +/* + * Ease the printing of nsec fields: + */ +static long long nsec_high(long long nsec) +{ + if (nsec < 0) { + nsec = -nsec; + do_div(nsec, 1000000); + return -nsec; + } + do_div(nsec, 1000000); + + return nsec; +} + +static unsigned long nsec_low(long long nsec) +{ + if (nsec < 0) + nsec = -nsec; + + return do_div(nsec, 1000000); +} + +#define SPLIT_NS(x) nsec_high(x), nsec_low(x) + static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { @@ -36,19 +61,19 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) else SEQ_printf(m, " "); - SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ", + SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ", p->comm, p->pid, - (long long)p->se.vruntime, + SPLIT_NS(p->se.vruntime), (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, "%15Ld %15Ld %15Ld\n", - (long long)p->se.vruntime, - (long long)p->se.sum_exec_runtime, - (long long)p->se.sum_sleep_runtime); + SEQ_printf(m, "%15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n", + SPLIT_NS(p->se.vruntime), + SPLIT_NS(p->se.sum_exec_runtime), + SPLIT_NS(p->se.sum_sleep_runtime)); #else - SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n", - 0LL, 0LL, 0LL, 0LL, 0LL); + SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n", + 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); #endif } @@ -85,10 +110,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, "\ncfs_rq\n"); -#define P(x) \ - SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x)) - - P(exec_clock); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", + SPLIT_NS(cfs_rq->exec_clock)); spin_lock_irqsave(&rq->lock, flags); if (cfs_rq->rb_leftmost) @@ -99,19 +122,18 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) min_vruntime = rq->cfs.min_vruntime; rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; spin_unlock_irqrestore(&rq->lock, flags); - SEQ_printf(m, " 
.%-30s: %Ld\n", "MIN_vruntime", - (long long)MIN_vruntime); - SEQ_printf(m, " .%-30s: %Ld\n", "min_vruntime", - (long long)min_vruntime); - SEQ_printf(m, " .%-30s: %Ld\n", "max_vruntime", - (long long)max_vruntime); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", + SPLIT_NS(MIN_vruntime)); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", + SPLIT_NS(min_vruntime)); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime", + SPLIT_NS(max_vruntime)); spread = max_vruntime - MIN_vruntime; - SEQ_printf(m, " .%-30s: %Ld\n", "spread", - (long long)spread); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", + SPLIT_NS(spread)); spread0 = min_vruntime - rq0_min_vruntime; - SEQ_printf(m, " .%-30s: %Ld\n", "spread0", - (long long)spread0); -#undef P + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", + SPLIT_NS(spread0)); } static void print_cpu(struct seq_file *m, int cpu) @@ -131,6 +153,8 @@ static void print_cpu(struct seq_file *m, int cpu) #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x)) +#define PN(x) \ + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) P(nr_running); SEQ_printf(m, " .%-30s: %lu\n", "load", @@ -139,21 +163,22 @@ static void print_cpu(struct seq_file *m, int cpu) P(nr_load_updates); P(nr_uninterruptible); SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies); - P(next_balance); + PN(next_balance); P(curr->pid); - P(clock); - P(idle_clock); - P(prev_clock_raw); + PN(clock); + PN(idle_clock); + PN(prev_clock_raw); P(clock_warps); P(clock_overflows); P(clock_deep_idle_events); - P(clock_max_delta); + PN(clock_max_delta); P(cpu_load[0]); P(cpu_load[1]); P(cpu_load[2]); P(cpu_load[3]); P(cpu_load[4]); #undef P +#undef PN print_cfs_stats(m, cpu); @@ -170,7 +195,7 @@ static int sched_debug_show(struct seq_file *m, void *v) (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now); + SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); for_each_online_cpu(cpu) print_cpu(m, cpu); @@ -228,20 +253,22 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) SEQ_printf(m, "----------------------------------------------\n"); #define P(F) \ SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) +#define PN(F) \ + SEQ_printf(m, "%-25s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) - P(se.exec_start); - P(se.vruntime); - P(se.sum_exec_runtime); + PN(se.exec_start); + PN(se.vruntime); + PN(se.sum_exec_runtime); #ifdef CONFIG_SCHEDSTATS - P(se.wait_start); - P(se.sleep_start); - P(se.block_start); - P(se.sleep_max); - P(se.block_max); - P(se.exec_max); - P(se.slice_max); - P(se.wait_max); + PN(se.wait_start); + PN(se.sleep_start); + PN(se.block_start); + PN(se.sleep_max); + PN(se.block_max); + PN(se.exec_max); + PN(se.slice_max); + PN(se.wait_max); #endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); @@ -249,6 +276,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(policy); P(prio); #undef P +#undef PN { u64 t0, t1; -- cgit v1.2.3 From c86da3a3d40f6e7a032edfaea191fb51e9626c8f Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 15 Oct 2007 17:00:08 +0200 Subject: sched: fix formatting of /proc/sched_debug fix formatting of /proc/sched_debug Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c 
index e2c1e0dfdf50..4eaaf96559d6 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -67,7 +67,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, "%15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n", + SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld\n", SPLIT_NS(p->se.vruntime), SPLIT_NS(p->se.sum_exec_runtime), SPLIT_NS(p->se.sum_sleep_runtime)); @@ -83,10 +83,10 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) SEQ_printf(m, "\nrunnable tasks:\n" - " task PID tree-key switches prio" - " exec-runtime sum-exec sum-sleep\n" + " task PID tree-key switches prio" + " exec-runtime sum-exec sum-sleep\n" "------------------------------------------------------" - "------------------------------------------------"); + "----------------------------------------------------\n"); read_lock_irq(&tasklist_lock); -- cgit v1.2.3 From 545f3b18152355acbb8da59873506fcf66c7c60e Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 15 Oct 2007 17:00:09 +0200 Subject: sched: print nr_running and load in /proc/sched_debug - print nr_running and load information for cfs_rq in /proc/sched_debug Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 4eaaf96559d6..3e47e870b043 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -134,6 +134,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) spread0 = min_vruntime - rq0_min_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0)); + SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); + SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); } static void print_cpu(struct seq_file *m, int cpu) -- cgit v1.2.3 From 24e377a83220ef05c9b5bec7e01d65eed6609aa6 Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 15 Oct 2007 17:00:09 +0200 Subject: sched: add fair-user scheduler Enable user-id based fair group scheduling. This is useful for anyone who wants to test the group scheduler w/o having to enable CONFIG_CGROUPS. A separate scheduling group (i.e struct task_grp) is automatically created for every new user added to the system. Upon uid change for a task, it is made to move to the corresponding scheduling group. A /proc tunable (/proc/root_user_share) is also provided to tune root user's quota of cpu bandwidth. 
Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 3e47e870b043..57ee9d5630a8 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -212,6 +212,49 @@ static void sysrq_sched_debug_show(void) sched_debug_show(NULL, NULL); } +#ifdef CONFIG_FAIR_USER_SCHED + +static DEFINE_MUTEX(root_user_share_mutex); + +static int +root_user_share_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d\n", init_task_grp_load); + + return len; +} + +static int +root_user_share_write_proc(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + unsigned long shares; + char kbuf[sizeof(unsigned long)+1]; + int rc = 0; + + if (copy_from_user(kbuf, buffer, sizeof(kbuf))) + return -EFAULT; + + shares = simple_strtoul(kbuf, NULL, 0); + + if (!shares) + shares = NICE_0_LOAD; + + mutex_lock(&root_user_share_mutex); + + init_task_grp_load = shares; + rc = sched_group_set_shares(&init_task_grp, shares); + + mutex_unlock(&root_user_share_mutex); + + return (rc < 0 ? rc : count); +} + +#endif /* CONFIG_FAIR_USER_SCHED */ + static int sched_debug_open(struct inode *inode, struct file *filp) { return single_open(filp, sched_debug_show, NULL); @@ -234,6 +277,15 @@ static int __init init_sched_debug_procfs(void) pe->proc_fops = &sched_debug_fops; +#ifdef CONFIG_FAIR_USER_SCHED + pe = create_proc_entry("root_user_share", 0644, NULL); + if (!pe) + return -ENOMEM; + + pe->read_proc = root_user_share_read_proc; + pe->write_proc = root_user_share_write_proc; +#endif + return 0; } -- cgit v1.2.3 From b8efb56172bc55082b8490778b07ef73eea0b551 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: BKL usage statistics add per task and per rq BKL usage statistics. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 57ee9d5630a8..823b63a3a3e1 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -136,6 +136,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SPLIT_NS(spread0)); SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); + SEQ_printf(m, " .%-30s: %ld\n", "bkl_cnt", + rq->bkl_cnt); } static void print_cpu(struct seq_file *m, int cpu) @@ -323,6 +325,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.exec_max); PN(se.slice_max); PN(se.wait_max); + P(sched_info.bkl_cnt); #endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); @@ -350,6 +353,7 @@ void proc_sched_set_task(struct task_struct *p) p->se.exec_max = 0; p->se.slice_max = 0; p->se.wait_max = 0; + p->sched_info.bkl_cnt = 0; #endif p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; -- cgit v1.2.3 From fdd71d132badad542a9ab99ab4a9c3c08fa6412f Mon Sep 17 00:00:00 2001 From: "S.Caglar Onur" Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: BKL usage statistics, fix build fix for the SCHED_DEBUG && !SCHEDSTATS case. 
Signed-off-by: S.Caglar Onur Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 823b63a3a3e1..b6d0a94d4120 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -136,8 +136,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SPLIT_NS(spread0)); SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); +#ifdef CONFIG_SCHEDSTATS SEQ_printf(m, " .%-30s: %ld\n", "bkl_cnt", rq->bkl_cnt); +#endif } static void print_cpu(struct seq_file *m, int cpu) -- cgit v1.2.3 From 1aa4731eff7dab7bd01747b46f654f449f1cfc2c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: print settings print the current value of all tunables in /proc/sched_debug output. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index b6d0a94d4120..d79e1ec5b06a 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -203,6 +203,19 @@ static int sched_debug_show(struct seq_file *m, void *v) SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); +#define P(x) \ + SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(x)) +#define PN(x) \ + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(x)) + PN(sysctl_sched_latency); + PN(sysctl_sched_min_granularity); + PN(sysctl_sched_wakeup_granularity); + PN(sysctl_sched_batch_wakeup_granularity); + PN(sysctl_sched_child_runs_first); + P(sysctl_sched_features); +#undef PN +#undef P + for_each_online_cpu(cpu) print_cpu(m, cpu); -- cgit v1.2.3 From d822cecedad88b69a7d68aa8d49e1f238aa320c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: more width for parameter printouts more width for parameter printouts in /proc/sched_debug. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index d79e1ec5b06a..b24f17de19e3 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -204,9 +204,9 @@ static int sched_debug_show(struct seq_file *m, void *v) SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); #define P(x) \ - SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(x)) + SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) #define PN(x) \ - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(x)) + SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); PN(sysctl_sched_min_granularity); PN(sysctl_sched_wakeup_granularity); -- cgit v1.2.3 From ddc972975091ba5f839bf24d0f9ef54fe90ee741 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: check spread debug feature: check how well we schedule within a reasonable vruntime 'spread' range. (note that CPU overload can increase the spread, so this is not a hard condition, but normal loads should be within the spread.) 
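[ annotation, not part of the patch: the nr_spread_over counter this patch prints is fed from kernel/sched_fair.c. Roughly, on enqueue the entity's distance from min_vruntime is compared against a few latency periods; the sketch below reflects that logic but is reconstructed from memory, so the exact threshold should be checked against the source. ]

static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHED_DEBUG
	s64 d = se->vruntime - cfs_rq->min_vruntime;

	if (d < 0)
		d = -d;

	/* a spread beyond ~3 latency periods hints at scheduling trouble */
	if (d > 3 * sysctl_sched_latency)
		schedstat_inc(cfs_rq, nr_spread_over);
#endif
}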
Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra --- kernel/sched_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index b24f17de19e3..4659c90c3418 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -140,6 +140,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %ld\n", "bkl_cnt", rq->bkl_cnt); #endif + SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", + cfs_rq->nr_spread_over); } static void print_cpu(struct seq_file *m, int cpu) -- cgit v1.2.3 From 2d72376b3af1e7d4d4515ebfd0f4383f2e92c343 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:12 +0200 Subject: sched: clean up schedstats, cnt -> count rename all 'cnt' fields and variables to the less yucky 'count' name. yuckage noticed by Andrew Morton. no change in code, other than the /proc/sched_debug bkl_count string got a bit larger: text data bss dec hex filename 38236 3506 24 41766 a326 sched.o.before 38240 3506 24 41770 a32a sched.o.after Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 4659c90c3418..be79cd6d9e80 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -137,8 +137,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, " .%-30s: %ld\n", "bkl_cnt", - rq->bkl_cnt); + SEQ_printf(m, " .%-30s: %ld\n", "bkl_count", + rq->bkl_count); #endif SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", cfs_rq->nr_spread_over); @@ -342,7 +342,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.exec_max); PN(se.slice_max); PN(se.wait_max); - P(sched_info.bkl_cnt); + P(sched_info.bkl_count); #endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); @@ -370,7 +370,7 @@ void proc_sched_set_task(struct task_struct *p) p->se.exec_max = 0; p->se.slice_max = 0; p->se.wait_max = 0; - p->sched_info.bkl_cnt = 0; + p->sched_info.bkl_count = 0; #endif p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; -- cgit v1.2.3 From 5f6d858ecca78f71755859a346d845e302973cd1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Oct 2007 17:00:12 +0200 Subject: sched: speed up and simplify vslice calculations speed up and simplify vslice calculations. 
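[ annotation, not part of the patch: sysctl_sched_nr_latency, which replaces min_granularity in the debug output below, bounds how many tasks share one latency period. A sketch of the period calculation from sched_fair.c of this vintage; the rounding details are approximate: ]

static u64 __sched_period(unsigned long nr_running)
{
	u64 period = sysctl_sched_latency;
	unsigned long nr_latency = sysctl_sched_nr_latency;

	/*
	 * With more than nr_latency runnable tasks, stretch the period so
	 * that no task's slice drops below latency/nr_latency:
	 */
	if (nr_running > nr_latency) {
		period *= nr_running;
		do_div(period, nr_latency);
	}
	return period;
}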
[ From: Mike Galbraith : build fix ] Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index be79cd6d9e80..995bbd384a97 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -210,7 +210,7 @@ static int sched_debug_show(struct seq_file *m, void *v) #define PN(x) \ SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); - PN(sysctl_sched_min_granularity); + PN(sysctl_sched_nr_latency); PN(sysctl_sched_wakeup_granularity); PN(sysctl_sched_batch_wakeup_granularity); PN(sysctl_sched_child_runs_first); -- cgit v1.2.3 From fb615581c78efee25e4d04f1145e8fa8ec705dc3 Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 15 Oct 2007 17:00:12 +0200 Subject: sched: group scheduler, fix coding style issues Fix coding style issues reported by Randy Dunlap and others Signed-off-by: Dhaval Giani Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 995bbd384a97..48748d04144d 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -239,11 +239,7 @@ static int root_user_share_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len; - - len = sprintf(page, "%d\n", init_task_grp_load); - - return len; + return sprintf(page, "%d\n", init_task_grp_load); } static int @@ -297,7 +293,7 @@ static int __init init_sched_debug_procfs(void) pe->proc_fops = &sched_debug_fops; #ifdef CONFIG_FAIR_USER_SCHED - pe = create_proc_entry("root_user_share", 0644, NULL); + pe = create_proc_entry("root_user_cpu_share", 0644, NULL); if (!pe) return -ENOMEM; -- cgit v1.2.3 From 4cf86d77f5942336e7cd9de874b38b3c83b54d5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:14 +0200 Subject: sched: cleanup: rename task_grp to task_group cleanup: rename task_grp to task_group. No need to save two characters and 'grp' is annoying to read. Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 48748d04144d..6f87b31d233c 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -239,7 +239,7 @@ static int root_user_share_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - return sprintf(page, "%d\n", init_task_grp_load); + return sprintf(page, "%d\n", init_task_group_load); } static int @@ -260,8 +260,8 @@ root_user_share_write_proc(struct file *file, const char __user *buffer, mutex_lock(&root_user_share_mutex); - init_task_grp_load = shares; - rc = sched_group_set_shares(&init_task_grp, shares); + init_task_group_load = shares; + rc = sched_group_set_shares(&init_task_group, shares); mutex_unlock(&root_user_share_mutex); -- cgit v1.2.3 From 5cb350baf580017da38199625b7365b1763d7180 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Mon, 15 Oct 2007 17:00:14 +0200 Subject: sched: group scheduling, sysfs tunables Add tunables in sysfs to modify a user's cpu share. A directory is created in sysfs for each new user in the system. /sys/kernel/uids/<uid>/cpu_share Reading this file returns the cpu shares granted for the user. 
Writing into this file modifies the cpu share for the user. Only an administrator is allowed to modify a user's cpu share. Ex: # cd /sys/kernel/uids/ # cat 512/cpu_share 1024 # echo 2048 > 512/cpu_share # cat 512/cpu_share 2048 # Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 6f87b31d233c..0aab455a7b41 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -231,45 +231,6 @@ static void sysrq_sched_debug_show(void) sched_debug_show(NULL, NULL); } -#ifdef CONFIG_FAIR_USER_SCHED - -static DEFINE_MUTEX(root_user_share_mutex); - -static int -root_user_share_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - return sprintf(page, "%d\n", init_task_group_load); -} - -static int -root_user_share_write_proc(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - unsigned long shares; - char kbuf[sizeof(unsigned long)+1]; - int rc = 0; - - if (copy_from_user(kbuf, buffer, sizeof(kbuf))) - return -EFAULT; - - shares = simple_strtoul(kbuf, NULL, 0); - - if (!shares) - shares = NICE_0_LOAD; - - mutex_lock(&root_user_share_mutex); - - init_task_group_load = shares; - rc = sched_group_set_shares(&init_task_group, shares); - - mutex_unlock(&root_user_share_mutex); - - return (rc < 0 ? rc : count); -} - -#endif /* CONFIG_FAIR_USER_SCHED */ - static int sched_debug_open(struct inode *inode, struct file *filp) { return single_open(filp, sched_debug_show, NULL); @@ -292,15 +253,6 @@ static int __init init_sched_debug_procfs(void) pe->proc_fops = &sched_debug_fops; -#ifdef CONFIG_FAIR_USER_SCHED - pe = create_proc_entry("root_user_cpu_share", 0644, NULL); - if (!pe) - return -ENOMEM; - - pe->read_proc = root_user_share_read_proc; - pe->write_proc = root_user_share_write_proc; -#endif - return 0; } -- cgit v1.2.3 From 2d92f22784b7b8879ebe3254e44c92cb8792b0dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:18 +0200 Subject: sched: debug: increase width of debug line increase width of debug line - in preparation of more debugging info. 
Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 0aab455a7b41..755815937417 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -198,7 +198,7 @@ static int sched_debug_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Sched Debug Version: v0.05-v20, %s %.*s\n", + SEQ_printf(m, "Sched Debug Version: v0.06-v22, %s %.*s\n", init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); @@ -271,11 +271,12 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) rcu_read_unlock(); SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads); - SEQ_printf(m, "----------------------------------------------\n"); + SEQ_printf(m, + "---------------------------------------------------------\n"); #define P(F) \ - SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) + SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F) #define PN(F) \ - SEQ_printf(m, "%-25s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) + SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) PN(se.exec_start); PN(se.vruntime); @@ -292,7 +293,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.wait_max); P(sched_info.bkl_count); #endif - SEQ_printf(m, "%-25s:%20Ld\n", + SEQ_printf(m, "%-35s:%21Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); P(se.load.weight); P(policy); @@ -305,7 +306,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) t0 = sched_clock(); t1 = sched_clock(); - SEQ_printf(m, "%-25s:%20Ld\n", + SEQ_printf(m, "%-35s:%21Ld\n", "clock-delta", (long long)(t1-t0)); } } -- cgit v1.2.3 From cc367732ff0b1c63d0d7bdd11e6d1661794ef6a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:18 +0200 Subject: sched: debug, improve migration statistics add new migration statistics when SCHED_DEBUG and SCHEDSTATS is enabled. Available in /proc/<pid>/sched. 
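[ annotation, not part of the patch: each nr_* counter this patch adds is bumped through the schedstat helpers, which compile to nothing when CONFIG_SCHEDSTATS is off. The pattern, paraphrased from kernel/sched_stats.h; the example call site is illustrative only: ]

#ifdef CONFIG_SCHEDSTATS
# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
#else
# define schedstat_inc(rq, field)	do { } while (0)
# define schedstat_add(rq, field, amt)	do { } while (0)
#endif

/* e.g. on a successful affine wakeup: schedstat_inc(p, se.nr_wakeups_affine); */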
Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 10 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 755815937417..27e82cbccaa5 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -260,6 +260,7 @@ __initcall(init_sched_debug_procfs); void proc_sched_show_task(struct task_struct *p, struct seq_file *m) { + unsigned long nr_switches; unsigned long flags; int num_threads = 1; @@ -273,8 +274,12 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads); SEQ_printf(m, "---------------------------------------------------------\n"); +#define __P(F) \ + SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)F) #define P(F) \ SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F) +#define __PN(F) \ + SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F)) #define PN(F) \ SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) @@ -282,6 +287,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.vruntime); PN(se.sum_exec_runtime); + nr_switches = p->nvcsw + p->nivcsw; + #ifdef CONFIG_SCHEDSTATS PN(se.wait_start); PN(se.sleep_start); @@ -292,14 +299,55 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.slice_max); PN(se.wait_max); P(sched_info.bkl_count); + P(se.nr_migrations); + P(se.nr_migrations_cold); + P(se.nr_failed_migrations_affine); + P(se.nr_failed_migrations_running); + P(se.nr_failed_migrations_hot); + P(se.nr_forced_migrations); + P(se.nr_forced2_migrations); + P(se.nr_wakeups); + P(se.nr_wakeups_sync); + P(se.nr_wakeups_migrate); + P(se.nr_wakeups_local); + P(se.nr_wakeups_remote); + P(se.nr_wakeups_affine); + P(se.nr_wakeups_affine_attempts); + P(se.nr_wakeups_passive); + P(se.nr_wakeups_idle); + + { + u64 avg_atom, avg_per_cpu; + + avg_atom = p->se.sum_exec_runtime; + if (nr_switches) + do_div(avg_atom, nr_switches); + else + avg_atom = -1LL; + + avg_per_cpu = p->se.sum_exec_runtime; + if (p->se.nr_migrations) + avg_per_cpu = div64_64(avg_per_cpu, p->se.nr_migrations); + else + avg_per_cpu = -1LL; + + __PN(avg_atom); + __PN(avg_per_cpu); + } #endif + __P(nr_switches); SEQ_printf(m, "%-35s:%21Ld\n", - "nr_switches", (long long)(p->nvcsw + p->nivcsw)); + "nr_voluntary_switches", (long long)p->nvcsw); + SEQ_printf(m, "%-35s:%21Ld\n", + "nr_involuntary_switches", (long long)p->nivcsw); + P(se.load.weight); P(policy); P(prio); -#undef P #undef PN +#undef __PN +#undef P +#undef __P { u64 t0, t1; @@ -314,13 +362,32 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) void proc_sched_set_task(struct task_struct *p) { #ifdef CONFIG_SCHEDSTATS - p->se.sleep_max = 0; - p->se.block_max = 0; - p->se.exec_max = 0; - p->se.slice_max = 0; - p->se.wait_max = 0; - p->sched_info.bkl_count = 0; + p->se.wait_max = 0; + p->se.sleep_max = 0; + p->se.sum_sleep_runtime = 0; + p->se.block_max = 0; + p->se.exec_max = 0; + p->se.slice_max = 0; + p->se.nr_migrations = 0; + p->se.nr_migrations_cold = 0; + p->se.nr_failed_migrations_affine = 0; + p->se.nr_failed_migrations_running = 0; + p->se.nr_failed_migrations_hot = 0; + p->se.nr_forced_migrations = 0; + p->se.nr_forced2_migrations = 0; + p->se.nr_wakeups = 0; + p->se.nr_wakeups_sync = 0; + p->se.nr_wakeups_migrate = 0; + p->se.nr_wakeups_local = 0; + p->se.nr_wakeups_remote = 0; + p->se.nr_wakeups_affine = 0; + 
p->se.nr_wakeups_affine_attempts = 0; + p->se.nr_wakeups_passive = 0; + p->se.nr_wakeups_idle = 0; + p->sched_info.bkl_count = 0; #endif - p->se.sum_exec_runtime = 0; - p->se.prev_sum_exec_runtime = 0; + p->se.sum_exec_runtime = 0; + p->se.prev_sum_exec_runtime = 0; + p->nvcsw = 0; + p->nivcsw = 0; } -- cgit v1.2.3 From 0dbee3a6b006dbe814d002cb18e94bf24a216451 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 15 Oct 2007 17:00:19 +0200 Subject: Make scheduler debug file operations const In general, struct file_operations are const in the kernel, to not have false cacheline sharing and to catch bugs at compiletime with accidental writes to them. The new scheduler code introduces a new non-const one; fix this up. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 27e82cbccaa5..a5e517ec07c3 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -236,7 +236,7 @@ static int sched_debug_open(struct inode *inode, struct file *filp) return single_open(filp, sched_debug_show, NULL); } -static struct file_operations sched_debug_fops = { +static const struct file_operations sched_debug_fops = { .open = sched_debug_open, .read = seq_read, .llseek = seq_lseek, -- cgit v1.2.3
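[ annotation: the SPLIT_NS formatting introduced earlier in this series is easy to sanity-check from userspace. A self-contained sketch that mirrors nsec_high()/nsec_low() with plain division standing in for the kernel's do_div(); it prints "12.345678" for an input of 12345678 nsecs: ]

#include <stdio.h>

static long long nsec_high(long long nsec)
{
	if (nsec < 0)
		return -((-nsec) / 1000000);

	return nsec / 1000000;
}

static unsigned long nsec_low(long long nsec)
{
	if (nsec < 0)
		nsec = -nsec;

	return (unsigned long)(nsec % 1000000);
}

#define SPLIT_NS(x) nsec_high(x), nsec_low(x)

int main(void)
{
	long long v = 12345678LL;	/* 12,345,678 nsecs */

	printf("%lld.%06lu\n", SPLIT_NS(v));	/* prints 12.345678 */
	return 0;
}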