From a4b29ba2f72673aaa60ba11ced74d579771dd578 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:02 +0200 Subject: sched: small sched_debug cleanup small kernel/sched_debug.c cleanup - break up multi-variable assignment. no code changed: text data bss dec hex filename 38869 3550 24 42443 a5cb sched.o.before 38869 3550 24 42443 a5cb sched.o.after Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index c3ee38bd3426..94915f1fd9de 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -279,9 +279,13 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) void proc_sched_set_task(struct task_struct *p) { #ifdef CONFIG_SCHEDSTATS - p->se.sleep_max = p->se.block_max = p->se.exec_max = p->se.wait_max = 0; - p->se.wait_runtime_overruns = p->se.wait_runtime_underruns = 0; + p->se.sleep_max = 0; + p->se.block_max = 0; + p->se.exec_max = 0; + p->se.wait_max = 0; + p->se.wait_runtime_overruns = 0; + p->se.wait_runtime_underruns = 0; #endif - p->se.sum_exec_runtime = 0; + p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; } -- cgit v1.2.3 From eba1ed4b7e52720e3099325874811c38a5ec1562 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:02 +0200 Subject: sched: debug: track maximum 'slice' track the maximum amount of time a task has executed while the CPU load was at least 2x. (i.e. at least two nice-0 tasks were runnable) Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 94915f1fd9de..fd080f686f18 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -254,6 +254,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.sleep_max); P(se.block_max); P(se.exec_max); + P(se.slice_max); P(se.wait_max); P(se.wait_runtime_overruns); P(se.wait_runtime_underruns); @@ -282,6 +283,7 @@ void proc_sched_set_task(struct task_struct *p) p->se.sleep_max = 0; p->se.block_max = 0; p->se.exec_max = 0; + p->se.slice_max = 0; p->se.wait_max = 0; p->se.wait_runtime_overruns = 0; p->se.wait_runtime_underruns = 0; -- cgit v1.2.3 From a25707f3aef9cf68c341eba5960d580f364e4e6f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:03 +0200 Subject: sched: remove precise CPU load CPU load calculations are statistical anyway, and there's little benefit from having it calculated on every scheduling event. So remove this code, it gets rid of a divide from the scheduler wakeup and context-switch fastpath. 
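[ annotation, not part of the patch: the "statistical" load tracking that remains after this change is the per-tick decay of rq->cpu_load[]. The sketch below is a simplified rendition of update_cpu_load() from kernel/sched.c of this era; the rounding of new_load and the exact index count are elided, so treat it as illustrative rather than verbatim kernel code. ]

static void update_cpu_load_sketch(struct rq *this_rq)
{
	unsigned long this_load = this_rq->load.weight;
	int i, scale;

	this_rq->nr_load_updates++;

	/*
	 * cpu_load[0] tracks the instantaneous load; each higher index
	 * decays more slowly (scale == 2^i), giving a smoother average:
	 */
	for (i = 0, scale = 1; i < 5; i++, scale += scale) {
		unsigned long old_load = this_rq->cpu_load[i];

		this_rq->cpu_load[i] = (old_load * (scale - 1) + this_load) >> i;
	}
}

Because this runs once per scheduler tick instead of on every wakeup and context switch, the divide this patch removes disappears from the fast path entirely.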
Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index fd080f686f18..6b789dae7fdf 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -145,8 +145,6 @@ static void print_cpu(struct seq_file *m, int cpu) P(nr_running); SEQ_printf(m, " .%-30s: %lu\n", "load", rq->ls.load.weight); - P(ls.delta_fair); - P(ls.delta_exec); P(nr_switches); P(nr_load_updates); P(nr_uninterruptible); -- cgit v1.2.3 From 67e12eac328b276dca7e61640632ed996ff1a93a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:05 +0200 Subject: sched: add se->vruntime debugging debug se->vruntime fields. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith --- kernel/sched_debug.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 6b789dae7fdf..75ccf7aa98f3 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -44,7 +44,8 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n", + SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld %15Ld\n", + (long long)p->se.vruntime, (long long)p->se.sum_exec_runtime, (long long)p->se.sum_wait_runtime, (long long)p->se.sum_sleep_runtime, @@ -64,10 +65,10 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) "\nrunnable tasks:\n" " task PID tree-key delta waiting" " switches prio" - " sum-exec sum-wait sum-sleep" + " exec-runtime sum-exec sum-wait sum-sleep" " wait-overrun wait-underrun\n" "------------------------------------------------------------------" - "----------------" + "--------------------------------" "------------------------------------------------" "--------------------------------\n"); @@ -108,6 +109,11 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { + s64 MIN_vruntime = -1, max_vruntime = -1, spread; + struct rq *rq = &per_cpu(runqueues, cpu); + struct sched_entity *last; + unsigned long flags; + SEQ_printf(m, "\ncfs_rq\n"); #define P(x) \ @@ -115,6 +121,23 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) P(fair_clock); P(exec_clock); + P(min_vruntime); + + spin_lock_irqsave(&rq->lock, flags); + if (cfs_rq->rb_leftmost) + MIN_vruntime = (__pick_next_entity(cfs_rq))->vruntime; + last = __pick_last_entity(cfs_rq); + if (last) + max_vruntime = last->vruntime; + spin_unlock_irqrestore(&rq->lock, flags); + SEQ_printf(m, " .%-30s: %Ld\n", "MIN_vruntime", + (long long)MIN_vruntime); + SEQ_printf(m, " .%-30s: %Ld\n", "max_vruntime", + (long long)max_vruntime); + spread = max_vruntime - MIN_vruntime; + SEQ_printf(m, " .%-30s: %Ld\n", "spread", + (long long)spread); + P(wait_runtime); P(wait_runtime_overruns); P(wait_runtime_underruns); @@ -243,6 +266,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.wait_start_fair); P(se.exec_start); P(se.sleep_start_fair); + P(se.vruntime); P(se.sum_exec_runtime); #ifdef CONFIG_SCHEDSTATS -- cgit v1.2.3 From 86d9560cb6bd85986e98b4c63705daec94406bd4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:06 +0200 
Subject: sched: add more vruntime statistics add more vruntime statistics. Signed-off-by: Ingo Molnar Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 75ccf7aa98f3..7a61706637c7 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -109,7 +109,8 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { - s64 MIN_vruntime = -1, max_vruntime = -1, spread; + s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, + spread, rq0_min_vruntime, spread0; struct rq *rq = &per_cpu(runqueues, cpu); struct sched_entity *last; unsigned long flags; @@ -121,7 +122,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) P(fair_clock); P(exec_clock); - P(min_vruntime); spin_lock_irqsave(&rq->lock, flags); if (cfs_rq->rb_leftmost) @@ -129,14 +129,21 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) last = __pick_last_entity(cfs_rq); if (last) max_vruntime = last->vruntime; + min_vruntime = rq->cfs.min_vruntime; + rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; spin_unlock_irqrestore(&rq->lock, flags); SEQ_printf(m, " .%-30s: %Ld\n", "MIN_vruntime", (long long)MIN_vruntime); + SEQ_printf(m, " .%-30s: %Ld\n", "min_vruntime", + (long long)min_vruntime); SEQ_printf(m, " .%-30s: %Ld\n", "max_vruntime", (long long)max_vruntime); spread = max_vruntime - MIN_vruntime; SEQ_printf(m, " .%-30s: %Ld\n", "spread", (long long)spread); + spread0 = min_vruntime - rq0_min_vruntime; + SEQ_printf(m, " .%-30s: %Ld\n", "spread0", + (long long)spread0); P(wait_runtime); P(wait_runtime_overruns); -- cgit v1.2.3 From 495eca494aa6006df55e3a04e105462c5940ca17 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Mon, 15 Oct 2007 17:00:06 +0200 Subject: sched: clean up struct load_stat 'struct load_stat' is redundant now so let's get rid of it. Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 7a61706637c7..62965f0ae37c 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -174,7 +174,7 @@ static void print_cpu(struct seq_file *m, int cpu) P(nr_running); SEQ_printf(m, " .%-30s: %lu\n", "load", - rq->ls.load.weight); + rq->load.weight); P(nr_switches); P(nr_load_updates); P(nr_uninterruptible); -- cgit v1.2.3 From e22f5bbf86d8cce710d5c8ba5bf57832e73aab8c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:06 +0200 Subject: sched: remove wait_runtime limit remove the wait_runtime-limit fields and the code depending on it, now that the math has been changed over to rely on the vruntime metric. 
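[ annotation, not part of the patch: the vruntime metric that replaces the wait_runtime machinery is just weight-scaled execution time. A minimal sketch of the idea, assuming NICE_0_LOAD == 1024 and ignoring the fixed-point helpers (calc_delta_fair() and friends) the kernel actually uses: ]

/*
 * A nice-0 task accrues vruntime at wall-clock rate; heavier (lower-nice)
 * tasks accrue it more slowly and lighter tasks faster. CFS always runs
 * the leftmost entity in the rbtree, i.e. the one with the smallest
 * vruntime, so no explicit wait_runtime limit is needed anymore.
 */
static inline u64 vruntime_delta(u64 delta_exec, unsigned long weight)
{
	return delta_exec * 1024 / weight;	/* 1024 == NICE_0_LOAD */
}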
Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 62965f0ae37c..3350169a7d2a 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -148,7 +148,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) P(wait_runtime); P(wait_runtime_overruns); P(wait_runtime_underruns); - P(sleeper_bonus); #undef P print_cfs_rq_runtime_sum(m, cpu, cfs_rq); @@ -272,7 +271,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.wait_runtime); P(se.wait_start_fair); P(se.exec_start); - P(se.sleep_start_fair); P(se.vruntime); P(se.sum_exec_runtime); -- cgit v1.2.3 From bbdba7c0e1161934ae881ad00e4db49830f5ef59 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:06 +0200 Subject: sched: remove wait_runtime fields and features remove wait_runtime based fields and features, now that the CFS math has been changed over to the vruntime metric. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Signed-off-by: Mike Galbraith Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 54 +++++----------------------------------------------- 1 file changed, 5 insertions(+), 49 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 3350169a7d2a..e3b62324ac31 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -36,21 +36,16 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) else SEQ_printf(m, " "); - SEQ_printf(m, "%15s %5d %15Ld %13Ld %13Ld %9Ld %5d ", + SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ", p->comm, p->pid, (long long)p->se.fair_key, - (long long)(p->se.fair_key - rq->cfs.fair_clock), - (long long)p->se.wait_runtime, (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld %15Ld\n", + SEQ_printf(m, "%15Ld %15Ld %15Ld\n", (long long)p->se.vruntime, (long long)p->se.sum_exec_runtime, - (long long)p->se.sum_wait_runtime, - (long long)p->se.sum_sleep_runtime, - (long long)p->se.wait_runtime_overruns, - (long long)p->se.wait_runtime_underruns); + (long long)p->se.sum_sleep_runtime); #else SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n", 0LL, 0LL, 0LL, 0LL, 0LL); @@ -63,10 +58,8 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) SEQ_printf(m, "\nrunnable tasks:\n" - " task PID tree-key delta waiting" - " switches prio" - " exec-runtime sum-exec sum-wait sum-sleep" - " wait-overrun wait-underrun\n" + " task PID tree-key switches prio" + " exec-runtime sum-exec sum-sleep\n" "------------------------------------------------------------------" "--------------------------------" "------------------------------------------------" @@ -84,29 +77,6 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) read_unlock_irq(&tasklist_lock); } -static void -print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) -{ - s64 wait_runtime_rq_sum = 0; - struct task_struct *p; - struct rb_node *curr; - unsigned long flags; - struct rq *rq = &per_cpu(runqueues, cpu); - - spin_lock_irqsave(&rq->lock, flags); - curr = first_fair(cfs_rq); - while (curr) { - p = rb_entry(curr, struct task_struct, se.run_node); - wait_runtime_rq_sum += p->se.wait_runtime; - - curr = rb_next(curr); - } - spin_unlock_irqrestore(&rq->lock, flags); - - SEQ_printf(m, " 
.%-30s: %Ld\n", "wait_runtime_rq_sum", - (long long)wait_runtime_rq_sum); -} - void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) { s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1, @@ -120,7 +90,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x)) - P(fair_clock); P(exec_clock); spin_lock_irqsave(&rq->lock, flags); @@ -144,13 +113,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) spread0 = min_vruntime - rq0_min_vruntime; SEQ_printf(m, " .%-30s: %Ld\n", "spread0", (long long)spread0); - - P(wait_runtime); - P(wait_runtime_overruns); - P(wait_runtime_underruns); #undef P - - print_cfs_rq_runtime_sum(m, cpu, cfs_rq); } static void print_cpu(struct seq_file *m, int cpu) @@ -268,8 +231,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) #define P(F) \ SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) - P(se.wait_runtime); - P(se.wait_start_fair); P(se.exec_start); P(se.vruntime); P(se.sum_exec_runtime); @@ -283,9 +244,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(se.exec_max); P(se.slice_max); P(se.wait_max); - P(se.wait_runtime_overruns); - P(se.wait_runtime_underruns); - P(se.sum_wait_runtime); #endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); @@ -312,8 +270,6 @@ void proc_sched_set_task(struct task_struct *p) p->se.exec_max = 0; p->se.slice_max = 0; p->se.wait_max = 0; - p->se.wait_runtime_overruns = 0; - p->se.wait_runtime_underruns = 0; #endif p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; -- cgit v1.2.3 From 30cfdcfc5f180fc21a3dad6ae3b7b2a9ee112186 Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Mon, 15 Oct 2007 17:00:07 +0200 Subject: sched: do not keep current in the tree and get rid of sched_entity::fair_key Get rid of 'sched_entity::fair_key'. As a side effect, 'current' is not kept within the tree for SCHED_NORMAL/BATCH tasks anymore. This simplifies some parts of code (e.g. entity_tick() and yield_task_fair()) and also somewhat optimizes them (e.g. a single update_curr() now vs. dequeue/enqueue() before in entity_tick()). Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index e3b62324ac31..bb34b8188f61 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -38,7 +38,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ", p->comm, p->pid, - (long long)p->se.fair_key, + (long long)p->se.vruntime, (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS -- cgit v1.2.3 From 1a75b94f7bda591f4c53af86baa50e1eaee35927 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:08 +0200 Subject: sched: prettify /proc/sched_debug output print the correct amount of dashes in /proc/sched_debug. 
Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index bb34b8188f61..22cf74c1dc03 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -60,10 +60,8 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) "\nrunnable tasks:\n" " task PID tree-key switches prio" " exec-runtime sum-exec sum-sleep\n" - "------------------------------------------------------------------" - "--------------------------------" - "------------------------------------------------" - "--------------------------------\n"); + "------------------------------------------------------" + "------------------------------------------------"); read_lock_irq(&tasklist_lock); -- cgit v1.2.3 From ef83a5714d9a817b2e9b97f04a6d070fbd6ecf80 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:08 +0200 Subject: sched: enhance debug output enhance debug output by changing 12345678 nsecs to 12.345678 output, this is more human-readable. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 108 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 40 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 22cf74c1dc03..e2c1e0dfdf50 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -28,6 +28,31 @@ printk(x); \ } while (0) +/* + * Ease the printing of nsec fields: + */ +static long long nsec_high(long long nsec) +{ + if (nsec < 0) { + nsec = -nsec; + do_div(nsec, 1000000); + return -nsec; + } + do_div(nsec, 1000000); + + return nsec; +} + +static unsigned long nsec_low(long long nsec) +{ + if (nsec < 0) + nsec = -nsec; + + return do_div(nsec, 1000000); +} + +#define SPLIT_NS(x) nsec_high(x), nsec_low(x) + static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { @@ -36,19 +61,19 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) else SEQ_printf(m, " "); - SEQ_printf(m, "%15s %5d %15Ld %13Ld %5d ", + SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ", p->comm, p->pid, - (long long)p->se.vruntime, + SPLIT_NS(p->se.vruntime), (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, "%15Ld %15Ld %15Ld\n", - (long long)p->se.vruntime, - (long long)p->se.sum_exec_runtime, - (long long)p->se.sum_sleep_runtime); + SEQ_printf(m, "%15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n", + SPLIT_NS(p->se.vruntime), + SPLIT_NS(p->se.sum_exec_runtime), + SPLIT_NS(p->se.sum_sleep_runtime)); #else - SEQ_printf(m, "%15Ld %15Ld %15Ld %15Ld %15Ld\n", - 0LL, 0LL, 0LL, 0LL, 0LL); + SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n", + 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); #endif } @@ -85,10 +110,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, "\ncfs_rq\n"); -#define P(x) \ - SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x)) - - P(exec_clock); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", + SPLIT_NS(cfs_rq->exec_clock)); spin_lock_irqsave(&rq->lock, flags); if (cfs_rq->rb_leftmost) @@ -99,19 +122,18 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) min_vruntime = rq->cfs.min_vruntime; rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; spin_unlock_irqrestore(&rq->lock, flags); - SEQ_printf(m, " 
.%-30s: %Ld\n", "MIN_vruntime", - (long long)MIN_vruntime); - SEQ_printf(m, " .%-30s: %Ld\n", "min_vruntime", - (long long)min_vruntime); - SEQ_printf(m, " .%-30s: %Ld\n", "max_vruntime", - (long long)max_vruntime); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", + SPLIT_NS(MIN_vruntime)); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", + SPLIT_NS(min_vruntime)); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "max_vruntime", + SPLIT_NS(max_vruntime)); spread = max_vruntime - MIN_vruntime; - SEQ_printf(m, " .%-30s: %Ld\n", "spread", - (long long)spread); + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", + SPLIT_NS(spread)); spread0 = min_vruntime - rq0_min_vruntime; - SEQ_printf(m, " .%-30s: %Ld\n", "spread0", - (long long)spread0); -#undef P + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", + SPLIT_NS(spread0)); } static void print_cpu(struct seq_file *m, int cpu) @@ -131,6 +153,8 @@ static void print_cpu(struct seq_file *m, int cpu) #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x)) +#define PN(x) \ + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) P(nr_running); SEQ_printf(m, " .%-30s: %lu\n", "load", @@ -139,21 +163,22 @@ static void print_cpu(struct seq_file *m, int cpu) P(nr_load_updates); P(nr_uninterruptible); SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies); - P(next_balance); + PN(next_balance); P(curr->pid); - P(clock); - P(idle_clock); - P(prev_clock_raw); + PN(clock); + PN(idle_clock); + PN(prev_clock_raw); P(clock_warps); P(clock_overflows); P(clock_deep_idle_events); - P(clock_max_delta); + PN(clock_max_delta); P(cpu_load[0]); P(cpu_load[1]); P(cpu_load[2]); P(cpu_load[3]); P(cpu_load[4]); #undef P +#undef PN print_cfs_stats(m, cpu); @@ -170,7 +195,7 @@ static int sched_debug_show(struct seq_file *m, void *v) (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now); + SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); for_each_online_cpu(cpu) print_cpu(m, cpu); @@ -228,20 +253,22 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) SEQ_printf(m, "----------------------------------------------\n"); #define P(F) \ SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) +#define PN(F) \ + SEQ_printf(m, "%-25s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) - P(se.exec_start); - P(se.vruntime); - P(se.sum_exec_runtime); + PN(se.exec_start); + PN(se.vruntime); + PN(se.sum_exec_runtime); #ifdef CONFIG_SCHEDSTATS - P(se.wait_start); - P(se.sleep_start); - P(se.block_start); - P(se.sleep_max); - P(se.block_max); - P(se.exec_max); - P(se.slice_max); - P(se.wait_max); + PN(se.wait_start); + PN(se.sleep_start); + PN(se.block_start); + PN(se.sleep_max); + PN(se.block_max); + PN(se.exec_max); + PN(se.slice_max); + PN(se.wait_max); #endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); @@ -249,6 +276,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) P(policy); P(prio); #undef P +#undef PN { u64 t0, t1; -- cgit v1.2.3 From c86da3a3d40f6e7a032edfaea191fb51e9626c8f Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 15 Oct 2007 17:00:08 +0200 Subject: sched: fix formatting of /proc/sched_debug fix formatting of /proc/sched_debug Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c 
index e2c1e0dfdf50..4eaaf96559d6 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -67,7 +67,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) (long long)(p->nvcsw + p->nivcsw), p->prio); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, "%15Ld.%06ld %15Ld.%06ld %15Ld.%06ld\n", + SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld\n", SPLIT_NS(p->se.vruntime), SPLIT_NS(p->se.sum_exec_runtime), SPLIT_NS(p->se.sum_sleep_runtime)); @@ -83,10 +83,10 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) SEQ_printf(m, "\nrunnable tasks:\n" - " task PID tree-key switches prio" - " exec-runtime sum-exec sum-sleep\n" + " task PID tree-key switches prio" + " exec-runtime sum-exec sum-sleep\n" "------------------------------------------------------" - "------------------------------------------------"); + "----------------------------------------------------\n"); read_lock_irq(&tasklist_lock); -- cgit v1.2.3 From 545f3b18152355acbb8da59873506fcf66c7c60e Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 15 Oct 2007 17:00:09 +0200 Subject: sched: print nr_running and load in /proc/sched_debug - print nr_running and load information for cfs_rq in /proc/sched_debug Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 4eaaf96559d6..3e47e870b043 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -134,6 +134,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) spread0 = min_vruntime - rq0_min_vruntime; SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread0", SPLIT_NS(spread0)); + SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); + SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); } static void print_cpu(struct seq_file *m, int cpu) -- cgit v1.2.3 From 24e377a83220ef05c9b5bec7e01d65eed6609aa6 Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 15 Oct 2007 17:00:09 +0200 Subject: sched: add fair-user scheduler Enable user-id based fair group scheduling. This is useful for anyone who wants to test the group scheduler w/o having to enable CONFIG_CGROUPS. A separate scheduling group (i.e struct task_grp) is automatically created for every new user added to the system. Upon uid change for a task, it is made to move to the corresponding scheduling group. A /proc tunable (/proc/root_user_share) is also provided to tune root user's quota of cpu bandwidth. 
Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 3e47e870b043..57ee9d5630a8 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -212,6 +212,49 @@ static void sysrq_sched_debug_show(void) sched_debug_show(NULL, NULL); } +#ifdef CONFIG_FAIR_USER_SCHED + +static DEFINE_MUTEX(root_user_share_mutex); + +static int +root_user_share_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d\n", init_task_grp_load); + + return len; +} + +static int +root_user_share_write_proc(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + unsigned long shares; + char kbuf[sizeof(unsigned long)+1]; + int rc = 0; + + if (copy_from_user(kbuf, buffer, sizeof(kbuf))) + return -EFAULT; + + shares = simple_strtoul(kbuf, NULL, 0); + + if (!shares) + shares = NICE_0_LOAD; + + mutex_lock(&root_user_share_mutex); + + init_task_grp_load = shares; + rc = sched_group_set_shares(&init_task_grp, shares); + + mutex_unlock(&root_user_share_mutex); + + return (rc < 0 ? rc : count); +} + +#endif /* CONFIG_FAIR_USER_SCHED */ + static int sched_debug_open(struct inode *inode, struct file *filp) { return single_open(filp, sched_debug_show, NULL); @@ -234,6 +277,15 @@ static int __init init_sched_debug_procfs(void) pe->proc_fops = &sched_debug_fops; +#ifdef CONFIG_FAIR_USER_SCHED + pe = create_proc_entry("root_user_share", 0644, NULL); + if (!pe) + return -ENOMEM; + + pe->read_proc = root_user_share_read_proc; + pe->write_proc = root_user_share_write_proc; +#endif + return 0; } -- cgit v1.2.3 From b8efb56172bc55082b8490778b07ef73eea0b551 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: BKL usage statistics add per task and per rq BKL usage statistics. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 57ee9d5630a8..823b63a3a3e1 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -136,6 +136,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SPLIT_NS(spread0)); SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); + SEQ_printf(m, " .%-30s: %ld\n", "bkl_cnt", + rq->bkl_cnt); } static void print_cpu(struct seq_file *m, int cpu) @@ -323,6 +325,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.exec_max); PN(se.slice_max); PN(se.wait_max); + P(sched_info.bkl_cnt); #endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); @@ -350,6 +353,7 @@ void proc_sched_set_task(struct task_struct *p) p->se.exec_max = 0; p->se.slice_max = 0; p->se.wait_max = 0; + p->sched_info.bkl_cnt = 0; #endif p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; -- cgit v1.2.3 From fdd71d132badad542a9ab99ab4a9c3c08fa6412f Mon Sep 17 00:00:00 2001 From: "S.Caglar Onur" Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: BKL usage statistics, fix build fix for the SCHED_DEBUG && !SCHEDSTATS case. 
Signed-off-by: S.Caglar Onur Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 823b63a3a3e1..b6d0a94d4120 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -136,8 +136,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SPLIT_NS(spread0)); SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); +#ifdef CONFIG_SCHEDSTATS SEQ_printf(m, " .%-30s: %ld\n", "bkl_cnt", rq->bkl_cnt); +#endif } static void print_cpu(struct seq_file *m, int cpu) -- cgit v1.2.3 From 1aa4731eff7dab7bd01747b46f654f449f1cfc2c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: print settings print the current value of all tunables in /proc/sched_debug output. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index b6d0a94d4120..d79e1ec5b06a 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -203,6 +203,19 @@ static int sched_debug_show(struct seq_file *m, void *v) SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); +#define P(x) \ + SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(x)) +#define PN(x) \ + SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(x)) + PN(sysctl_sched_latency); + PN(sysctl_sched_min_granularity); + PN(sysctl_sched_wakeup_granularity); + PN(sysctl_sched_batch_wakeup_granularity); + PN(sysctl_sched_child_runs_first); + P(sysctl_sched_features); +#undef PN +#undef P + for_each_online_cpu(cpu) print_cpu(m, cpu); -- cgit v1.2.3 From d822cecedad88b69a7d68aa8d49e1f238aa320c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: more width for parameter printouts more width for parameter printouts in /proc/sched_debug. Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index d79e1ec5b06a..b24f17de19e3 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -204,9 +204,9 @@ static int sched_debug_show(struct seq_file *m, void *v) SEQ_printf(m, "now at %Lu.%06ld msecs\n", SPLIT_NS(now)); #define P(x) \ - SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(x)) + SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) #define PN(x) \ - SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(x)) + SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); PN(sysctl_sched_min_granularity); PN(sysctl_sched_wakeup_granularity); -- cgit v1.2.3 From ddc972975091ba5f839bf24d0f9ef54fe90ee741 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Oct 2007 17:00:10 +0200 Subject: sched debug: check spread debug feature: check how well we schedule within a reasonable vruntime 'spread' range. (note that CPU overload can increase the spread, so this is not a hard condition, but normal loads should be within the spread.) 
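[ annotation, not part of the patch: the nr_spread_over counter this patch prints is fed from kernel/sched_fair.c. Roughly, on enqueue the entity's distance from min_vruntime is compared against a few latency periods; the sketch below reflects that logic but is reconstructed from memory, so the exact threshold should be checked against the source. ]

static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
#ifdef CONFIG_SCHED_DEBUG
	s64 d = se->vruntime - cfs_rq->min_vruntime;

	if (d < 0)
		d = -d;

	/* a spread beyond ~3 latency periods hints at scheduling trouble */
	if (d > 3 * sysctl_sched_latency)
		schedstat_inc(cfs_rq, nr_spread_over);
#endif
}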
Signed-off-by: Ingo Molnar Signed-off-by: Peter Zijlstra --- kernel/sched_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index b24f17de19e3..4659c90c3418 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -140,6 +140,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %ld\n", "bkl_cnt", rq->bkl_cnt); #endif + SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", + cfs_rq->nr_spread_over); } static void print_cpu(struct seq_file *m, int cpu) -- cgit v1.2.3 From 2d72376b3af1e7d4d4515ebfd0f4383f2e92c343 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:12 +0200 Subject: sched: clean up schedstats, cnt -> count rename all 'cnt' fields and variables to the less yucky 'count' name. yuckage noticed by Andrew Morton. no change in code, other than the /proc/sched_debug bkl_count string got a bit larger: text data bss dec hex filename 38236 3506 24 41766 a326 sched.o.before 38240 3506 24 41770 a32a sched.o.after Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 4659c90c3418..be79cd6d9e80 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -137,8 +137,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); #ifdef CONFIG_SCHEDSTATS - SEQ_printf(m, " .%-30s: %ld\n", "bkl_cnt", - rq->bkl_cnt); + SEQ_printf(m, " .%-30s: %ld\n", "bkl_count", + rq->bkl_count); #endif SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", cfs_rq->nr_spread_over); @@ -342,7 +342,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.exec_max); PN(se.slice_max); PN(se.wait_max); - P(sched_info.bkl_cnt); + P(sched_info.bkl_count); #endif SEQ_printf(m, "%-25s:%20Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); @@ -370,7 +370,7 @@ void proc_sched_set_task(struct task_struct *p) p->se.exec_max = 0; p->se.slice_max = 0; p->se.wait_max = 0; - p->sched_info.bkl_cnt = 0; + p->sched_info.bkl_count = 0; #endif p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; -- cgit v1.2.3 From 5f6d858ecca78f71755859a346d845e302973cd1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Oct 2007 17:00:12 +0200 Subject: sched: speed up and simplify vslice calculations speed up and simplify vslice calculations. 
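[ annotation, not part of the patch: sysctl_sched_nr_latency, which replaces min_granularity in the debug output below, bounds how many tasks share one latency period. A sketch of the period calculation from sched_fair.c of this vintage; the rounding details are approximate: ]

static u64 __sched_period(unsigned long nr_running)
{
	u64 period = sysctl_sched_latency;
	unsigned long nr_latency = sysctl_sched_nr_latency;

	/*
	 * With more than nr_latency runnable tasks, stretch the period so
	 * that no task's slice drops below latency/nr_latency:
	 */
	if (nr_running > nr_latency) {
		period *= nr_running;
		do_div(period, nr_latency);
	}
	return period;
}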
[ From: Mike Galbraith : build fix ] Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index be79cd6d9e80..995bbd384a97 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -210,7 +210,7 @@ static int sched_debug_show(struct seq_file *m, void *v) #define PN(x) \ SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) PN(sysctl_sched_latency); - PN(sysctl_sched_min_granularity); + PN(sysctl_sched_nr_latency); PN(sysctl_sched_wakeup_granularity); PN(sysctl_sched_batch_wakeup_granularity); PN(sysctl_sched_child_runs_first); -- cgit v1.2.3 From fb615581c78efee25e4d04f1145e8fa8ec705dc3 Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Mon, 15 Oct 2007 17:00:12 +0200 Subject: sched: group scheduler, fix coding style issues Fix coding style issues reported by Randy Dunlap and others Signed-off-by: Dhaval Giani Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Ingo Molnar Reviewed-by: Thomas Gleixner --- kernel/sched_debug.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 995bbd384a97..48748d04144d 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -239,11 +239,7 @@ static int root_user_share_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - int len; - - len = sprintf(page, "%d\n", init_task_grp_load); - - return len; + return sprintf(page, "%d\n", init_task_grp_load); } static int @@ -297,7 +293,7 @@ static int __init init_sched_debug_procfs(void) pe->proc_fops = &sched_debug_fops; #ifdef CONFIG_FAIR_USER_SCHED - pe = create_proc_entry("root_user_share", 0644, NULL); + pe = create_proc_entry("root_user_cpu_share", 0644, NULL); if (!pe) return -ENOMEM; -- cgit v1.2.3 From 4cf86d77f5942336e7cd9de874b38b3c83b54d5e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:14 +0200 Subject: sched: cleanup: rename task_grp to task_group cleanup: rename task_grp to task_group. No need to save two characters and 'grp' is annoying to read. Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 48748d04144d..6f87b31d233c 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -239,7 +239,7 @@ static int root_user_share_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) { - return sprintf(page, "%d\n", init_task_grp_load); + return sprintf(page, "%d\n", init_task_group_load); } static int @@ -260,8 +260,8 @@ root_user_share_write_proc(struct file *file, const char __user *buffer, mutex_lock(&root_user_share_mutex); - init_task_grp_load = shares; - rc = sched_group_set_shares(&init_task_grp, shares); + init_task_group_load = shares; + rc = sched_group_set_shares(&init_task_group, shares); mutex_unlock(&root_user_share_mutex); -- cgit v1.2.3 From 5cb350baf580017da38199625b7365b1763d7180 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Mon, 15 Oct 2007 17:00:14 +0200 Subject: sched: group scheduling, sysfs tunables Add tunables in sysfs to modify a user's cpu share. A directory is created in sysfs for each new user in the system. /sys/kernel/uids/<uid>/cpu_share Reading this file returns the cpu shares granted for the user. 
Writing into this file modifies the cpu share for the user. Only an administrator is allowed to modify a user's cpu share. Ex: # cd /sys/kernel/uids/ # cat 512/cpu_share 1024 # echo 2048 > 512/cpu_share # cat 512/cpu_share 2048 # Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Dhaval Giani Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 6f87b31d233c..0aab455a7b41 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -231,45 +231,6 @@ static void sysrq_sched_debug_show(void) sched_debug_show(NULL, NULL); } -#ifdef CONFIG_FAIR_USER_SCHED - -static DEFINE_MUTEX(root_user_share_mutex); - -static int -root_user_share_read_proc(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - return sprintf(page, "%d\n", init_task_group_load); -} - -static int -root_user_share_write_proc(struct file *file, const char __user *buffer, - unsigned long count, void *data) -{ - unsigned long shares; - char kbuf[sizeof(unsigned long)+1]; - int rc = 0; - - if (copy_from_user(kbuf, buffer, sizeof(kbuf))) - return -EFAULT; - - shares = simple_strtoul(kbuf, NULL, 0); - - if (!shares) - shares = NICE_0_LOAD; - - mutex_lock(&root_user_share_mutex); - - init_task_group_load = shares; - rc = sched_group_set_shares(&init_task_group, shares); - - mutex_unlock(&root_user_share_mutex); - - return (rc < 0 ? rc : count); -} - -#endif /* CONFIG_FAIR_USER_SCHED */ - static int sched_debug_open(struct inode *inode, struct file *filp) { return single_open(filp, sched_debug_show, NULL); @@ -292,15 +253,6 @@ static int __init init_sched_debug_procfs(void) pe->proc_fops = &sched_debug_fops; -#ifdef CONFIG_FAIR_USER_SCHED - pe = create_proc_entry("root_user_cpu_share", 0644, NULL); - if (!pe) - return -ENOMEM; - - pe->read_proc = root_user_share_read_proc; - pe->write_proc = root_user_share_write_proc; -#endif - return 0; } -- cgit v1.2.3 From 2d92f22784b7b8879ebe3254e44c92cb8792b0dd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:18 +0200 Subject: sched: debug: increase width of debug line increase width of debug line - in preparation of more debugging info. 
Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 0aab455a7b41..755815937417 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -198,7 +198,7 @@ static int sched_debug_show(struct seq_file *m, void *v) u64 now = ktime_to_ns(ktime_get()); int cpu; - SEQ_printf(m, "Sched Debug Version: v0.05-v20, %s %.*s\n", + SEQ_printf(m, "Sched Debug Version: v0.06-v22, %s %.*s\n", init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); @@ -271,11 +271,12 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) rcu_read_unlock(); SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads); - SEQ_printf(m, "----------------------------------------------\n"); + SEQ_printf(m, + "---------------------------------------------------------\n"); #define P(F) \ - SEQ_printf(m, "%-25s:%20Ld\n", #F, (long long)p->F) + SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F) #define PN(F) \ - SEQ_printf(m, "%-25s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) + SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) PN(se.exec_start); PN(se.vruntime); @@ -292,7 +293,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.wait_max); P(sched_info.bkl_count); #endif - SEQ_printf(m, "%-25s:%20Ld\n", + SEQ_printf(m, "%-35s:%21Ld\n", "nr_switches", (long long)(p->nvcsw + p->nivcsw)); P(se.load.weight); P(policy); @@ -305,7 +306,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) t0 = sched_clock(); t1 = sched_clock(); - SEQ_printf(m, "%-25s:%20Ld\n", + SEQ_printf(m, "%-35s:%21Ld\n", "clock-delta", (long long)(t1-t0)); } } -- cgit v1.2.3 From cc367732ff0b1c63d0d7bdd11e6d1661794ef6a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Oct 2007 17:00:18 +0200 Subject: sched: debug, improve migration statistics add new migration statistics when SCHED_DEBUG and SCHEDSTATS is enabled. Available in /proc/<pid>/sched. 
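[ annotation, not part of the patch: each nr_* counter this patch adds is bumped through the schedstat helpers, which compile to nothing when CONFIG_SCHEDSTATS is off. The pattern, paraphrased from kernel/sched_stats.h; the example call site is illustrative only: ]

#ifdef CONFIG_SCHEDSTATS
# define schedstat_inc(rq, field)	do { (rq)->field++; } while (0)
# define schedstat_add(rq, field, amt)	do { (rq)->field += (amt); } while (0)
#else
# define schedstat_inc(rq, field)	do { } while (0)
# define schedstat_add(rq, field, amt)	do { } while (0)
#endif

/* e.g. on a successful affine wakeup: schedstat_inc(p, se.nr_wakeups_affine); */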
Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 10 deletions(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 755815937417..27e82cbccaa5 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -260,6 +260,7 @@ __initcall(init_sched_debug_procfs); void proc_sched_show_task(struct task_struct *p, struct seq_file *m) { + unsigned long nr_switches; unsigned long flags; int num_threads = 1; @@ -273,8 +274,12 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid, num_threads); SEQ_printf(m, "---------------------------------------------------------\n"); +#define __P(F) \ + SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)F) #define P(F) \ SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F) +#define __PN(F) \ + SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F)) #define PN(F) \ SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) @@ -282,6 +287,8 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.vruntime); PN(se.sum_exec_runtime); + nr_switches = p->nvcsw + p->nivcsw; + #ifdef CONFIG_SCHEDSTATS PN(se.wait_start); PN(se.sleep_start); @@ -292,14 +299,55 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) PN(se.slice_max); PN(se.wait_max); P(sched_info.bkl_count); + P(se.nr_migrations); + P(se.nr_migrations_cold); + P(se.nr_failed_migrations_affine); + P(se.nr_failed_migrations_running); + P(se.nr_failed_migrations_hot); + P(se.nr_forced_migrations); + P(se.nr_forced2_migrations); + P(se.nr_wakeups); + P(se.nr_wakeups_sync); + P(se.nr_wakeups_migrate); + P(se.nr_wakeups_local); + P(se.nr_wakeups_remote); + P(se.nr_wakeups_affine); + P(se.nr_wakeups_affine_attempts); + P(se.nr_wakeups_passive); + P(se.nr_wakeups_idle); + + { + u64 avg_atom, avg_per_cpu; + + avg_atom = p->se.sum_exec_runtime; + if (nr_switches) + do_div(avg_atom, nr_switches); + else + avg_atom = -1LL; + + avg_per_cpu = p->se.sum_exec_runtime; + if (p->se.nr_migrations) + avg_per_cpu = div64_64(avg_per_cpu, p->se.nr_migrations); + else + avg_per_cpu = -1LL; + + __PN(avg_atom); + __PN(avg_per_cpu); + } #endif + __P(nr_switches); SEQ_printf(m, "%-35s:%21Ld\n", - "nr_switches", (long long)(p->nvcsw + p->nivcsw)); + "nr_voluntary_switches", (long long)p->nvcsw); + SEQ_printf(m, "%-35s:%21Ld\n", + "nr_involuntary_switches", (long long)p->nivcsw); + P(se.load.weight); P(policy); P(prio); -#undef P #undef PN +#undef __PN +#undef P +#undef __P { u64 t0, t1; @@ -314,13 +362,32 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) void proc_sched_set_task(struct task_struct *p) { #ifdef CONFIG_SCHEDSTATS - p->se.sleep_max = 0; - p->se.block_max = 0; - p->se.exec_max = 0; - p->se.slice_max = 0; - p->se.wait_max = 0; - p->sched_info.bkl_count = 0; + p->se.wait_max = 0; + p->se.sleep_max = 0; + p->se.sum_sleep_runtime = 0; + p->se.block_max = 0; + p->se.exec_max = 0; + p->se.slice_max = 0; + p->se.nr_migrations = 0; + p->se.nr_migrations_cold = 0; + p->se.nr_failed_migrations_affine = 0; + p->se.nr_failed_migrations_running = 0; + p->se.nr_failed_migrations_hot = 0; + p->se.nr_forced_migrations = 0; + p->se.nr_forced2_migrations = 0; + p->se.nr_wakeups = 0; + p->se.nr_wakeups_sync = 0; + p->se.nr_wakeups_migrate = 0; + p->se.nr_wakeups_local = 0; + p->se.nr_wakeups_remote = 0; + p->se.nr_wakeups_affine = 0; + 
p->se.nr_wakeups_affine_attempts = 0; + p->se.nr_wakeups_passive = 0; + p->se.nr_wakeups_idle = 0; + p->sched_info.bkl_count = 0; #endif - p->se.sum_exec_runtime = 0; - p->se.prev_sum_exec_runtime = 0; + p->se.sum_exec_runtime = 0; + p->se.prev_sum_exec_runtime = 0; + p->nvcsw = 0; + p->nivcsw = 0; } -- cgit v1.2.3 From 0dbee3a6b006dbe814d002cb18e94bf24a216451 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 15 Oct 2007 17:00:19 +0200 Subject: Make scheduler debug file operations const In general, struct file_operations are const in the kernel, to not have false cacheline sharing and to catch bugs at compiletime with accidental writes to them. The new scheduler code introduces a new non-const one; fix this up. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sched_debug.c') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 27e82cbccaa5..a5e517ec07c3 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -236,7 +236,7 @@ static int sched_debug_open(struct inode *inode, struct file *filp) return single_open(filp, sched_debug_show, NULL); } -static struct file_operations sched_debug_fops = { +static const struct file_operations sched_debug_fops = { .open = sched_debug_open, .read = seq_read, .llseek = seq_lseek, -- cgit v1.2.3
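[ annotation: the SPLIT_NS formatting introduced earlier in this series is easy to sanity-check from userspace. A self-contained sketch that mirrors nsec_high()/nsec_low() with plain division standing in for the kernel's do_div(); it prints "12.345678" for an input of 12345678 nsecs: ]

#include <stdio.h>

static long long nsec_high(long long nsec)
{
	if (nsec < 0)
		return -((-nsec) / 1000000);

	return nsec / 1000000;
}

static unsigned long nsec_low(long long nsec)
{
	if (nsec < 0)
		nsec = -nsec;

	return (unsigned long)(nsec % 1000000);
}

#define SPLIT_NS(x) nsec_high(x), nsec_low(x)

int main(void)
{
	long long v = 12345678LL;	/* 12,345,678 nsecs */

	printf("%lld.%06lu\n", SPLIT_NS(v));	/* prints 12.345678 */
	return 0;
}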