summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorSai Charan Gurrappadi <sgurrappadi@nvidia.com>2012-08-24 18:42:36 -0700
committerDan Willemsen <dwillemsen@nvidia.com>2013-09-14 12:34:24 -0700
commit2413a06b9892c5e12a28b9127e64d9632c42dfeb (patch)
tree046bf4914878d1f16b675ae7f84cc62712bbb1ed /kernel
parenteca4ff7a2ef49b7500452acf3c3723b3cf1c06de (diff)
cpuquiet: Update averaging of nr_runnables
Doing an exponential moving average per nr_running++/-- does not guarantee a fixed sample rate which induces errors if there are lots of threads being enqueued/dequeued from the rq (Linpack mt). Instead of keeping track of the avg, the scheduler now keeps track of the integral of nr_running and allows the readers to perform filtering on top. Implemented a proper exponential moving average for the runnables governor and a straight 100ms average for the balanced governor. Tweaked the thresholds for the runnables governor to minimize latency. Also, decreased sample_rate for the runnables governor to the absolute minimum of 10msecs. Updated to K3.4. Change-Id: Ia25bf8baf2a1a015ba188b2c06e551e89b16c5f8 Signed-off-by: Sai Charan Gurrappadi <sgurrappadi@nvidia.com> Signed-off-by: Peter De Schrijver <pdeschrijver@nvidia.com> Reviewed-on: http://git-master/r/131147 Reviewed-by: Juha Tukkinen <jtukkinen@nvidia.com> Rebase-Id: R7a20292e2cfb551a875962f0903647f69b78a0ab
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/core.c47
-rw-r--r--kernel/sched/debug.c3
-rw-r--r--kernel/sched/sched.h22
3 files changed, 26 insertions, 46 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7ce4b3ad1f76..b4b921df344a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2075,35 +2075,10 @@ unsigned long this_cpu_load(void)
return this->cpu_load[0];
}
-unsigned long avg_nr_running(void)
-{
- unsigned long i, sum = 0;
- unsigned int seqcnt, ave_nr_running;
-
- for_each_online_cpu(i) {
- struct rq *q = cpu_rq(i);
-
- /*
- * Update average to avoid reading stalled value if there were
- * no run-queue changes for a long time. On the other hand if
- * the changes are happening right now, just read current value
- * directly.
- */
- seqcnt = read_seqcount_begin(&q->ave_seqcnt);
- ave_nr_running = do_avg_nr_running(q);
- if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
- read_seqcount_begin(&q->ave_seqcnt);
- ave_nr_running = q->ave_nr_running;
- }
-
- sum += ave_nr_running;
- }
-
- return sum;
-}
-
-unsigned long get_avg_nr_running(unsigned int cpu)
+u64 nr_running_integral(unsigned int cpu)
{
+ unsigned int seqcnt;
+ u64 integral;
struct rq *q;
if (cpu >= nr_cpu_ids)
@@ -2111,7 +2086,21 @@ unsigned long get_avg_nr_running(unsigned int cpu)
q = cpu_rq(cpu);
- return q->ave_nr_running;
+ /*
+ * Update average to avoid reading stalled value if there were
+ * no run-queue changes for a long time. On the other hand if
+ * the changes are happening right now, just read current value
+ * directly.
+ */
+
+ seqcnt = read_seqcount_begin(&q->ave_seqcnt);
+ integral = do_nr_running_integral(q);
+ if (read_seqcount_retry(&q->ave_seqcnt, seqcnt)) {
+ read_seqcount_begin(&q->ave_seqcnt);
+ integral = q->nr_running_integral;
+ }
+
+ return integral;
}
/*
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index ad1b66c36096..75024a673520 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -281,9 +281,6 @@ do { \
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
P(nr_running);
- SEQ_printf(m, " .%-30s: %d.%03d \n", "ave_nr_running",
- rq->ave_nr_running / FIXED_1,
- ((rq->ave_nr_running % FIXED_1) * 1000) / FIXED_1);
SEQ_printf(m, " .%-30s: %lu\n", "load",
rq->load.weight);
P(nr_switches);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b75fd8944ecc..242a28cf4eac 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -417,7 +417,7 @@ struct rq {
/* time-based average load */
u64 nr_last_stamp;
- unsigned int ave_nr_running;
+ u64 nr_running_integral;
seqcount_t ave_seqcnt;
/* capture load from *all* tasks on this cpu: */
@@ -1083,32 +1083,26 @@ static inline u64 steal_ticks(u64 steal)
* 25 ~= 33554432ns = 33.5ms
* 24 ~= 16777216ns = 16.8ms
*/
-#define NR_AVE_PERIOD_EXP 27
#define NR_AVE_SCALE(x) ((x) << FSHIFT)
-#define NR_AVE_PERIOD (1 << NR_AVE_PERIOD_EXP)
-#define NR_AVE_DIV_PERIOD(x) ((x) >> NR_AVE_PERIOD_EXP)
-static inline unsigned int do_avg_nr_running(struct rq *rq)
+
+static inline u64 do_nr_running_integral(struct rq *rq)
{
s64 nr, deltax;
- unsigned int ave_nr_running = rq->ave_nr_running;
+ u64 nr_running_integral = rq->nr_running_integral;
deltax = rq->clock_task - rq->nr_last_stamp;
nr = NR_AVE_SCALE(rq->nr_running);
- if (deltax > NR_AVE_PERIOD)
- ave_nr_running = nr;
- else
- ave_nr_running +=
- NR_AVE_DIV_PERIOD(deltax * (nr - ave_nr_running));
+ nr_running_integral += nr * deltax;
- return ave_nr_running;
+ return nr_running_integral;
}
static inline void inc_nr_running(struct rq *rq)
{
write_seqcount_begin(&rq->ave_seqcnt);
- rq->ave_nr_running = do_avg_nr_running(rq);
+ rq->nr_running_integral = do_nr_running_integral(rq);
rq->nr_last_stamp = rq->clock_task;
rq->nr_running++;
write_seqcount_end(&rq->ave_seqcnt);
@@ -1127,7 +1121,7 @@ static inline void inc_nr_running(struct rq *rq)
static inline void dec_nr_running(struct rq *rq)
{
write_seqcount_begin(&rq->ave_seqcnt);
- rq->ave_nr_running = do_avg_nr_running(rq);
+ rq->nr_running_integral = do_nr_running_integral(rq);
rq->nr_last_stamp = rq->clock_task;
rq->nr_running--;
write_seqcount_end(&rq->ave_seqcnt);