From d84b31313ef8a8de55a2cbfb72f76f36d8c927fb Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <frederic@kernel.org>
Date: Wed, 21 Feb 2018 05:17:27 +0100
Subject: sched/isolation: Offload residual 1Hz scheduler tick

When a CPU runs in full dynticks mode, a 1Hz tick remains in order to
keep the scheduler stats alive. However this residual tick is a burden
for bare metal tasks that can't stand any interruption at all, or want
to minimize them.

The usual boot parameters "nohz_full=" or "isolcpus=nohz" will now
outsource these scheduler ticks to the global workqueue so that a
housekeeping CPU handles those remotely. The sched_class::task_tick()
implementations have been audited and look safe to be called remotely
as the target runqueue and its current task are passed in parameter
and don't seem to be accessed locally.

Note that in the case of using isolcpus, it's still up to the user to
affine the global workqueues to the housekeeping CPUs through
/sys/devices/virtual/workqueue/cpumask or domains isolation
"isolcpus=nohz,domain".

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luiz Capitulino <lcapitulino@redhat.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wanpeng Li <kernellwp@gmail.com>
Link: http://lkml.kernel.org/r/1519186649-3242-6-git-send-email-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/rt.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'kernel/sched/rt.c')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index aad49451584e..c80563b4f6b9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2292,6 +2292,14 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 static inline void watchdog(struct rq *rq, struct task_struct *p) { }
 #endif
 
+/*
+ * scheduler tick hitting a task of our scheduling class.
+ *
+ * NOTE: This function can be called remotely by the tick offload that
+ * goes along full dynticks. Therefore no local assumption can be made
+ * and everything must be accessed through the @rq and @curr passed in
+ * parameters.
+ */
 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
-- 
cgit v1.2.3


From 97fb7a0a8944bd6d2c5634e1e0fa689a5c40bc22 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 3 Mar 2018 14:01:12 +0100
Subject: sched: Clean up and harmonize the coding style of the scheduler code
 base

A good number of small style inconsistencies have accumulated
in the scheduler core, so do a pass over them to harmonize
all these details:

 - fix speling in comments,

 - use curly braces for multi-line statements,

 - remove unnecessary parentheses from integer literals,

 - capitalize consistently,

 - remove stray newlines,

 - add comments where necessary,

 - remove invalid/unnecessary comments,

 - align structure definitions and other data types vertically,

 - add missing newlines for increased readability,

 - fix vertical tabulation where it's misaligned,

 - harmonize preprocessor conditional block labeling
   and vertical alignment,

 - remove line-breaks where they uglify the code,

 - add newline after local variable definitions,

No change in functionality:

  md5:
     1191fa0a890cfa8132156d2959d7e9e2  built-in.o.before.asm
     1191fa0a890cfa8132156d2959d7e9e2  built-in.o.after.asm

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/rt.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'kernel/sched/rt.c')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c80563b4f6b9..e40498872111 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1453,9 +1453,9 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 		return;
 
 	/*
-	 * There appears to be other cpus that can accept
-	 * current and none to run 'p', so lets reschedule
-	 * to try and push current away:
+	 * There appear to be other CPUs that can accept
+	 * the current task but none can run 'p', so lets reschedule
+	 * to try and push the current task away:
 	 */
 	requeue_task_rt(rq, p, 1);
 	resched_curr(rq);
@@ -1596,12 +1596,13 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 	if (!task_running(rq, p) &&
 	    cpumask_test_cpu(cpu, &p->cpus_allowed))
 		return 1;
+
 	return 0;
 }
 
 /*
  * Return the highest pushable rq's task, which is suitable to be executed
- * on the cpu, NULL otherwise
+ * on the CPU, NULL otherwise
  */
 static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
 {
@@ -1639,11 +1640,11 @@ static int find_lowest_rq(struct task_struct *task)
 		return -1; /* No targets found */
 
 	/*
-	 * At this point we have built a mask of cpus representing the
+	 * At this point we have built a mask of CPUs representing the
 	 * lowest priority tasks in the system.  Now we want to elect
 	 * the best one based on our affinity and topology.
 	 *
-	 * We prioritize the last cpu that the task executed on since
+	 * We prioritize the last CPU that the task executed on since
 	 * it is most likely cache-hot in that location.
 	 */
 	if (cpumask_test_cpu(cpu, lowest_mask))
@@ -1651,7 +1652,7 @@ static int find_lowest_rq(struct task_struct *task)
 
 	/*
 	 * Otherwise, we consult the sched_domains span maps to figure
-	 * out which cpu is logically closest to our hot cache data.
+	 * out which CPU is logically closest to our hot cache data.
 	 */
 	if (!cpumask_test_cpu(this_cpu, lowest_mask))
 		this_cpu = -1; /* Skip this_cpu opt if not among lowest */
@@ -1692,6 +1693,7 @@ static int find_lowest_rq(struct task_struct *task)
 	cpu = cpumask_any(lowest_mask);
 	if (cpu < nr_cpu_ids)
 		return cpu;
+
 	return -1;
 }
 
@@ -1827,7 +1829,7 @@ retry:
 			 * The task hasn't migrated, and is still the next
 			 * eligible task, but we failed to find a run-queue
 			 * to push it to.  Do not retry in this case, since
-			 * other cpus will pull from us when ready.
+			 * other CPUs will pull from us when ready.
 			 */
 			goto out;
 		}
@@ -1919,7 +1921,7 @@ static int rto_next_cpu(struct root_domain *rd)
 	 * rt_next_cpu() will simply return the first CPU found in
 	 * the rto_mask.
 	 *
-	 * If rto_next_cpu() is called with rto_cpu is a valid cpu, it
+	 * If rto_next_cpu() is called with rto_cpu is a valid CPU, it
 	 * will return the next CPU found in the rto_mask.
 	 *
 	 * If there are no more CPUs left in the rto_mask, then a check is made
@@ -1980,7 +1982,7 @@ static void tell_cpu_to_push(struct rq *rq)
 	raw_spin_lock(&rq->rd->rto_lock);
 
 	/*
-	 * The rto_cpu is updated under the lock, if it has a valid cpu
+	 * The rto_cpu is updated under the lock, if it has a valid CPU
 	 * then the IPI is still running and will continue due to the
 	 * update to loop_next, and nothing needs to be done here.
 	 * Otherwise it is finishing up and an ipi needs to be sent.
@@ -2105,7 +2107,7 @@ static void pull_rt_task(struct rq *this_rq)
 
 			/*
 			 * There's a chance that p is higher in priority
-			 * than what's currently running on its cpu.
+			 * than what's currently running on its CPU.
 			 * This is just that p is wakeing up and hasn't
 			 * had a chance to schedule. We only pull
 			 * p if it is lower in priority than the
@@ -2693,6 +2695,7 @@ int sched_rr_handler(struct ctl_table *table, int write,
 			msecs_to_jiffies(sysctl_sched_rr_timeslice);
 	}
 	mutex_unlock(&mutex);
+
 	return ret;
 }
 
-- 
cgit v1.2.3


From 325ea10c0809406ce23f038602abbc454f3f761d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 3 Mar 2018 12:20:47 +0100
Subject: sched/headers: Simplify and clean up header usage in the scheduler

Do the following cleanups and simplifications:

 - sched/sched.h already includes <asm/paravirt.h>, so no need to
   include it in sched/core.c again.

 - order the <linux/sched/*.h> headers alphabetically

 - add all <linux/sched/*.h> headers to kernel/sched/sched.h

 - remove all unnecessary includes from the .c files that
   are already included in kernel/sched/sched.h.

Finally, make all scheduler .c files use a single common header:

  #include "sched.h"

... which now contains a union of the relied upon headers.

This makes the various .c files easier to read and easier to handle.

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/rt.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'kernel/sched/rt.c')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index e40498872111..a3d438fec46c 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -3,12 +3,8 @@
  * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
  * policies)
  */
-
 #include "sched.h"
 
-#include <linux/slab.h>
-#include <linux/irq_work.h>
-
 int sched_rr_timeslice = RR_TIMESLICE;
 int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
 
-- 
cgit v1.2.3


From 02d8ec9456f47b8865f1ff3fbb532e12a760d3b5 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sat, 3 Mar 2018 16:27:54 +0100
Subject: sched/deadline, rt: Rename queue_push_tasks/queue_pull_task to create
 separate namespace

There are similarly named functions in both of these modules:

  kernel/sched/deadline.c:static inline void queue_push_tasks(struct rq *rq)
  kernel/sched/deadline.c:static inline void queue_pull_task(struct rq *rq)
  kernel/sched/deadline.c:static inline void queue_push_tasks(struct rq *rq)
  kernel/sched/deadline.c:static inline void queue_pull_task(struct rq *rq)
  kernel/sched/deadline.c:	queue_push_tasks(rq);
  kernel/sched/deadline.c:	queue_pull_task(rq);
  kernel/sched/deadline.c:			queue_push_tasks(rq);
  kernel/sched/deadline.c:			queue_pull_task(rq);
  kernel/sched/rt.c:static inline void queue_push_tasks(struct rq *rq)
  kernel/sched/rt.c:static inline void queue_pull_task(struct rq *rq)
  kernel/sched/rt.c:static inline void queue_push_tasks(struct rq *rq)
  kernel/sched/rt.c:	queue_push_tasks(rq);
  kernel/sched/rt.c:	queue_pull_task(rq);
  kernel/sched/rt.c:			queue_push_tasks(rq);
  kernel/sched/rt.c:			queue_pull_task(rq);

... which makes it harder to grep for them. Prefix them with
deadline_ and rt_, respectively.

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/rt.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'kernel/sched/rt.c')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a3d438fec46c..4f4fd3b157f1 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -355,7 +355,7 @@ static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
 static void push_rt_tasks(struct rq *);
 static void pull_rt_task(struct rq *);
 
-static inline void queue_push_tasks(struct rq *rq)
+static inline void rt_queue_push_tasks(struct rq *rq)
 {
 	if (!has_pushable_tasks(rq))
 		return;
@@ -363,7 +363,7 @@ static inline void queue_push_tasks(struct rq *rq)
 	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
 }
 
-static inline void queue_pull_task(struct rq *rq)
+static inline void rt_queue_pull_task(struct rq *rq)
 {
 	queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
 }
@@ -421,7 +421,7 @@ static inline void pull_rt_task(struct rq *this_rq)
 {
 }
 
-static inline void queue_push_tasks(struct rq *rq)
+static inline void rt_queue_push_tasks(struct rq *rq)
 {
 }
 #endif /* CONFIG_SMP */
@@ -1565,7 +1565,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	/* The running task is never eligible for pushing */
 	dequeue_pushable_task(rq, p);
 
-	queue_push_tasks(rq);
+	rt_queue_push_tasks(rq);
 
 	return p;
 }
@@ -2185,7 +2185,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
 		return;
 
-	queue_pull_task(rq);
+	rt_queue_pull_task(rq);
 }
 
 void __init init_sched_rt_class(void)
@@ -2216,7 +2216,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 	if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
-			queue_push_tasks(rq);
+			rt_queue_push_tasks(rq);
 #endif /* CONFIG_SMP */
 		if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
 			resched_curr(rq);
@@ -2240,7 +2240,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 		 * may need to pull tasks to this runqueue.
 		 */
 		if (oldprio < p->prio)
-			queue_pull_task(rq);
+			rt_queue_pull_task(rq);
 
 		/*
 		 * If there's a higher priority task waiting to run
-- 
cgit v1.2.3


From 8f111bc357aa811e0bb5fdfe34c4c9efdafc15b9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 20 Dec 2017 16:26:12 +0100
Subject: cpufreq/schedutil: Rewrite CPUFREQ_RT support

Instead of trying to duplicate scheduler state to track if an RT task
is running, directly use the scheduler runqueue state for it.

This vastly simplifies things and fixes a number of bugs related to
sugov and the scheduler getting out of sync wrt this state.

As a consequence we not also update the remove cfs/dl state when
iterating the shared mask.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@arm.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Viresh Kumar <viresh.kumar@linaro.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 kernel/sched/rt.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'kernel/sched/rt.c')

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4f4fd3b157f1..86b77987435e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -957,9 +957,6 @@ static void update_curr_rt(struct rq *rq)
 	if (unlikely((s64)delta_exec <= 0))
 		return;
 
-	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-	cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
-
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
 
@@ -1001,6 +998,9 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
 
 	sub_nr_running(rq, rt_rq->rt_nr_running);
 	rt_rq->rt_queued = 0;
+
+	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+	cpufreq_update_util(rq, 0);
 }
 
 static void
@@ -1017,6 +1017,9 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
 
 	add_nr_running(rq, rt_rq->rt_nr_running);
 	rt_rq->rt_queued = 1;
+
+	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+	cpufreq_update_util(rq, 0);
 }
 
 #if defined CONFIG_SMP
-- 
cgit v1.2.3