sched/rt: Up the root domain ref count when passing it around via IPIs

commit 364f56653708ba8bcdefd4f0da2a42904baa8eeb upstream. When issuing an IPI RT push, where an IPI is sent to each CPU that has more than one RT task scheduled on it, it references the root domain's rto_mask, that contains all the CPUs within the root domain that has more than one RT task in the runable state. The problem is, after the IPIs are initiated, the rq->lock is released. This means that the root domain that is associated to the run queue could be freed while the IPIs are going around. Add a sched_get_rd() and a sched_put_rd() that will increment and decrement the root domain's ref count respectively. This way when initiating the IPIs, the scheduler will up the root domain's ref count before releasing the rq->lock, ensuring that the root domain does not go away until the IPI round is complete. Reported-by: Pavan Kondeti <pkondeti@codeaurora.org> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Fixes: 4bdced5c9a292 ("sched/rt: Simplify the IPI based RT balancing logic") Link: http://lkml.kernel.org/r/CAEU1=PkiHO35Dzna8EQqNSKW1fr1y1zRQ5y66X117MG06sQtNA@mail.gmail.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Steven Rostedt (VMware) <rostedt@goodmis.org> 2018-01-23 20:45:38 -0500
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2018-02-17 13:21:13 +0100
commit: a384e5437f705972d2884cea17b931c1a2cd3277 (patch)
tree: 22bf4974341bf155bf5e37b6caa451f36af97002 /kernel/sched
parent: 1c679981309b4d36b024fc954cfcf2111a007de0 (diff)
3 files changed, 22 insertions, 2 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e5066955cc3a..bce3a7ad4253 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5864,6 +5864,19 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 		call_rcu_sched(&old_rd->rcu, free_rootdomain);
 }
 
+void sched_get_rd(struct root_domain *rd)
+{
+	atomic_inc(&rd->refcount);
+}
+
+void sched_put_rd(struct root_domain *rd)
+{
+	if (!atomic_dec_and_test(&rd->refcount))
+		return;
+
+	call_rcu_sched(&rd->rcu, free_rootdomain);
+}
+
 static int init_rootdomain(struct root_domain *rd)
 {
 	memset(rd, 0, sizeof(*rd));
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 27b6209a2928..f6d68ddfa2f3 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1978,8 +1978,11 @@ static void tell_cpu_to_push(struct rq *rq)
 
 	rto_start_unlock(&rq->rd->rto_loop_start);
 
-	if (cpu >= 0)
+	if (cpu >= 0) {
+		/* Make sure the rd does not get freed while pushing */
+		sched_get_rd(rq->rd);
 		irq_work_queue_on(&rq->rd->rto_push_work, cpu);
+	}
 }
 
 /* Called from hardirq context */
@@ -2009,8 +2012,10 @@ void rto_push_irq_work_func(struct irq_work *work)
 
 	raw_spin_unlock(&rd->rto_lock);
 
-	if (cpu < 0)
+	if (cpu < 0) {
+		sched_put_rd(rd);
 		return;
+	}
 
 	/* Try the next RT overloaded CPU */
 	irq_work_queue_on(&rd->rto_push_work, cpu);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index cff985feb6e7..f564a1d2c9d5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -590,6 +590,8 @@ struct root_domain {
 };
 
 extern struct root_domain def_root_domain;
+extern void sched_get_rd(struct root_domain *rd);
+extern void sched_put_rd(struct root_domain *rd);
 
 #ifdef HAVE_RT_PUSH_IPI
 extern void rto_push_irq_work_func(struct irq_work *work);
author	Steven Rostedt (VMware) <rostedt@goodmis.org>	2018-01-23 20:45:38 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2018-02-17 13:21:13 +0100
commit	a384e5437f705972d2884cea17b931c1a2cd3277 (patch)
tree	22bf4974341bf155bf5e37b6caa451f36af97002 /kernel/sched
parent	1c679981309b4d36b024fc954cfcf2111a007de0 (diff)