From e087816db9423fdc49302d3cd7ec01e487477a71 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Thu, 28 Jan 2016 20:25:09 -0800
Subject: rcu: Add event tracing definitions for expedited grace periods

Signed-off-by: Paul E. McKenney
---
 include/trace/events/rcu.h | 78 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 76 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index ef72c4aada56..aacc172eba7e 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -171,6 +171,76 @@ TRACE_EVENT(rcu_grace_period_init,
                  __entry->grplo, __entry->grphi, __entry->qsmask)
 );
 
+/*
+ * Tracepoint for expedited grace-period events. Takes a string identifying
+ * the RCU flavor, the expedited grace-period sequence number, and a string
+ * identifying the grace-period-related event as follows:
+ *
+ * "snap": Captured snapshot of expedited grace period sequence number.
+ * "start": Started a real expedited grace period.
+ * "end": Ended a real expedited grace period.
+ * "done": Someone else did the expedited grace period for us.
+ */
+TRACE_EVENT(rcu_exp_grace_period,
+
+        TP_PROTO(const char *rcuname, unsigned long gpseq, const char *gpevent),
+
+        TP_ARGS(rcuname, gpseq, gpevent),
+
+        TP_STRUCT__entry(
+                __field(const char *, rcuname)
+                __field(unsigned long, gpseq)
+                __field(const char *, gpevent)
+        ),
+
+        TP_fast_assign(
+                __entry->rcuname = rcuname;
+                __entry->gpseq = gpseq;
+                __entry->gpevent = gpevent;
+        ),
+
+        TP_printk("%s %lu %s",
+                  __entry->rcuname, __entry->gpseq, __entry->gpevent)
+);
+
+/*
+ * Tracepoint for expedited grace-period funnel-locking events. Takes a
+ * string identifying the RCU flavor, an integer identifying the rcu_node
+ * combining-tree level, another pair of integers identifying the lowest-
+ * and highest-numbered CPU associated with the current rcu_node structure,
+ * and a string identifying the grace-period-related event as follows:
+ *
+ * "acq": Acquired a level of funnel lock
+ * "rel": Released a level of funnel lock
+ */
+TRACE_EVENT(rcu_exp_funnel_lock,
+
+        TP_PROTO(const char *rcuname, u8 level, int grplo, int grphi,
+                 const char *gpevent),
+
+        TP_ARGS(rcuname, level, grplo, grphi, gpevent),
+
+        TP_STRUCT__entry(
+                __field(const char *, rcuname)
+                __field(u8, level)
+                __field(int, grplo)
+                __field(int, grphi)
+                __field(const char *, gpevent)
+        ),
+
+        TP_fast_assign(
+                __entry->rcuname = rcuname;
+                __entry->level = level;
+                __entry->grplo = grplo;
+                __entry->grphi = grphi;
+                __entry->gpevent = gpevent;
+        ),
+
+        TP_printk("%s %d %d %d %s",
+                  __entry->rcuname, __entry->level, __entry->grplo,
+                  __entry->grphi, __entry->gpevent)
+);
+
 /*
  * Tracepoint for RCU no-CBs CPU callback handoffs. This event is intended
  * to assist debugging of these handoffs.
@@ -704,11 +774,15 @@ TRACE_EVENT(rcu_barrier,
 #else /* #ifdef CONFIG_RCU_TRACE */
 
 #define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
-                                    qsmask) do { } while (0)
 #define trace_rcu_future_grace_period(rcuname, gpnum, completed, c, \
                                       level, grplo, grphi, event) \
                                       do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+                                    qsmask) do { } while (0)
+#define trace_rcu_exp_grace_period(rcuname, gqseq, gpevent) \
+        do { } while (0)
+#define trace_rcu_exp_funnel_lock(rcuname, level, grplo, grphi, gpevent) \
+        do { } while (0)
 #define trace_rcu_nocb_wake(rcuname, cpu, reason) do { } while (0)
 #define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
 #define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
--
cgit v1.2.3
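
As a usage sketch (not taken from this patch), the tracepoints above would typically be fired from the expedited-GP code in kernel/rcu/tree.c. The names rsp (the flavor's rcu_state), rnp (the current rcu_node), rcu_exp_gp_seq_snap(), and the TPS()/tracepoint_string() wrapper are assumed from that code rather than introduced by this hunk:

        /* Snapshot the expedited sequence counter and trace the snapshot. */
        unsigned long s = rcu_exp_gp_seq_snap(rsp);

        trace_rcu_exp_grace_period(rsp->name, s, TPS("snap"));

        /* Trace acquisition of one level of the funnel lock. */
        trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
                                  rnp->grplo, rnp->grphi, TPS("acq"));
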
From f6a12f34a448cc8a624070fd365c29c890138a48 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Sat, 30 Jan 2016 17:57:35 -0800
Subject: rcu: Enforce expedited-GP fairness via funnel wait queue

The current mutex-based funnel-locking approach used by expedited grace
periods is subject to severe unfairness.  The problem arises when a few
tasks, making a path from leaves to root, all wake up before other tasks
do.  A new task can then follow this path all the way to the root, which
needlessly delays tasks whose grace period is done, but who do not happen
to acquire the lock quickly enough.

This commit avoids this problem by maintaining per-rcu_node wait queues,
along with a per-rcu_node counter that tracks the latest grace period
sought by an earlier task to visit this node.  If that grace period would
satisfy the current task, instead of proceeding up the tree, it waits on
the current rcu_node structure using a pair of wait queues provided for
that purpose.  This decouples awakening of old tasks from the arrival of
new tasks.

If the wakeups prove to be a bottleneck, additional kthreads can be
brought to bear for that purpose.

Signed-off-by: Paul E. McKenney
---
 include/trace/events/rcu.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index aacc172eba7e..d3e756539d44 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -179,6 +179,7 @@ TRACE_EVENT(rcu_grace_period_init,
  * "snap": Captured snapshot of expedited grace period sequence number.
  * "start": Started a real expedited grace period.
  * "end": Ended a real expedited grace period.
+ * "endwake": Woke piggybackers up.
  * "done": Someone else did the expedited grace period for us.
  */
 TRACE_EVENT(rcu_exp_grace_period,
@@ -210,8 +211,8 @@ TRACE_EVENT(rcu_exp_grace_period,
  * and highest-numbered CPU associated with the current rcu_node structure,
  * and a string identifying the grace-period-related event as follows:
  *
- * "acq": Acquired a level of funnel lock
- * "rel": Released a level of funnel lock
+ * "nxtlvl": Advance to next level of rcu_node funnel
+ * "wait": Wait for someone else to do expedited GP
  */
 TRACE_EVENT(rcu_exp_funnel_lock,
 
--
cgit v1.2.3
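
For illustration, a simplified sketch of the funnel-wait scheme that emits the new "nxtlvl" and "wait" strings follows. The identifiers rdp->mynode, rnp->exp_lock, rnp->exp_seq_rq, rnp->exp_wq[], sync_exp_work_done(), and ULONG_CMP_GE() follow the kernel-side implementation this commit describes, but the logic below is a reconstruction for reading purposes, not a quote of that code:

        /* Walk the rcu_node funnel from this CPU's leaf toward the root. */
        for (rnp = rdp->mynode; rnp != NULL; rnp = rnp->parent) {
                if (sync_exp_work_done(rsp, s))
                        return;  /* The needed expedited GP already completed. */
                spin_lock(&rnp->exp_lock);
                if (ULONG_CMP_GE(rnp->exp_seq_rq, s)) {
                        /* An earlier task already requested >= s, so wait here. */
                        spin_unlock(&rnp->exp_lock);
                        trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
                                                  rnp->grplo, rnp->grphi,
                                                  TPS("wait"));
                        wait_event(rnp->exp_wq[(s >> 1) & 0x3],
                                   sync_exp_work_done(rsp, s));
                        return;
                }
                rnp->exp_seq_rq = s;  /* Record our request and climb one level. */
                spin_unlock(&rnp->exp_lock);
                trace_rcu_exp_funnel_lock(rsp->name, rnp->level,
                                          rnp->grplo, rnp->grphi,
                                          TPS("nxtlvl"));
        }
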
From 293e2421fe25839500207eda123cc4475f8d17b8 Mon Sep 17 00:00:00 2001
From: Boqun Feng
Date: Wed, 23 Mar 2016 23:11:48 +0800
Subject: rcu: Remove superfluous versions of rcu_read_lock_sched_held()

Currently, we have four versions of rcu_read_lock_sched_held(), depending
on the combined choices of PREEMPT_COUNT and DEBUG_LOCK_ALLOC.  However,
there is an existing function preemptible() that already distinguishes
between the PREEMPT_COUNT=y and PREEMPT_COUNT=n cases, and allows these
four implementations to be consolidated down to two.

This commit therefore uses preemptible() to achieve this consolidation.

Note that there could be a small performance regression in the case of
CONFIG_DEBUG_LOCK_ALLOC=y && PREEMPT_COUNT=n.  However, given the overhead
associated with CONFIG_DEBUG_LOCK_ALLOC=y, this should be down in the
noise.

Signed-off-by: Boqun Feng
Signed-off-by: Paul E. McKenney
---
 include/linux/rcupdate.h | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'include')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 45de591657a6..5f1533e3d032 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -508,14 +508,7 @@ int rcu_read_lock_bh_held(void);
  * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
  * critical section unless it can prove otherwise.
  */
-#ifdef CONFIG_PREEMPT_COUNT
 int rcu_read_lock_sched_held(void);
-#else /* #ifdef CONFIG_PREEMPT_COUNT */
-static inline int rcu_read_lock_sched_held(void)
-{
-        return 1;
-}
-#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
 
 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
@@ -532,18 +525,10 @@ static inline int rcu_read_lock_bh_held(void)
         return 1;
 }
 
-#ifdef CONFIG_PREEMPT_COUNT
 static inline int rcu_read_lock_sched_held(void)
 {
-        return preempt_count() != 0 || irqs_disabled();
+        return !preemptible();
 }
-#else /* #ifdef CONFIG_PREEMPT_COUNT */
-static inline int rcu_read_lock_sched_held(void)
-{
-        return 1;
-}
-#endif /* #else #ifdef CONFIG_PREEMPT_COUNT */
-
 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
 #ifdef CONFIG_PROVE_RCU
--
cgit v1.2.3
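
The consolidation leans on the existing preemptible() macro, which already encodes the PREEMPT_COUNT split. Roughly as defined in include/linux/preempt.h (quoted here for context; not part of this patch):

        #ifdef CONFIG_PREEMPT_COUNT
        #define preemptible()   (preempt_count() == 0 && !irqs_disabled())
        #else
        #define preemptible()   0
        #endif

With PREEMPT_COUNT=y, !preemptible() therefore evaluates to preempt_count() != 0 || irqs_disabled(), and with PREEMPT_COUNT=n it is the constant 1, which are exactly the two rcu_read_lock_sched_held() bodies this patch removes.
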
From 291783b8ad77a83a6fdf91d55eee7f1ad72ed4d1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney"
Date: Tue, 12 Jan 2016 13:43:30 -0800
Subject: rcutorture: Expedited-GP batch progress access to torturing

This commit provides rcu_exp_batches_completed() and
rcu_exp_batches_completed_sched() functions to allow torture-test modules
to check how many expedited grace period batches have completed.  These
are analogous to the existing rcu_batches_completed(),
rcu_batches_completed_bh(), and rcu_batches_completed_sched() functions.

Signed-off-by: Paul E. McKenney
---
 include/linux/rcutiny.h | 16 ++++++++++++++++
 include/linux/rcutree.h |  2 ++
 2 files changed, 18 insertions(+)

(limited to 'include')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 64809aea661c..93aea75029fb 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -149,6 +149,22 @@ static inline unsigned long rcu_batches_completed_sched(void)
         return 0;
 }
 
+/*
+ * Return the number of expedited grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed(void)
+{
+        return 0;
+}
+
+/*
+ * Return the number of expedited sched grace periods completed.
+ */
+static inline unsigned long rcu_exp_batches_completed_sched(void)
+{
+        return 0;
+}
+
 static inline void rcu_force_quiescent_state(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index ad1eda9fa4da..5043cb823fb2 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -87,6 +87,8 @@ unsigned long rcu_batches_started_sched(void);
 unsigned long rcu_batches_completed(void);
 unsigned long rcu_batches_completed_bh(void);
 unsigned long rcu_batches_completed_sched(void);
+unsigned long rcu_exp_batches_completed(void);
+unsigned long rcu_exp_batches_completed_sched(void);
 void show_rcu_gp_kthreads(void);
 
 void rcu_force_quiescent_state(void);
--
cgit v1.2.3
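
As a hypothetical torture-test-style use of the new interfaces (not taken from this patch), the counters can be sampled before and after forcing an expedited grace period to check forward progress under TREE_RCU; the TINY_RCU stubs above always return zero, so such a check does not apply there:

        unsigned long before = rcu_exp_batches_completed();

        synchronize_rcu_expedited();    /* Force an expedited grace period. */
        WARN_ON_ONCE(rcu_exp_batches_completed() == before);
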