From 8203d6d0ee784cfb2ebf89053f7fe399abc867d7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 2 Aug 2015 13:53:17 -0700 Subject: rcu: Use single-stage IPI algorithm for RCU expedited grace period The current preemptible-RCU expedited grace-period algorithm invokes synchronize_sched_expedited() to enqueue all tasks currently running in a preemptible-RCU read-side critical section, then waits for all the ->blkd_tasks lists to drain. This works, but results in both an IPI and a double context switch even on CPUs that do not happen to be running in a preemptible RCU read-side critical section. This commit implements a new algorithm that causes less OS jitter. This new algorithm IPIs all online CPUs that are not idle (from an RCU perspective), but refrains from self-IPIs. If a CPU receiving this IPI is not in a preemptible RCU read-side critical section (or is just now exiting one), it pushes quiescence up the rcu_node tree, otherwise, it sets a flag that will be handled by the upcoming outermost rcu_read_unlock(), which will then push quiescence up the tree. The expedited grace period must of course wait on any pre-existing blocked readers, and newly blocked readers must be queued carefully based on the state of both the normal and the expedited grace periods. This new queueing approach also avoids the need to update boost state, courtesy of the fact that blocked tasks are no longer ever migrated to the root rcu_node structure. Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..7fa8c4d372e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1330,10 +1330,12 @@ struct sched_dl_entity { union rcu_special { struct { - bool blocked; - bool need_qs; - } b; - short s; + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + u8 pad; /* Otherwise the compiler can store garbage here. */ + } b; /* Bits. */ + u32 s; /* Set of bits. */ }; struct rcu_node; -- cgit v1.2.3 From b6a4ae766e3133a4db73fabc81e522d1601cb623 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 29 Jul 2015 13:29:38 +0800 Subject: rcu: Use rcu_callback_t in call_rcu*() and friends As we now have rcu_callback_t typedefs as the type of rcu callbacks, we should use it in call_rcu*() and friends as the type of parameters. This could save us a few lines of code and make it clear which function requires an rcu callbacks rather than other callbacks as its argument. Besides, this can also help cscope to generate a better database for code reading. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 10 +++++----- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 581abf848566..d63bb77dab35 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -160,7 +160,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename, * more than one CPU). */ void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); #else /* #ifdef CONFIG_PREEMPT_RCU */ @@ -191,7 +191,7 @@ void call_rcu(struct rcu_head *head, * memory ordering guarantees. 
*/ void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); /** * call_rcu_sched() - Queue an RCU for invocation after sched grace period. @@ -213,7 +213,7 @@ void call_rcu_bh(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); + rcu_callback_t func); void synchronize_sched(void); @@ -274,7 +274,7 @@ do { \ * See the description of call_rcu() for more detailed information on * memory ordering guarantees. */ -void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); +void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void rcu_barrier_tasks(void); @@ -1065,7 +1065,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define __kfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ } while (0) /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ff968b7af3a4..c8a0722f77ea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -83,7 +83,7 @@ static inline void synchronize_sched_expedited(void) } static inline void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) + rcu_callback_t func) { call_rcu(head, func); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5abec82f325e..60d15a080d7c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -48,7 +48,7 @@ void synchronize_rcu_bh(void); void synchronize_sched_expedited(void); void synchronize_rcu_expedited(void); -void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); /** * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period -- cgit v1.2.3 From bb73c52bad3666997ed2ec83c0c80c3f8ef55008 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 30 Jul 2015 16:55:38 -0700 Subject: rcu: Don't disable preemption for Tiny and Tree RCU readers Because preempt_disable() maps to barrier() for non-debug builds, it forces the compiler to spill and reload registers. Because Tree RCU and Tiny RCU now only appear in CONFIG_PREEMPT=n builds, these barrier() instances generate needless extra code for each instance of rcu_read_lock() and rcu_read_unlock(). This extra code slows down Tree RCU and bloats Tiny RCU. This commit therefore removes the preempt_disable() and preempt_enable() from the non-preemptible implementations of __rcu_read_lock() and __rcu_read_unlock(), respectively. However, for debug purposes, preempt_disable() and preempt_enable() are still invoked if CONFIG_PREEMPT_COUNT=y, because this allows detection of sleeping inside atomic sections in non-preemptible kernels. However, Tiny and Tree RCU operates by coalescing all RCU read-side critical sections on a given CPU that lie between successive quiescent states. It is therefore necessary to compensate for removing barriers from __rcu_read_lock() and __rcu_read_unlock() by adding them to a couple of the RCU functions invoked during quiescent states, namely to rcu_all_qs() and rcu_note_context_switch(). However, note that the latter is more paranoia than necessity, at least until link-time optimizations become more aggressive. This is based on an earlier patch by Paul E. 
McKenney, fixing a bug encountered in kernels built with CONFIG_PREEMPT=n and CONFIG_PREEMPT_COUNT=y. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++-- include/linux/rcutiny.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d63bb77dab35..6c3ceceb6148 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -297,12 +297,14 @@ void synchronize_rcu(void); static inline void __rcu_read_lock(void) { - preempt_disable(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + preempt_disable(); } static inline void __rcu_read_unlock(void) { - preempt_enable(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + preempt_enable(); } static inline void synchronize_rcu(void) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c8a0722f77ea..4c1aaf9cce7b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -216,6 +216,7 @@ static inline bool rcu_is_watching(void) static inline void rcu_all_qs(void) { + barrier(); /* Avoid RCU read-side critical sections leaking across. */ } #endif /* __LINUX_RCUTINY_H */ -- cgit v1.2.3 From 49f5903b473c5f63f3b57856d1bd4593db0a2eef Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Sep 2015 00:42:57 -0700 Subject: rcu: Move preemption disabling out of __srcu_read_lock() Currently, __srcu_read_lock() cannot be invoked from restricted environments because it contains calls to preempt_disable() and preempt_enable(), both of which can invoke lockdep, which is a bad idea in some restricted execution modes. This commit therefore moves the preempt_disable() and preempt_enable() from __srcu_read_lock() to srcu_read_lock(). It also inserts the preempt_disable() and preempt_enable() around the call to __srcu_read_lock() in do_exit(). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/srcu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index bdeb4567b71e..f5f80c5643ac 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -215,8 +215,11 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { - int retval = __srcu_read_lock(sp); + int retval; + preempt_disable(); + retval = __srcu_read_lock(sp); + preempt_enable(); rcu_lock_acquire(&(sp)->dep_map); return retval; } -- cgit v1.2.3 From c3ac7cf1847a4e68c909984f60d36adef2088e35 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Sep 2015 16:29:02 -0700 Subject: rcu: Add rcu_pointer_handoff() This commit adds an rcu_pointer_handoff() that is intended to mark situations where a structure's protection transitions from RCU to some other mechanism (locking, reference counting, whatever). These markings should allow external tools to more easily spot bugs involving leaking pointers out of RCU read-side critical sections. Signed-off-by: Paul E. 
McKenney --- include/linux/rcupdate.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6c3ceceb6148..587eb057e2fa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -812,6 +812,28 @@ static inline void rcu_preempt_sleep_check(void) */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) +/** + * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism + * @p: The pointer to hand off + * + * This is simply an identity function, but it documents where a pointer + * is handed off from RCU to some other synchronization mechanism, for + * example, reference counting or locking. In C11, it would map to + * kill_dependency(). It could be used as follows: + * + * rcu_read_lock(); + * p = rcu_dereference(gp); + * long_lived = is_long_lived(p); + * if (long_lived) { + * if (!atomic_inc_not_zero(p->refcnt)) + * long_lived = false; + * else + * p = rcu_pointer_handoff(p); + * } + * rcu_read_unlock(); + */ +#define rcu_pointer_handoff(p) (p) + /** * rcu_read_lock() - mark the beginning of an RCU read-side critical section * -- cgit v1.2.3 From 8db70b132dd57696cfc7560203a72e90c51bfdda Mon Sep 17 00:00:00 2001 From: Patrick Marlier Date: Fri, 11 Sep 2015 15:50:35 -0700 Subject: rculist: Make list_entry_rcu() use lockless_dereference() The current list_entry_rcu() implementation copies the pointer to a stack variable, then invokes rcu_dereference_raw() on it. This results in an additional store-load pair. Now, most compilers will emit normal store and load instructions, which might seem to be of negligible overhead, but this results in a load-hit-store situation that can cause surprisingly long pipeline stalls, even on modern microprocessors. The problem is that it takes time for the store to get the store buffer updated, which can delay the subsequent load, which immediately follows. This commit therefore switches to the lockless_dereference() primitive, which does not expect the __rcu annotations (that are anyway not present in the list_head structure) and which, like rcu_dereference_raw(), does not check for an enclosing RCU read-side critical section. Most importantly, it does not copy the pointer, thus avoiding the load-hit-store overhead. Signed-off-by: Patrick Marlier [ paulmck: Switched to lockless_dereference() to suppress sparse warnings. ] Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rculist.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 17c6b1f84a77..5ed540986019 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -247,10 +247,7 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ -({ \ - typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ - container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ -}) + container_of(lockless_dereference(ptr), type, member) /** * Where are list_empty_rcu() and list_first_entry_rcu()? -- cgit v1.2.3 From e62e3f620ba8d437f4998441fc11cf3dc9d466d1 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 14 Sep 2015 12:23:01 -0700 Subject: rcu: Remove deprecated rcu_lockdep_assert() The old rcu_lockdep_assert() was retained to ease handling of incoming patches, but any use will result in deprecated warnings. However, its replacement, RCU_LOCKDEP_WARN(), is now upstream. It is therefore time to remove rcu_lockdep_assert(), which this commit does. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 587eb057e2fa..a0189ba67fde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -537,28 +537,8 @@ static inline int rcu_read_lock_sched_held(void) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -/* Deprecate rcu_lockdep_assert(): Use RCU_LOCKDEP_WARN() instead. */ -static inline void __attribute((deprecated)) deprecate_rcu_lockdep_assert(void) -{ -} - #ifdef CONFIG_PROVE_RCU -/** - * rcu_lockdep_assert - emit lockdep splat if specified condition not met - * @c: condition to check - * @s: informative message - */ -#define rcu_lockdep_assert(c, s) \ - do { \ - static bool __section(.data.unlikely) __warned; \ - deprecate_rcu_lockdep_assert(); \ - if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ - __warned = true; \ - lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ - } \ - } while (0) - /** * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check @@ -596,7 +576,6 @@ static inline void rcu_preempt_sleep_check(void) #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c, s) deprecate_rcu_lockdep_assert() #define RCU_LOCKDEP_WARN(c, s) do { } while (0) #define rcu_sleep_check() do { } while (0) -- cgit v1.2.3 From 7f5f873c6a0772970d5fee1f364231207051ecd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 18 Sep 2015 08:45:22 -0700 Subject: rculist: Use WRITE_ONCE() when deleting from reader-visible list The various RCU list-deletion macros (list_del_rcu(), hlist_del_init_rcu(), hlist_del_rcu(), hlist_bl_del_init_rcu(), hlist_bl_del_rcu(), hlist_nulls_del_init_rcu(), and hlist_nulls_del_rcu()) do plain stores into the ->next pointer of the preceding list elemment. Unfortunately, the compiler is within its rights to (for example) use byte-at-a-time writes to update the pointer, which would fatally confuse concurrent readers. This patch therefore adds the needed WRITE_ONCE() macros. KernelThreadSanitizer (KTSAN) reported the __hlist_del() issue, which is a problem when __hlist_del() is invoked by hlist_del_rcu(). Reported-by: Dmitry Vyukov Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/list.h | 5 +++-- include/linux/list_bl.h | 5 +++-- include/linux/list_nulls.h | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 3e3e64a61002..993395a2e55c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -87,7 +87,7 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) static inline void __list_del(struct list_head * prev, struct list_head * next) { next->prev = prev; - prev->next = next; + WRITE_ONCE(prev->next, next); } /** @@ -615,7 +615,8 @@ static inline void __hlist_del(struct hlist_node *n) { struct hlist_node *next = n->next; struct hlist_node **pprev = n->pprev; - *pprev = next; + + WRITE_ONCE(*pprev, next); if (next) next->pprev = pprev; } diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 2eb88556c5c5..8132214e8efd 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -93,9 +93,10 @@ static inline void __hlist_bl_del(struct hlist_bl_node *n) LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); /* pprev may be `first`, so be careful not to lose the lock bit */ - *pprev = (struct hlist_bl_node *) + WRITE_ONCE(*pprev, + (struct hlist_bl_node *) ((unsigned long)next | - ((unsigned long)*pprev & LIST_BL_LOCKMASK)); + ((unsigned long)*pprev & LIST_BL_LOCKMASK))); if (next) next->pprev = pprev; } diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index f266661d2666..444d2b1313bd 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -76,7 +76,8 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) { struct hlist_nulls_node *next = n->next; struct hlist_nulls_node **pprev = n->pprev; - *pprev = next; + + WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) next->pprev = pprev; } -- cgit v1.2.3 From cc44ca848f5e517aeca9f5eabbe13609a3f71450 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:44 +0200 Subject: rcu: Create rcu_sync infrastructure The rcu_sync infrastructure can be thought of as infrastructure to be used to implement reader-writer primitives having extremely lightweight readers during times when there are no writers. The first use is in the percpu_rwsem used by the VFS subsystem. This infrastructure is functionally equivalent to struct rcu_sync_struct { atomic_t counter; }; /* Check possibility of fast-path read-side operations. */ static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) { return atomic_read(&rss->counter) == 0; } /* Tell readers to use slowpaths. */ static inline void rcu_sync_enter(struct rcu_sync_struct *rss) { atomic_inc(&rss->counter); synchronize_sched(); } /* Allow readers to once again use fastpaths. */ static inline void rcu_sync_exit(struct rcu_sync_struct *rss) { synchronize_sched(); atomic_dec(&rss->counter); } The main difference is that it records the state and only calls synchronize_sched() if required. At least some of the calls to synchronize_sched() will be optimized away when rcu_sync_enter() and rcu_sync_exit() are invoked repeatedly in quick succession. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 include/linux/rcu_sync.h (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h new file mode 100644 index 000000000000..cb044df2e21c --- /dev/null +++ b/include/linux/rcu_sync.h @@ -0,0 +1,94 @@ +/* + * RCU-based infrastructure for lightweight reader-writer locking + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (c) 2015, Red Hat, Inc. + * + * Author: Oleg Nesterov + */ + +#ifndef _LINUX_RCU_SYNC_H_ +#define _LINUX_RCU_SYNC_H_ + +#include +#include + +/* Structure to mediate between updaters and fastpath-using readers. */ +struct rcu_sync { + int gp_state; + int gp_count; + wait_queue_head_t gp_wait; + + int cb_state; + struct rcu_head cb_head; + + void (*sync)(void); + void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +}; + +#define ___RCU_SYNC_INIT(name) \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0 + +#define __RCU_SCHED_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_sched, \ + .call = call_rcu_sched, \ +} + +#define __RCU_BH_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu_bh, \ + .call = call_rcu_bh, \ +} + +#define __RCU_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu, \ + .call = call_rcu, \ +} + +#define DEFINE_RCU_SCHED_SYNC(name) \ + struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name) + +#define DEFINE_RCU_BH_SYNC(name) \ + struct rcu_sync name = __RCU_BH_SYNC_INIT(name) + +#define DEFINE_RCU_SYNC(name) \ + struct rcu_sync name = __RCU_SYNC_INIT(name) + +/** + * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * Returns true if readers are permitted to use their fastpaths. + * Must be invoked within an RCU read-side critical section whose + * flavor matches that of the rcu_sync struture. + */ +static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) +{ + return !rsp->gp_state; /* GP_IDLE */ +} + +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + +extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); +extern void rcu_sync_enter(struct rcu_sync *); +extern void rcu_sync_exit(struct rcu_sync *); + +#endif /* _LINUX_RCU_SYNC_H_ */ -- cgit v1.2.3 From 82e8c565be8a72957570d7da8dd9b441db7bb648 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:47 +0200 Subject: rcu_sync: Simplify rcu_sync using new rcu_sync_ops structure This commit adds the new struct rcu_sync_ops which holds sync/call methods, and turns the function pointers in rcu_sync_struct into an array of struct rcu_sync_ops. 
This simplifies the "init" helpers by collapsing a switch statement and explicit multiple definitions into a simple assignment and a helper macro, respectively. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 60 +++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index cb044df2e21c..c6d2272c4459 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -26,6 +26,8 @@ #include #include +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + /* Structure to mediate between updaters and fastpath-using readers. */ struct rcu_sync { int gp_state; @@ -35,43 +37,9 @@ struct rcu_sync { int cb_state; struct rcu_head cb_head; - void (*sync)(void); - void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); + enum rcu_sync_type gp_type; }; -#define ___RCU_SYNC_INIT(name) \ - .gp_state = 0, \ - .gp_count = 0, \ - .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ - .cb_state = 0 - -#define __RCU_SCHED_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_sched, \ - .call = call_rcu_sched, \ -} - -#define __RCU_BH_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu_bh, \ - .call = call_rcu_bh, \ -} - -#define __RCU_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu, \ - .call = call_rcu, \ -} - -#define DEFINE_RCU_SCHED_SYNC(name) \ - struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name) - -#define DEFINE_RCU_BH_SYNC(name) \ - struct rcu_sync name = __RCU_BH_SYNC_INIT(name) - -#define DEFINE_RCU_SYNC(name) \ - struct rcu_sync name = __RCU_SYNC_INIT(name) - /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization @@ -85,10 +53,28 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) return !rsp->gp_state; /* GP_IDLE */ } -enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; - extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); +#define __RCU_SYNC_INITIALIZER(name, type) { \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0, \ + .gp_type = type, \ + } + +#define __DEFINE_RCU_SYNC(name, type) \ + struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type) + +#define DEFINE_RCU_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SYNC) + +#define DEFINE_RCU_SCHED_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC) + +#define DEFINE_RCU_BH_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_BH_SYNC) + #endif /* _LINUX_RCU_SYNC_H_ */ -- cgit v1.2.3 From 3a518b76af7bb411efe6dd090fbf098e29accb2e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:50 +0200 Subject: rcu_sync: Add CONFIG_PROVE_RCU checks This commit validates that the caller of rcu_sync_is_idle() holds the corresponding type of RCU read-side lock, but only in kernels built with CONFIG_PROVE_RCU=y. This validation is carried out via a new rcu_sync_ops->held() method that is checked within rcu_sync_is_idle(). Note that although this does add code to the fast path, it only does so in kernels built with CONFIG_PROVE_RCU=y. Suggested-by: "Paul E. 
McKenney" Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index c6d2272c4459..1f2d4fc30b04 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -40,6 +40,8 @@ struct rcu_sync { enum rcu_sync_type gp_type; }; +extern bool __rcu_sync_is_idle(struct rcu_sync *); + /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization @@ -50,7 +52,11 @@ struct rcu_sync { */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { +#ifdef CONFIG_PROVE_RCU + return __rcu_sync_is_idle(rsp); +#else return !rsp->gp_state; /* GP_IDLE */ +#endif } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); -- cgit v1.2.3 From 07899a6e5f56136028c44a57ad0451e797365ac3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:52 +0200 Subject: rcu_sync: Introduce rcu_sync_dtor() This commit allows rcu_sync structures to be safely deallocated, The trick is to add a new ->wait field to the gp_ops array. This field is a pointer to the rcu_barrier() function corresponding to the flavor of RCU in question. This allows a new rcu_sync_dtor() to wait for any outstanding callbacks before freeing the rcu_sync structure. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 1f2d4fc30b04..8069d6468bc4 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -62,6 +62,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); +extern void rcu_sync_dtor(struct rcu_sync *); #define __RCU_SYNC_INITIALIZER(name, type) { \ .gp_state = 0, \ -- cgit v1.2.3 From 001dac627ff37433d5528ffb0d897cd19c2b1e43 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:57 +0200 Subject: locking/percpu-rwsem: Make use of the rcu_sync infrastructure Currently down_write/up_write calls synchronize_sched_expedited() twice, which is evil. Change this code to rely on rcu-sync primitives. This avoids the _expedited "big hammer", and this can be faster in the contended case or even in the case when a single thread does down_write/up_write in a loop. Of course, a single down_write() will take more time, but otoh it will be much more friendly to the whole system. To simplify the review this patch doesn't update the comments, fixed by the next change. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/percpu-rwsem.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 834c4e52cb2d..c2fa3ecb0dce 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -5,11 +5,12 @@ #include #include #include +#include #include struct percpu_rw_semaphore { + struct rcu_sync rss; unsigned int __percpu *fast_read_ctr; - atomic_t write_ctr; struct rw_semaphore rw_sem; atomic_t slow_read_ctr; wait_queue_head_t write_waitq; -- cgit v1.2.3 From 4bace7344d6dbd7a1b0b801abf24ea9878064317 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Sep 2015 17:59:18 +0200 Subject: rcu_sync: Cleanup the CONFIG_PROVE_RCU checks 1. Rename __rcu_sync_is_idle() to rcu_sync_lockdep_assert() and change it to use rcu_lockdep_assert(). 2. Change rcu_sync_is_idle() to return rsp->gp_state == GP_IDLE unconditonally, this way we can remove the same check from rcu_sync_lockdep_assert() and clearly isolate the debugging code. Note: rcu_sync_enter()->wait_event(gp_state == GP_PASSED) needs another CONFIG_PROVE_RCU check, the same as is done in ->sync(); but this needs some simple preparations in the core RCU code to avoid the code duplication. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 8069d6468bc4..a63a33e6196e 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -40,7 +40,7 @@ struct rcu_sync { enum rcu_sync_type gp_type; }; -extern bool __rcu_sync_is_idle(struct rcu_sync *); +extern void rcu_sync_lockdep_assert(struct rcu_sync *); /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? @@ -53,10 +53,9 @@ extern bool __rcu_sync_is_idle(struct rcu_sync *); static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { #ifdef CONFIG_PROVE_RCU - return __rcu_sync_is_idle(rsp); -#else - return !rsp->gp_state; /* GP_IDLE */ + rcu_sync_lockdep_assert(rsp); #endif + return !rsp->gp_state; /* GP_IDLE */ } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); -- cgit v1.2.3 From 02ef3c4a2aae65a1632b27770bfea3f83ca06772 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Aug 2015 15:14:30 -0700 Subject: cpu: Remove try_get_online_cpus() Now that synchronize_sched_expedited() no longer uses it, there are no users of try_get_online_cpus() in mainline. This commit therefore removes it. Signed-off-by: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner --- include/linux/cpu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 23c30bdcca86..d2ca8c38f9c4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -228,7 +228,6 @@ extern struct bus_type cpu_subsys; extern void cpu_hotplug_begin(void); extern void cpu_hotplug_done(void); extern void get_online_cpus(void); -extern bool try_get_online_cpus(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); @@ -246,7 +245,6 @@ int cpu_down(unsigned int cpu); static inline void cpu_hotplug_begin(void) {} static inline void cpu_hotplug_done(void) {} #define get_online_cpus() do { } while (0) -#define try_get_online_cpus() true #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) -- cgit v1.2.3
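Editor's illustration: the rcu_sync commits above describe the new API only through its equivalent atomic-counter pseudo-code, so the sketch below shows how a caller might put the real primitives together, loosely following the read-side fast-path idea behind the percpu-rwsem conversion. This is a hedged example, not code taken from the patches: my_sync, my_fast_ctr, my_sem and the three functions are hypothetical names, and the writer-side draining of pre-existing fast-path readers (which percpu-rwsem does by summing the per-CPU counters) is deliberately omitted.

	#include <linux/rcu_sync.h>
	#include <linux/rcupdate.h>
	#include <linux/percpu.h>
	#include <linux/rwsem.h>

	static struct rcu_sync my_sync;
	static DEFINE_PER_CPU(unsigned int, my_fast_ctr);	/* hypothetical per-CPU reader count */
	static DECLARE_RWSEM(my_sem);				/* hypothetical slow-path fallback */

	static void my_init(void)
	{
		/* Sched-flavor grace periods, matching the rcu_read_lock_sched() below. */
		rcu_sync_init(&my_sync, RCU_SCHED_SYNC);
	}

	static void my_read_section(void)
	{
		/* rcu_sync_is_idle() must be called under the matching RCU read-side protection. */
		rcu_read_lock_sched();
		if (rcu_sync_is_idle(&my_sync)) {
			this_cpu_inc(my_fast_ctr);	/* no writer: lightweight fast path */
			rcu_read_unlock_sched();
			/* ... read-side work ... */
			this_cpu_dec(my_fast_ctr);
			return;
		}
		rcu_read_unlock_sched();

		down_read(&my_sem);			/* writer present: conventional slow path */
		/* ... read-side work ... */
		up_read(&my_sem);
	}

	static void my_write_section(void)
	{
		rcu_sync_enter(&my_sync);	/* force new readers onto the slow path */
		/* Waiting for pre-existing fast-path readers to drain is omitted here. */
		down_write(&my_sem);
		/* ... update ... */
		up_write(&my_sem);
		rcu_sync_exit(&my_sync);	/* re-enable reader fast paths after a grace period */
	}

The point of the pattern, per the rcu_sync commit message above, is that the state tracking inside rcu_sync_enter()/rcu_sync_exit() lets repeated writer invocations elide most of the synchronize_sched() calls, so writers stay reasonably cheap while fast-path readers avoid atomics and heavy barriers entirely.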