summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-19 17:12:34 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-20 10:10:18 -0700
commit5928a2b60cfdbad730f93696acab142d0b607280 (patch)
tree49bb21c9219673e61bad7a7c9202c7f25f5fe1be /include
parent5ed59af85077d28875a3a137b21933aaf1b4cd50 (diff)
parentbdd4431c8d071491a68a65d9457996f222b5ecd3 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU changes for v3.4 from Ingo Molnar. The major features of this series are: - making RCU more aggressive about entering dyntick-idle mode in order to improve energy efficiency - converting a few more call_rcu()s to kfree_rcu()s - applying a number of rcutree fixes and cleanups to rcutiny - removing CONFIG_SMP #ifdefs from treercu - allowing RCU CPU stall times to be set via sysfs - adding CPU-stall capability to rcutorture - adding more RCU-abuse diagnostics - updating documentation - fixing yet more issues located by the still-ongoing top-to-bottom inspection of RCU, this time with a special focus on the CPU-hotplug code path. * 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (48 commits) rcu: Stop spurious warnings from synchronize_sched_expedited rcu: Hold off RCU_FAST_NO_HZ after timer posted rcu: Eliminate softirq-mediated RCU_FAST_NO_HZ idle-entry loop rcu: Add RCU_NONIDLE() for idle-loop RCU read-side critical sections rcu: Allow nesting of rcu_idle_enter() and rcu_idle_exit() rcu: Remove redundant check for rcu_head misalignment PTR_ERR should be called before its argument is cleared. rcu: Convert WARN_ON_ONCE() in rcu_lock_acquire() to lockdep rcu: Trace only after NULL-pointer check rcu: Call out dangers of expedited RCU primitives rcu: Rework detection of use of RCU by offline CPUs lockdep: Add CPU-idle/offline warning to lockdep-RCU splat rcu: No interrupt disabling for rcu_prepare_for_idle() rcu: Move synchronize_sched_expedited() to rcutree.c rcu: Check for illegal use of RCU from offlined CPUs rcu: Update stall-warning documentation rcu: Add CPU-stall capability to rcutorture rcu: Make documentation give more realistic rcutorture duration rcutorture: Permit holding off CPU-hotplug operations during boot rcu: Print scheduling-clock information on RCU CPU stall-warning messages ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/rcupdate.h83
-rw-r--r--include/linux/rcutiny.h10
-rw-r--r--include/linux/rcutree.h19
-rw-r--r--include/linux/sched.h3
-rw-r--r--include/linux/srcu.h15
-rw-r--r--include/trace/events/rcu.h63
6 files changed, 156 insertions, 37 deletions
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 81c04f4348ec..937217425c47 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -190,6 +190,33 @@ extern void rcu_idle_exit(void);
extern void rcu_irq_enter(void);
extern void rcu_irq_exit(void);
+/**
+ * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
+ * @a: Code that RCU needs to pay attention to.
+ *
+ * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
+ * in the inner idle loop, that is, between the rcu_idle_enter() and
+ * the rcu_idle_exit() -- RCU will happily ignore any such read-side
+ * critical sections. However, things like powertop need tracepoints
+ * in the inner idle loop.
+ *
+ * This macro provides the way out: RCU_NONIDLE(do_something_with_RCU())
+ * will tell RCU that it needs to pay attending, invoke its argument
+ * (in this example, a call to the do_something_with_RCU() function),
+ * and then tell RCU to go back to ignoring this CPU. It is permissible
+ * to nest RCU_NONIDLE() wrappers, but the nesting level is currently
+ * quite limited. If deeper nesting is required, it will be necessary
+ * to adjust DYNTICK_TASK_NESTING_VALUE accordingly.
+ *
+ * This macro may be used from process-level code only.
+ */
+#define RCU_NONIDLE(a) \
+ do { \
+ rcu_idle_exit(); \
+ do { a; } while (0); \
+ rcu_idle_enter(); \
+ } while (0)
+
/*
* Infrastructure to implement the synchronize_() primitives in
* TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -226,6 +253,15 @@ static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
+#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
+bool rcu_lockdep_current_cpu_online(void);
+#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
+static inline bool rcu_lockdep_current_cpu_online(void)
+{
+ return 1;
+}
+#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#ifdef CONFIG_PROVE_RCU
@@ -239,13 +275,11 @@ static inline int rcu_is_cpu_idle(void)
static inline void rcu_lock_acquire(struct lockdep_map *map)
{
- WARN_ON_ONCE(rcu_is_cpu_idle());
lock_acquire(map, 0, 0, 2, 1, NULL, _THIS_IP_);
}
static inline void rcu_lock_release(struct lockdep_map *map)
{
- WARN_ON_ONCE(rcu_is_cpu_idle());
lock_release(map, 1, _THIS_IP_);
}
@@ -270,6 +304,9 @@ extern int debug_lockdep_rcu_enabled(void);
* occur in the same context, for example, it is illegal to invoke
* rcu_read_unlock() in process context if the matching rcu_read_lock()
* was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
*/
static inline int rcu_read_lock_held(void)
{
@@ -277,6 +314,8 @@ static inline int rcu_read_lock_held(void)
return 1;
if (rcu_is_cpu_idle())
return 0;
+ if (!rcu_lockdep_current_cpu_online())
+ return 0;
return lock_is_held(&rcu_lock_map);
}
@@ -313,6 +352,9 @@ extern int rcu_read_lock_bh_held(void);
* notice an extended quiescent state to other CPUs that started a grace
* period. Otherwise we would delay any grace period as long as we run in
* the idle task.
+ *
+ * Similarly, we avoid claiming an SRCU read lock held if the current
+ * CPU is offline.
*/
#ifdef CONFIG_PREEMPT_COUNT
static inline int rcu_read_lock_sched_held(void)
@@ -323,6 +365,8 @@ static inline int rcu_read_lock_sched_held(void)
return 1;
if (rcu_is_cpu_idle())
return 0;
+ if (!rcu_lockdep_current_cpu_online())
+ return 0;
if (debug_locks)
lockdep_opinion = lock_is_held(&rcu_sched_lock_map);
return lockdep_opinion || preempt_count() != 0 || irqs_disabled();
@@ -381,8 +425,22 @@ extern int rcu_my_thread_group_empty(void);
} \
} while (0)
+#if defined(CONFIG_PROVE_RCU) && !defined(CONFIG_PREEMPT_RCU)
+static inline void rcu_preempt_sleep_check(void)
+{
+ rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
+ "Illegal context switch in RCU read-side "
+ "critical section");
+}
+#else /* #ifdef CONFIG_PROVE_RCU */
+static inline void rcu_preempt_sleep_check(void)
+{
+}
+#endif /* #else #ifdef CONFIG_PROVE_RCU */
+
#define rcu_sleep_check() \
do { \
+ rcu_preempt_sleep_check(); \
rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \
"Illegal context switch in RCU-bh" \
" read-side critical section"); \
@@ -470,6 +528,13 @@ extern int rcu_my_thread_group_empty(void);
* NULL. Although rcu_access_pointer() may also be used in cases where
* update-side locks prevent the value of the pointer from changing, you
* should instead use rcu_dereference_protected() for this use case.
+ *
+ * It is also permissible to use rcu_access_pointer() when read-side
+ * access to the pointer was removed at least one grace period ago, as
+ * is the case in the context of the RCU callback that is freeing up
+ * the data, or after a synchronize_rcu() returns. This can be useful
+ * when tearing down multi-linked structures after a grace period
+ * has elapsed.
*/
#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
@@ -659,6 +724,8 @@ static inline void rcu_read_lock(void)
__rcu_read_lock();
__acquire(RCU);
rcu_lock_acquire(&rcu_lock_map);
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "rcu_read_lock() used illegally while idle");
}
/*
@@ -678,6 +745,8 @@ static inline void rcu_read_lock(void)
*/
static inline void rcu_read_unlock(void)
{
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "rcu_read_unlock() used illegally while idle");
rcu_lock_release(&rcu_lock_map);
__release(RCU);
__rcu_read_unlock();
@@ -705,6 +774,8 @@ static inline void rcu_read_lock_bh(void)
local_bh_disable();
__acquire(RCU_BH);
rcu_lock_acquire(&rcu_bh_lock_map);
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "rcu_read_lock_bh() used illegally while idle");
}
/*
@@ -714,6 +785,8 @@ static inline void rcu_read_lock_bh(void)
*/
static inline void rcu_read_unlock_bh(void)
{
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "rcu_read_unlock_bh() used illegally while idle");
rcu_lock_release(&rcu_bh_lock_map);
__release(RCU_BH);
local_bh_enable();
@@ -737,6 +810,8 @@ static inline void rcu_read_lock_sched(void)
preempt_disable();
__acquire(RCU_SCHED);
rcu_lock_acquire(&rcu_sched_lock_map);
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "rcu_read_lock_sched() used illegally while idle");
}
/* Used by lockdep and tracing: cannot be traced, cannot call lockdep. */
@@ -753,6 +828,8 @@ static inline notrace void rcu_read_lock_sched_notrace(void)
*/
static inline void rcu_read_unlock_sched(void)
{
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "rcu_read_unlock_sched() used illegally while idle");
rcu_lock_release(&rcu_sched_lock_map);
__release(RCU_SCHED);
preempt_enable();
@@ -841,7 +918,7 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
/* See the kfree_rcu() header comment. */
BUILD_BUG_ON(!__is_kfree_rcu_offset(offset));
- call_rcu(head, (rcu_callback)offset);
+ kfree_call_rcu(head, (rcu_callback)offset);
}
/**
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 00b7a5e493d2..e93df77176d1 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,13 +27,9 @@
#include <linux/cache.h>
-#ifdef CONFIG_RCU_BOOST
static inline void rcu_init(void)
{
}
-#else /* #ifdef CONFIG_RCU_BOOST */
-void rcu_init(void);
-#endif /* #else #ifdef CONFIG_RCU_BOOST */
static inline void rcu_barrier_bh(void)
{
@@ -83,6 +79,12 @@ static inline void synchronize_sched_expedited(void)
synchronize_sched();
}
+static inline void kfree_call_rcu(struct rcu_head *head,
+ void (*func)(struct rcu_head *rcu))
+{
+ call_rcu(head, func);
+}
+
#ifdef CONFIG_TINY_RCU
static inline void rcu_preempt_note_context_switch(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 67458468f1a8..e8ee5dd0854c 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -61,6 +61,24 @@ extern void synchronize_rcu_bh(void);
extern void synchronize_sched_expedited(void);
extern void synchronize_rcu_expedited(void);
+void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+
+/**
+ * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
+ *
+ * Wait for an RCU-bh grace period to elapse, but use a "big hammer"
+ * approach to force the grace period to end quickly. This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * so is thus not recommended for any sort of common-case code. In fact,
+ * if you are using synchronize_rcu_bh_expedited() in a loop, please
+ * restructure your code to batch your updates, and then use a single
+ * synchronize_rcu_bh() instead.
+ *
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier. Failing to observe
+ * these restriction will result in deadlock.
+ */
static inline void synchronize_rcu_bh_expedited(void)
{
synchronize_sched_expedited();
@@ -83,6 +101,7 @@ extern void rcu_sched_force_quiescent_state(void);
/* A context switch is a grace period for RCU-sched and RCU-bh. */
static inline int rcu_blocking_is_gp(void)
{
+ might_sleep(); /* Check for RCU read-side critical section. */
return num_online_cpus() == 1;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0657368bd78f..f58889b8a608 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1863,8 +1863,7 @@ extern void task_clear_jobctl_pending(struct task_struct *task,
#ifdef CONFIG_PREEMPT_RCU
#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
-#define RCU_READ_UNLOCK_BOOSTED (1 << 1) /* boosted while in RCU read-side. */
-#define RCU_READ_UNLOCK_NEED_QS (1 << 2) /* RCU core needs CPU response. */
+#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
static inline void rcu_copy_process(struct task_struct *p)
{
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index e1b005918bbb..d3d5fa54f25e 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -99,15 +99,18 @@ long srcu_batches_completed(struct srcu_struct *sp);
* power mode. This way we can notice an extended quiescent state to
* other CPUs that started a grace period. Otherwise we would delay any
* grace period as long as we run in the idle task.
+ *
+ * Similarly, we avoid claiming an SRCU read lock held if the current
+ * CPU is offline.
*/
static inline int srcu_read_lock_held(struct srcu_struct *sp)
{
- if (rcu_is_cpu_idle())
- return 0;
-
if (!debug_lockdep_rcu_enabled())
return 1;
-
+ if (rcu_is_cpu_idle())
+ return 0;
+ if (!rcu_lockdep_current_cpu_online())
+ return 0;
return lock_is_held(&sp->dep_map);
}
@@ -169,6 +172,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
int retval = __srcu_read_lock(sp);
rcu_lock_acquire(&(sp)->dep_map);
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "srcu_read_lock() used illegally while idle");
return retval;
}
@@ -182,6 +187,8 @@ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
__releases(sp)
{
+ rcu_lockdep_assert(!rcu_is_cpu_idle(),
+ "srcu_read_unlock() used illegally while idle");
rcu_lock_release(&(sp)->dep_map);
__srcu_read_unlock(sp, idx);
}
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d2d88bed891b..337099783f37 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -313,19 +313,22 @@ TRACE_EVENT(rcu_prep_idle,
/*
* Tracepoint for the registration of a single RCU callback function.
* The first argument is the type of RCU, the second argument is
- * a pointer to the RCU callback itself, and the third element is the
- * new RCU callback queue length for the current CPU.
+ * a pointer to the RCU callback itself, the third element is the
+ * number of lazy callbacks queued, and the fourth element is the
+ * total number of callbacks queued.
*/
TRACE_EVENT(rcu_callback,
- TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen),
+ TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen_lazy,
+ long qlen),
- TP_ARGS(rcuname, rhp, qlen),
+ TP_ARGS(rcuname, rhp, qlen_lazy, qlen),
TP_STRUCT__entry(
__field(char *, rcuname)
__field(void *, rhp)
__field(void *, func)
+ __field(long, qlen_lazy)
__field(long, qlen)
),
@@ -333,11 +336,13 @@ TRACE_EVENT(rcu_callback,
__entry->rcuname = rcuname;
__entry->rhp = rhp;
__entry->func = rhp->func;
+ __entry->qlen_lazy = qlen_lazy;
__entry->qlen = qlen;
),
- TP_printk("%s rhp=%p func=%pf %ld",
- __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen)
+ TP_printk("%s rhp=%p func=%pf %ld/%ld",
+ __entry->rcuname, __entry->rhp, __entry->func,
+ __entry->qlen_lazy, __entry->qlen)
);
/*
@@ -345,20 +350,21 @@ TRACE_EVENT(rcu_callback,
* kfree() form. The first argument is the RCU type, the second argument
* is a pointer to the RCU callback, the third argument is the offset
* of the callback within the enclosing RCU-protected data structure,
- * and the fourth argument is the new RCU callback queue length for the
- * current CPU.
+ * the fourth argument is the number of lazy callbacks queued, and the
+ * fifth argument is the total number of callbacks queued.
*/
TRACE_EVENT(rcu_kfree_callback,
TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset,
- long qlen),
+ long qlen_lazy, long qlen),
- TP_ARGS(rcuname, rhp, offset, qlen),
+ TP_ARGS(rcuname, rhp, offset, qlen_lazy, qlen),
TP_STRUCT__entry(
__field(char *, rcuname)
__field(void *, rhp)
__field(unsigned long, offset)
+ __field(long, qlen_lazy)
__field(long, qlen)
),
@@ -366,41 +372,45 @@ TRACE_EVENT(rcu_kfree_callback,
__entry->rcuname = rcuname;
__entry->rhp = rhp;
__entry->offset = offset;
+ __entry->qlen_lazy = qlen_lazy;
__entry->qlen = qlen;
),
- TP_printk("%s rhp=%p func=%ld %ld",
+ TP_printk("%s rhp=%p func=%ld %ld/%ld",
__entry->rcuname, __entry->rhp, __entry->offset,
- __entry->qlen)
+ __entry->qlen_lazy, __entry->qlen)
);
/*
* Tracepoint for marking the beginning rcu_do_batch, performed to start
* RCU callback invocation. The first argument is the RCU flavor,
- * the second is the total number of callbacks (including those that
- * are not yet ready to be invoked), and the third argument is the
- * current RCU-callback batch limit.
+ * the second is the number of lazy callbacks queued, the third is
+ * the total number of callbacks queued, and the fourth argument is
+ * the current RCU-callback batch limit.
*/
TRACE_EVENT(rcu_batch_start,
- TP_PROTO(char *rcuname, long qlen, int blimit),
+ TP_PROTO(char *rcuname, long qlen_lazy, long qlen, int blimit),
- TP_ARGS(rcuname, qlen, blimit),
+ TP_ARGS(rcuname, qlen_lazy, qlen, blimit),
TP_STRUCT__entry(
__field(char *, rcuname)
+ __field(long, qlen_lazy)
__field(long, qlen)
__field(int, blimit)
),
TP_fast_assign(
__entry->rcuname = rcuname;
+ __entry->qlen_lazy = qlen_lazy;
__entry->qlen = qlen;
__entry->blimit = blimit;
),
- TP_printk("%s CBs=%ld bl=%d",
- __entry->rcuname, __entry->qlen, __entry->blimit)
+ TP_printk("%s CBs=%ld/%ld bl=%d",
+ __entry->rcuname, __entry->qlen_lazy, __entry->qlen,
+ __entry->blimit)
);
/*
@@ -531,16 +541,21 @@ TRACE_EVENT(rcu_torture_read,
#else /* #ifdef CONFIG_RCU_TRACE */
#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
-#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0)
+#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, \
+ qsmask) do { } while (0)
#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
-#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
+#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, \
+ grplo, grphi, gp_tasks) do { } \
+ while (0)
#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
#define trace_rcu_dyntick(polarity, oldnesting, newnesting) do { } while (0)
#define trace_rcu_prep_idle(reason) do { } while (0)
-#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
-#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
-#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
+#define trace_rcu_callback(rcuname, rhp, qlen_lazy, qlen) do { } while (0)
+#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen_lazy, qlen) \
+ do { } while (0)
+#define trace_rcu_batch_start(rcuname, qlen_lazy, qlen, blimit) \
+ do { } while (0)
#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
#define trace_rcu_batch_end(rcuname, callbacks_invoked, cb, nr, iit, risk) \