From b3fbab0571eb09746cc0283648165ec00efc8eb2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 24 May 2011 08:31:09 -0700 Subject: rcu: Restore checks for blocking in RCU read-side critical sections Long ago, using TREE_RCU with PREEMPT would result in "scheduling while atomic" diagnostics if you blocked in an RCU read-side critical section. However, PREEMPT now implies TREE_PREEMPT_RCU, which defeats this diagnostic. This commit therefore adds a replacement diagnostic based on PROVE_RCU. Because rcu_lockdep_assert() and lockdep_rcu_dereference() are now being used for things that have nothing to do with rcu_dereference(), rename lockdep_rcu_dereference() to lockdep_rcu_suspicious() and add a third argument that is a string indicating what is suspicious. This third argument is passed in from a new third argument to rcu_lockdep_assert(). Update all calls to rcu_lockdep_assert() to add an informative third argument. Also, add a pair of rcu_lockdep_assert() calls from within rcu_note_context_switch(), one complaining if a context switch occurs in an RCU-bh read-side critical section and another complaining if a context switch occurs in an RCU-sched read-side critical section. These are present only if the PROVE_RCU kernel parameter is enabled. Finally, fix some checkpatch whitespace complaints in lockdep.c. Again, you must enable PROVE_RCU to see these new diagnostics. But you are enabling PROVE_RCU to check out new RCU uses in any case, aren't you? Signed-off-by: Paul E. McKenney --- include/linux/lockdep.h | 2 +- include/linux/rcupdate.h | 28 ++++++++++++++++++++++------ 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index ef820a3c378b..b6a56e37284c 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -548,7 +548,7 @@ do { \ #endif #ifdef CONFIG_PROVE_RCU -extern void lockdep_rcu_dereference(const char *file, const int line); +void lockdep_rcu_suspicious(const char *file, const int line, const char *s); #endif #endif /* __LINUX_LOCKDEP_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8f4f881a0ad8..8e7470d8b676 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -297,19 +297,31 @@ extern int rcu_my_thread_group_empty(void); /** * rcu_lockdep_assert - emit lockdep splat if specified condition not met * @c: condition to check + * @s: informative message */ -#define rcu_lockdep_assert(c) \ +#define rcu_lockdep_assert(c, s) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ __warned = true; \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ + lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ } \ } while (0) +#define rcu_sleep_check() \ + do { \ + rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \ + "Illegal context switch in RCU-bh" \ + " read-side critical section"); \ + rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \ + "Illegal context switch in RCU-sched"\ + " read-side critical section"); \ + } while (0) + #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c) do { } while (0) +#define rcu_lockdep_assert(c, s) do { } while (0) +#define rcu_sleep_check() do { } while (0) #endif /* #else #ifdef CONFIG_PROVE_RCU */ @@ -338,14 +350,16 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_check(p, c, space) \ ({ \ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \ + " usage"); \ rcu_dereference_sparse(p, space); \ smp_read_barrier_depends(); \ ((typeof(*p) __force __kernel *)(_________p1)); \ }) #define __rcu_dereference_protected(p, c, space) \ ({ \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \ + " usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(p)); \ }) @@ -359,7 +373,9 @@ extern int rcu_my_thread_group_empty(void); #define __rcu_dereference_index_check(p, c) \ ({ \ typeof(p) _________p1 = ACCESS_ONCE(p); \ - rcu_lockdep_assert(c); \ + rcu_lockdep_assert(c, \ + "suspicious rcu_dereference_index_check()" \ + " usage"); \ smp_read_barrier_depends(); \ (_________p1); \ }) -- cgit v1.2.3 From 990987511c200877bb20201772d5de46644151f2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 31 May 2011 21:03:55 -0700 Subject: rcu: Move rcu_head definition to types.h Take a first step towards untangling Linux kernel header files by placing the struct rcu_head definition into include/linux/types.h and including include/linux/types.h in include/linux/rcupdate.h where struct rcu_head used to be defined. The actual inclusion point for include/linux/types.h is with the rest of the #include directives rather than at the point where struct rcu_head used to be defined, as suggested by Mathieu Desnoyers. Once this is in place, then header files that need only rcu_head can include types.h rather than rcupdate.h. Signed-off-by: Paul E. McKenney Cc: Paul Gortmaker Acked-by: Mathieu Desnoyers --- include/linux/rcupdate.h | 11 +---------- include/linux/types.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 8e7470d8b676..87bd390df73f 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -33,6 +33,7 @@ #ifndef __LINUX_RCUPDATE_H #define __LINUX_RCUPDATE_H +#include #include #include #include @@ -64,16 +65,6 @@ static inline void rcutorture_record_progress(unsigned long vernum) #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) -/** - * struct rcu_head - callback structure for use with RCU - * @next: next update requests in a list - * @func: actual update function to call after the grace period. - */ -struct rcu_head { - struct rcu_head *next; - void (*func)(struct rcu_head *head); -}; - /* Exported common interfaces */ extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); diff --git a/include/linux/types.h b/include/linux/types.h index 176da8c1fbb1..57a97234bec1 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -238,6 +238,16 @@ struct ustat { char f_fpack[6]; }; +/** + * struct rcu_head - callback structure for use with RCU + * @next: next update requests in a list + * @func: actual update function to call after the grace period. + */ +struct rcu_head { + struct rcu_head *next; + void (*func)(struct rcu_head *head); +}; + #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ -- cgit v1.2.3 From 2c42818e962e2858334bf45bfc56662b3752df34 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 26 May 2011 22:14:36 -0700 Subject: rcu: Abstract common code for RCU grace-period-wait primitives Pull the code that waits for an RCU grace period into a single function, which is then called by synchronize_rcu() and friends in the case of TREE_RCU and TREE_PREEMPT_RCU, and from rcu_barrier() and friends in the case of TINY_RCU and TINY_PREEMPT_RCU. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 130 ++++++++++++++++++++++++++--------------------- include/linux/rcutiny.h | 16 +++++- include/linux/rcutree.h | 2 + 3 files changed, 90 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 87bd390df73f..ae5327de41aa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -66,11 +66,73 @@ static inline void rcutorture_record_progress(unsigned long vernum) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) /* Exported common interfaces */ + +#ifdef CONFIG_PREEMPT_RCU + +/** + * call_rcu() - Queue an RCU callback for invocation after a grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +extern void call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ +#define call_rcu call_rcu_sched + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + +/** + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_bh() assumes + * that the read-side critical sections end on completion of a softirq + * handler. This means that read-side critical sections in process + * context must not be interrupted by softirqs. This interface is to be + * used when most of the read-side critical sections are in softirq context. + * RCU read-side critical sections are delimited by : + * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. + * OR + * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. + * These may be nested. + */ +extern void call_rcu_bh(struct rcu_head *head, + void (*func)(struct rcu_head *head)); + +/** + * call_rcu_sched() - Queue an RCU for invocation after sched grace period. + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed. call_rcu_sched() assumes + * that the read-side critical sections end on enabling of preemption + * or on voluntary preemption. + * RCU read-side critical sections are delimited by : + * - rcu_read_lock_sched() and rcu_read_unlock_sched(), + * OR + * anything that disables preemption. + * These may be nested. + */ extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); + extern void synchronize_sched(void); -extern void rcu_barrier_bh(void); -extern void rcu_barrier_sched(void); static inline void __rcu_read_lock_bh(void) { @@ -143,6 +205,15 @@ static inline void rcu_exit_nohz(void) #endif /* #else #ifdef CONFIG_NO_HZ */ +/* + * Infrastructure to implement the synchronize_() primitives in + * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. + */ + +typedef void call_rcu_func_t(struct rcu_head *head, + void (*func)(struct rcu_head *head)); +void wait_rcu_gp(call_rcu_func_t crf); + #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include #elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) @@ -723,61 +794,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_INIT_POINTER(p, v) \ p = (typeof(*v) __force __rcu *)(v) -/* Infrastructure to implement the synchronize_() primitives. */ - -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - -extern void wakeme_after_rcu(struct rcu_head *head); - -#ifdef CONFIG_PREEMPT_RCU - -/** - * call_rcu() - Queue an RCU callback for invocation after a grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all pre-existing RCU read-side - * critical sections have completed. However, the callback function - * might well execute concurrently with RCU read-side critical sections - * that started after call_rcu() was invoked. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. - */ -extern void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - -#else /* #ifdef CONFIG_PREEMPT_RCU */ - -/* In classic RCU, call_rcu() is just call_rcu_sched(). */ -#define call_rcu call_rcu_sched - -#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ - -/** - * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. - * @head: structure to be used for queueing the RCU updates. - * @func: actual callback function to be invoked after the grace period - * - * The callback function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. call_rcu_bh() assumes - * that the read-side critical sections end on completion of a softirq - * handler. This means that read-side critical sections in process - * context must not be interrupted by softirqs. This interface is to be - * used when most of the read-side critical sections are in softirq context. - * RCU read-side critical sections are delimited by : - * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context. - * OR - * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. - * These may be nested. - */ -extern void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head)); - /* * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally * by call_rcu() and rcu callback execution, and are therefore not part of the diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 52b3e0281fd0..4eab233a00cd 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -31,6 +31,16 @@ static inline void rcu_init(void) { } +static inline void rcu_barrier_bh(void) +{ + wait_rcu_gp(call_rcu_bh); +} + +static inline void rcu_barrier_sched(void) +{ + wait_rcu_gp(call_rcu_sched); +} + #ifdef CONFIG_TINY_RCU static inline void synchronize_rcu_expedited(void) @@ -45,9 +55,13 @@ static inline void rcu_barrier(void) #else /* #ifdef CONFIG_TINY_RCU */ -void rcu_barrier(void); void synchronize_rcu_expedited(void); +static inline void rcu_barrier(void) +{ + wait_rcu_gp(call_rcu); +} + #endif /* #else #ifdef CONFIG_TINY_RCU */ static inline void synchronize_rcu_bh(void) diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index e65d06634dd8..67458468f1a8 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -67,6 +67,8 @@ static inline void synchronize_rcu_bh_expedited(void) } extern void rcu_barrier(void); +extern void rcu_barrier_bh(void); +extern void rcu_barrier_sched(void); extern unsigned long rcutorture_testseq; extern unsigned long rcutorture_vernum; -- cgit v1.2.3 From 29c00b4a1d9e277786120032aa8364631820d863 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 17 Jun 2011 15:53:19 -0700 Subject: rcu: Add event-tracing for RCU callback invocation There was recently some controversy about the overhead of invoking RCU callbacks. Add TRACE_EVENT()s to obtain fine-grained timings for the start and stop of a batch of callbacks and also for each callback invoked. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 50 ----------------------- include/trace/events/rcu.h | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 50 deletions(-) create mode 100644 include/trace/events/rcu.h (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ae5327de41aa..dd2bc2c6a285 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -794,44 +794,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define RCU_INIT_POINTER(p, v) \ p = (typeof(*v) __force __rcu *)(v) -/* - * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally - * by call_rcu() and rcu callback execution, and are therefore not part of the - * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. - */ - -#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD -# define STATE_RCU_HEAD_READY 0 -# define STATE_RCU_HEAD_QUEUED 1 - -extern struct debug_obj_descr rcuhead_debug_descr; - -static inline void debug_rcu_head_queue(struct rcu_head *head) -{ - WARN_ON_ONCE((unsigned long)head & 0x3); - debug_object_activate(head, &rcuhead_debug_descr); - debug_object_active_state(head, &rcuhead_debug_descr, - STATE_RCU_HEAD_READY, - STATE_RCU_HEAD_QUEUED); -} - -static inline void debug_rcu_head_unqueue(struct rcu_head *head) -{ - debug_object_active_state(head, &rcuhead_debug_descr, - STATE_RCU_HEAD_QUEUED, - STATE_RCU_HEAD_READY); - debug_object_deactivate(head, &rcuhead_debug_descr); -} -#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -static inline void debug_rcu_head_queue(struct rcu_head *head) -{ -} - -static inline void debug_rcu_head_unqueue(struct rcu_head *head) -{ -} -#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ - static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) { return offset < 4096; @@ -850,18 +812,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset) call_rcu(head, (rcu_callback)offset); } -extern void kfree(const void *); - -static inline void __rcu_reclaim(struct rcu_head *head) -{ - unsigned long offset = (unsigned long)head->func; - - if (__is_kfree_rcu_offset(offset)) - kfree((void *)head - offset); - else - head->func(head); -} - /** * kfree_rcu() - kfree an object after a grace period. * @ptr: pointer to kfree diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h new file mode 100644 index 000000000000..db3f6e9e63e6 --- /dev/null +++ b/include/trace/events/rcu.h @@ -0,0 +1,98 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rcu + +#if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RCU_H + +#include + +/* + * Tracepoint for calling rcu_do_batch, performed to start callback invocation: + */ +TRACE_EVENT(rcu_batch_start, + + TP_PROTO(long callbacks_ready, int blimit), + + TP_ARGS(callbacks_ready, blimit), + + TP_STRUCT__entry( + __field( long, callbacks_ready ) + __field( int, blimit ) + ), + + TP_fast_assign( + __entry->callbacks_ready = callbacks_ready; + __entry->blimit = blimit; + ), + + TP_printk("CBs=%ld bl=%d", __entry->callbacks_ready, __entry->blimit) +); + +/* + * Tracepoint for the invocation of a single RCU callback + */ +TRACE_EVENT(rcu_invoke_callback, + + TP_PROTO(struct rcu_head *rhp), + + TP_ARGS(rhp), + + TP_STRUCT__entry( + __field( void *, rhp ) + __field( void *, func ) + ), + + TP_fast_assign( + __entry->rhp = rhp; + __entry->func = rhp->func; + ), + + TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) +); + +/* + * Tracepoint for the invocation of a single RCU kfree callback + */ +TRACE_EVENT(rcu_invoke_kfree_callback, + + TP_PROTO(struct rcu_head *rhp, unsigned long offset), + + TP_ARGS(rhp, offset), + + TP_STRUCT__entry( + __field(void *, rhp ) + __field(unsigned long, offset ) + ), + + TP_fast_assign( + __entry->rhp = rhp; + __entry->offset = offset; + ), + + TP_printk("rhp=%p func=%ld", __entry->rhp, __entry->offset) +); + +/* + * Tracepoint for leaving rcu_do_batch, performed after callback invocation: + */ +TRACE_EVENT(rcu_batch_end, + + TP_PROTO(int callbacks_invoked), + + TP_ARGS(callbacks_invoked), + + TP_STRUCT__entry( + __field( int, callbacks_invoked ) + ), + + TP_fast_assign( + __entry->callbacks_invoked = callbacks_invoked; + ), + + TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) +); + +#endif /* _TRACE_RCU_H */ + +/* This part must be outside protection */ +#include -- cgit v1.2.3 From 300df91ca9358f7f09298eec9503c12b32054ef7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 18 Jun 2011 22:26:31 -0700 Subject: rcu: Event-trace markers for computing RCU CPU utilization This commit adds the trace_rcu_utilization() marker that is to be used to allow postprocessing scripts compute RCU's CPU utilization, give or take event-trace overhead. Note that we do not include RCU's dyntick-idle interface because event tracing requires RCU protection, which is not available in dyntick-idle mode. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 73 +++++++++++++++++++++++++++++++++------------- 1 file changed, 53 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index db3f6e9e63e6..ab458eb689fb 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -7,29 +7,58 @@ #include /* - * Tracepoint for calling rcu_do_batch, performed to start callback invocation: + * Tracepoint for start/end markers used for utilization calculations. + * By convention, the string is of the following forms: + * + * "Start " -- Mark the start of the specified activity, + * such as "context switch". Nesting is permitted. + * "End " -- Mark the end of the specified activity. + */ +TRACE_EVENT(rcu_utilization, + + TP_PROTO(char *s), + + TP_ARGS(s), + + TP_STRUCT__entry( + __field(char *, s) + ), + + TP_fast_assign( + __entry->s = s; + ), + + TP_printk("%s", __entry->s) +); + +/* + * Tracepoint for marking the beginning rcu_do_batch, performed to start + * RCU callback invocation. The first argument is the total number of + * callbacks (including those that are not yet ready to be invoked), + * and the second argument is the current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(long callbacks_ready, int blimit), + TP_PROTO(long qlen, int blimit), - TP_ARGS(callbacks_ready, blimit), + TP_ARGS(qlen, blimit), TP_STRUCT__entry( - __field( long, callbacks_ready ) - __field( int, blimit ) + __field(long, qlen) + __field(int, blimit) ), TP_fast_assign( - __entry->callbacks_ready = callbacks_ready; - __entry->blimit = blimit; + __entry->qlen = qlen; + __entry->blimit = blimit; ), - TP_printk("CBs=%ld bl=%d", __entry->callbacks_ready, __entry->blimit) + TP_printk("CBs=%ld bl=%d", __entry->qlen, __entry->blimit) ); /* - * Tracepoint for the invocation of a single RCU callback + * Tracepoint for the invocation of a single RCU callback function. + * The argument is a pointer to the RCU callback itself. */ TRACE_EVENT(rcu_invoke_callback, @@ -38,20 +67,23 @@ TRACE_EVENT(rcu_invoke_callback, TP_ARGS(rhp), TP_STRUCT__entry( - __field( void *, rhp ) - __field( void *, func ) + __field(void *, rhp) + __field(void *, func) ), TP_fast_assign( - __entry->rhp = rhp; - __entry->func = rhp->func; + __entry->rhp = rhp; + __entry->func = rhp->func; ), TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) ); /* - * Tracepoint for the invocation of a single RCU kfree callback + * Tracepoint for the invocation of a single RCU callback of the special + * kfree() form. The first argument is a pointer to the RCU callback + * and the second argument is the offset of the callback within the + * enclosing RCU-protected data structure. */ TRACE_EVENT(rcu_invoke_kfree_callback, @@ -60,12 +92,12 @@ TRACE_EVENT(rcu_invoke_kfree_callback, TP_ARGS(rhp, offset), TP_STRUCT__entry( - __field(void *, rhp ) - __field(unsigned long, offset ) + __field(void *, rhp) + __field(unsigned long, offset) ), TP_fast_assign( - __entry->rhp = rhp; + __entry->rhp = rhp; __entry->offset = offset; ), @@ -73,7 +105,8 @@ TRACE_EVENT(rcu_invoke_kfree_callback, ); /* - * Tracepoint for leaving rcu_do_batch, performed after callback invocation: + * Tracepoint for exiting rcu_do_batch after RCU callbacks have been + * invoked. The first argument is the number of callbacks actually invoked. */ TRACE_EVENT(rcu_batch_end, @@ -82,11 +115,11 @@ TRACE_EVENT(rcu_batch_end, TP_ARGS(callbacks_invoked), TP_STRUCT__entry( - __field( int, callbacks_invoked ) + __field(int, callbacks_invoked) ), TP_fast_assign( - __entry->callbacks_invoked = callbacks_invoked; + __entry->callbacks_invoked = callbacks_invoked; ), TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) -- cgit v1.2.3 From 72fe701b70e6ced35d734b676c13efbc8fc769a9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 01:14:54 -0700 Subject: rcu: Add RCU type to callback-invocation tracing Add a string to the rcu_batch_start() and rcu_batch_end() trace messages that indicates the RCU type ("rcu_sched", "rcu_bh", or "rcu_preempt"). The trace messages for the actual invocations themselves are not marked, as it should be clear from the rcu_batch_start() and rcu_batch_end() events before and after. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ab458eb689fb..508824e5a77d 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -33,27 +33,31 @@ TRACE_EVENT(rcu_utilization, /* * Tracepoint for marking the beginning rcu_do_batch, performed to start - * RCU callback invocation. The first argument is the total number of - * callbacks (including those that are not yet ready to be invoked), - * and the second argument is the current RCU-callback batch limit. + * RCU callback invocation. The first argument is the RCU flavor, + * the second is the total number of callbacks (including those that + * are not yet ready to be invoked), and the third argument is the + * current RCU-callback batch limit. */ TRACE_EVENT(rcu_batch_start, - TP_PROTO(long qlen, int blimit), + TP_PROTO(char *rcuname, long qlen, int blimit), - TP_ARGS(qlen, blimit), + TP_ARGS(rcuname, qlen, blimit), TP_STRUCT__entry( + __field(char *, rcuname) __field(long, qlen) __field(int, blimit) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->qlen = qlen; __entry->blimit = blimit; ), - TP_printk("CBs=%ld bl=%d", __entry->qlen, __entry->blimit) + TP_printk("%s CBs=%ld bl=%d", + __entry->rcuname, __entry->qlen, __entry->blimit) ); /* @@ -106,23 +110,27 @@ TRACE_EVENT(rcu_invoke_kfree_callback, /* * Tracepoint for exiting rcu_do_batch after RCU callbacks have been - * invoked. The first argument is the number of callbacks actually invoked. + * invoked. The first argument is the name of the RCU flavor and + * the second argument is number of callbacks actually invoked. */ TRACE_EVENT(rcu_batch_end, - TP_PROTO(int callbacks_invoked), + TP_PROTO(char *rcuname, int callbacks_invoked), - TP_ARGS(callbacks_invoked), + TP_ARGS(rcuname, callbacks_invoked), TP_STRUCT__entry( + __field(char *, rcuname) __field(int, callbacks_invoked) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->callbacks_invoked = callbacks_invoked; ), - TP_printk("CBs-invoked=%d", __entry->callbacks_invoked) + TP_printk("%s CBs-invoked=%d", + __entry->rcuname, __entry->callbacks_invoked) ); #endif /* _TRACE_RCU_H */ -- cgit v1.2.3 From 385680a9487d2f85382ad6d74e2a15837e47bfd9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 21 Jun 2011 22:43:26 -0700 Subject: rcu: Add event-trace markers to TREE_RCU kthreads Add event-trace markers to TREE_RCU kthreads to allow including these kthread's CPU time in the utilization calculations. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index 508824e5a77d..ac52aba00a3e 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -13,6 +13,9 @@ * "Start " -- Mark the start of the specified activity, * such as "context switch". Nesting is permitted. * "End " -- Mark the end of the specified activity. + * + * An "@" character within "" is a comment character: Data + * reduction scripts will ignore the "@" and the remainder of the line. */ TRACE_EVENT(rcu_utilization, -- cgit v1.2.3 From 965a002b4f1a458c5dcb334ec29f48a0046faa25 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 18 Jun 2011 09:55:39 -0700 Subject: rcu: Make TINY_RCU also use softirq for RCU_BOOST=n This patch #ifdefs TINY_RCU kthreads out of the kernel unless RCU_BOOST=y, thus eliminating context-switch overhead if RCU priority boosting has not been configured. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcutiny.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 4eab233a00cd..00b7a5e493d2 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,9 +27,13 @@ #include +#ifdef CONFIG_RCU_BOOST static inline void rcu_init(void) { } +#else /* #ifdef CONFIG_RCU_BOOST */ +void rcu_init(void); +#endif /* #else #ifdef CONFIG_RCU_BOOST */ static inline void rcu_barrier_bh(void) { -- cgit v1.2.3 From d4c08f2ac311a360230eef7e5395b0ec8d8f0670 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Jun 2011 06:36:56 -0700 Subject: rcu: Add grace-period, quiescent-state, and call_rcu trace events Add trace events to record grace-period start and end, quiescent states, CPUs noticing grace-period start and end, grace-period initialization, call_rcu() invocation, tasks blocking in RCU read-side critical sections, tasks exiting those same critical sections, force_quiescent_state() detection of dyntick-idle and offline CPUs, CPUs entering and leaving dyntick-idle mode (except from NMIs), CPUs coming online and going offline, and CPUs being kicked for staying in dyntick-idle mode for too long (as in many weeks, even on 32-bit systems). Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney rcu: Add the rcu flavor to callback trace events The earlier trace events for registering RCU callbacks and for invoking them did not include the RCU flavor (rcu_bh, rcu_preempt, or rcu_sched). This commit adds the RCU flavor to those trace events. Signed-off-by: Paul E. McKenney --- include/trace/events/rcu.h | 345 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 331 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h index ac52aba00a3e..669fbd62ec25 100644 --- a/include/trace/events/rcu.h +++ b/include/trace/events/rcu.h @@ -24,7 +24,7 @@ TRACE_EVENT(rcu_utilization, TP_ARGS(s), TP_STRUCT__entry( - __field(char *, s) + __field(char *, s) ), TP_fast_assign( @@ -34,6 +34,297 @@ TRACE_EVENT(rcu_utilization, TP_printk("%s", __entry->s) ); +#ifdef CONFIG_RCU_TRACE + +#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) + +/* + * Tracepoint for grace-period events: starting and ending a grace + * period ("start" and "end", respectively), a CPU noting the start + * of a new grace period or the end of an old grace period ("cpustart" + * and "cpuend", respectively), a CPU passing through a quiescent + * state ("cpuqs"), a CPU coming online or going offline ("cpuonl" + * and "cpuofl", respectively), and a CPU being kicked for being too + * long in dyntick-idle mode ("kick"). + */ +TRACE_EVENT(rcu_grace_period, + + TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent), + + TP_ARGS(rcuname, gpnum, gpevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(char *, gpevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->gpevent = gpevent; + ), + + TP_printk("%s %lu %s", + __entry->rcuname, __entry->gpnum, __entry->gpevent) +); + +/* + * Tracepoint for grace-period-initialization events. These are + * distinguished by the type of RCU, the new grace-period number, the + * rcu_node structure level, the starting and ending CPU covered by the + * rcu_node structure, and the mask of CPUs that will be waited for. + * All but the type of RCU are extracted from the rcu_node structure. + */ +TRACE_EVENT(rcu_grace_period_init, + + TP_PROTO(char *rcuname, unsigned long gpnum, u8 level, + int grplo, int grphi, unsigned long qsmask), + + TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(unsigned long, qsmask) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->qsmask = qsmask; + ), + + TP_printk("%s %lu %u %d %d %lx", + __entry->rcuname, __entry->gpnum, __entry->level, + __entry->grplo, __entry->grphi, __entry->qsmask) +); + +/* + * Tracepoint for tasks blocking within preemptible-RCU read-side + * critical sections. Track the type of RCU (which one day might + * include SRCU), the grace-period number that the task is blocking + * (the current or the next), and the task's PID. + */ +TRACE_EVENT(rcu_preempt_task, + + TP_PROTO(char *rcuname, int pid, unsigned long gpnum), + + TP_ARGS(rcuname, pid, gpnum), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, pid) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->pid = pid; + ), + + TP_printk("%s %lu %d", + __entry->rcuname, __entry->gpnum, __entry->pid) +); + +/* + * Tracepoint for tasks that blocked within a given preemptible-RCU + * read-side critical section exiting that critical section. Track the + * type of RCU (which one day might include SRCU) and the task's PID. + */ +TRACE_EVENT(rcu_unlock_preempted_task, + + TP_PROTO(char *rcuname, unsigned long gpnum, int pid), + + TP_ARGS(rcuname, gpnum, pid), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, pid) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->pid = pid; + ), + + TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid) +); + +/* + * Tracepoint for quiescent-state-reporting events. These are + * distinguished by the type of RCU, the grace-period number, the + * mask of quiescent lower-level entities, the rcu_node structure level, + * the starting and ending CPU covered by the rcu_node structure, and + * whether there are any blocked tasks blocking the current grace period. + * All but the type of RCU are extracted from the rcu_node structure. + */ +TRACE_EVENT(rcu_quiescent_state_report, + + TP_PROTO(char *rcuname, unsigned long gpnum, + unsigned long mask, unsigned long qsmask, + u8 level, int grplo, int grphi, int gp_tasks), + + TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(unsigned long, mask) + __field(unsigned long, qsmask) + __field(u8, level) + __field(int, grplo) + __field(int, grphi) + __field(u8, gp_tasks) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->mask = mask; + __entry->qsmask = qsmask; + __entry->level = level; + __entry->grplo = grplo; + __entry->grphi = grphi; + __entry->gp_tasks = gp_tasks; + ), + + TP_printk("%s %lu %lx>%lx %u %d %d %u", + __entry->rcuname, __entry->gpnum, + __entry->mask, __entry->qsmask, __entry->level, + __entry->grplo, __entry->grphi, __entry->gp_tasks) +); + +/* + * Tracepoint for quiescent states detected by force_quiescent_state(). + * These trace events include the type of RCU, the grace-period number + * that was blocked by the CPU, the CPU itself, and the type of quiescent + * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline, + * or "kick" when kicking a CPU that has been in dyntick-idle mode for + * too long. + */ +TRACE_EVENT(rcu_fqs, + + TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent), + + TP_ARGS(rcuname, gpnum, cpu, qsevent), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(unsigned long, gpnum) + __field(int, cpu) + __field(char *, qsevent) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->gpnum = gpnum; + __entry->cpu = cpu; + __entry->qsevent = qsevent; + ), + + TP_printk("%s %lu %d %s", + __entry->rcuname, __entry->gpnum, + __entry->cpu, __entry->qsevent) +); + +#endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */ + +/* + * Tracepoint for dyntick-idle entry/exit events. These take a string + * as argument: "Start" for entering dyntick-idle mode and "End" for + * leaving it. + */ +TRACE_EVENT(rcu_dyntick, + + TP_PROTO(char *polarity), + + TP_ARGS(polarity), + + TP_STRUCT__entry( + __field(char *, polarity) + ), + + TP_fast_assign( + __entry->polarity = polarity; + ), + + TP_printk("%s", __entry->polarity) +); + +/* + * Tracepoint for the registration of a single RCU callback function. + * The first argument is the type of RCU, the second argument is + * a pointer to the RCU callback itself, and the third element is the + * new RCU callback queue length for the current CPU. + */ +TRACE_EVENT(rcu_callback, + + TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen), + + TP_ARGS(rcuname, rhp, qlen), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(void *, rhp) + __field(void *, func) + __field(long, qlen) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->rhp = rhp; + __entry->func = rhp->func; + __entry->qlen = qlen; + ), + + TP_printk("%s rhp=%p func=%pf %ld", + __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen) +); + +/* + * Tracepoint for the registration of a single RCU callback of the special + * kfree() form. The first argument is the RCU type, the second argument + * is a pointer to the RCU callback, the third argument is the offset + * of the callback within the enclosing RCU-protected data structure, + * and the fourth argument is the new RCU callback queue length for the + * current CPU. + */ +TRACE_EVENT(rcu_kfree_callback, + + TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset, + long qlen), + + TP_ARGS(rcuname, rhp, offset, qlen), + + TP_STRUCT__entry( + __field(char *, rcuname) + __field(void *, rhp) + __field(unsigned long, offset) + __field(long, qlen) + ), + + TP_fast_assign( + __entry->rcuname = rcuname; + __entry->rhp = rhp; + __entry->offset = offset; + __entry->qlen = qlen; + ), + + TP_printk("%s rhp=%p func=%ld %ld", + __entry->rcuname, __entry->rhp, __entry->offset, + __entry->qlen) +); + /* * Tracepoint for marking the beginning rcu_do_batch, performed to start * RCU callback invocation. The first argument is the RCU flavor, @@ -65,50 +356,58 @@ TRACE_EVENT(rcu_batch_start, /* * Tracepoint for the invocation of a single RCU callback function. - * The argument is a pointer to the RCU callback itself. + * The first argument is the type of RCU, and the second argument is + * a pointer to the RCU callback itself. */ TRACE_EVENT(rcu_invoke_callback, - TP_PROTO(struct rcu_head *rhp), + TP_PROTO(char *rcuname, struct rcu_head *rhp), - TP_ARGS(rhp), + TP_ARGS(rcuname, rhp), TP_STRUCT__entry( - __field(void *, rhp) - __field(void *, func) + __field(char *, rcuname) + __field(void *, rhp) + __field(void *, func) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->func = rhp->func; ), - TP_printk("rhp=%p func=%pf", __entry->rhp, __entry->func) + TP_printk("%s rhp=%p func=%pf", + __entry->rcuname, __entry->rhp, __entry->func) ); /* * Tracepoint for the invocation of a single RCU callback of the special - * kfree() form. The first argument is a pointer to the RCU callback - * and the second argument is the offset of the callback within the - * enclosing RCU-protected data structure. + * kfree() form. The first argument is the RCU flavor, the second + * argument is a pointer to the RCU callback, and the third argument + * is the offset of the callback within the enclosing RCU-protected + * data structure. */ TRACE_EVENT(rcu_invoke_kfree_callback, - TP_PROTO(struct rcu_head *rhp, unsigned long offset), + TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset), - TP_ARGS(rhp, offset), + TP_ARGS(rcuname, rhp, offset), TP_STRUCT__entry( - __field(void *, rhp) + __field(char *, rcuname) + __field(void *, rhp) __field(unsigned long, offset) ), TP_fast_assign( + __entry->rcuname = rcuname; __entry->rhp = rhp; __entry->offset = offset; ), - TP_printk("rhp=%p func=%ld", __entry->rhp, __entry->offset) + TP_printk("%s rhp=%p func=%ld", + __entry->rcuname, __entry->rhp, __entry->offset) ); /* @@ -136,6 +435,24 @@ TRACE_EVENT(rcu_batch_end, __entry->rcuname, __entry->callbacks_invoked) ); +#else /* #ifdef CONFIG_RCU_TRACE */ + +#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0) +#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0) +#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0) +#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0) +#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0) +#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0) +#define trace_rcu_dyntick(polarity) do { } while (0) +#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0) +#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0) +#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0) +#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0) +#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0) +#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0) + +#endif /* #else #ifdef CONFIG_RCU_TRACE */ + #endif /* _TRACE_RCU_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 22507ed9b9d587486fb4681e93a8c58837738a25 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 18 Jul 2011 16:54:51 -0700 Subject: rcu: Remove unused and redundant interfaces The rcu_dereference_bh_protected() and rcu_dereference_sched_protected() macros are synonyms for rcu_dereference_protected() and are not used anywhere in mainline. This commit therefore removes them. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dd2bc2c6a285..caf22e84a743 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -578,26 +578,6 @@ extern int rcu_my_thread_group_empty(void); #define rcu_dereference_protected(p, c) \ __rcu_dereference_protected((p), (c), __rcu) -/** - * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * This is the RCU-bh counterpart to rcu_dereference_protected(). - */ -#define rcu_dereference_bh_protected(p, c) \ - __rcu_dereference_protected((p), (c), __rcu) - -/** - * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * This is the RCU-sched counterpart to rcu_dereference_protected(). - */ -#define rcu_dereference_sched_protected(p, c) \ - __rcu_dereference_protected((p), (c), __rcu) - /** * rcu_dereference() - fetch RCU-protected pointer for dereferencing -- cgit v1.2.3 From fc0763f53e3ff6a6bfa66934662a3446b9ca6f16 Mon Sep 17 00:00:00 2001 From: "Shi, Alex" Date: Thu, 28 Jul 2011 14:56:12 +0800 Subject: nohz: Remove nohz_cpu_mask RCU no longer uses this global variable, nor does anyone else. This commit therefore removes this variable. This reduces memory footprint and also removes some atomic instructions and memory barriers from the dyntick-idle path. Signed-off-by: Alex Shi Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ac2c0578e0f..6ee91e20353b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -270,7 +270,6 @@ extern void init_idle_bootup_task(struct task_struct *idle); extern int runqueue_is_locked(int cpu); -extern cpumask_var_t nohz_cpu_mask; #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) extern void select_nohz_load_balancer(int stop_tick); extern int get_nohz_timer_target(void); -- cgit v1.2.3 From d322f45ceed525daa9401154590bbae3222cfefb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 31 Jul 2011 22:09:25 -0700 Subject: rcu: Make rcu_assign_pointer() unconditionally insert a memory barrier Recent changes to gcc give warning messages on rcu_assign_pointers()'s checks that allow it to determine when it is OK to omit the memory barrier. Stephen Hemminger tried a number of gcc tricks to silence this warning, but #pragmas and CPP macros do not work together in the way that would be required to make this work. However, we now have RCU_INIT_POINTER(), which already omits this memory barrier, and which therefore may be used when assigning NULL to an RCU-protected pointer that is accessible to readers. This commit therefore makes rcu_assign_pointer() unconditionally emit the memory barrier. Reported-by: Stephen Hemminger Signed-off-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index caf22e84a743..d7accc5f8892 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -443,9 +443,7 @@ extern int rcu_my_thread_group_empty(void); }) #define __rcu_assign_pointer(p, v, space) \ ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ + smp_wmb(); \ (p) = (typeof(*v) __force space *)(v); \ }) -- cgit v1.2.3 From 6846c0c54074d47927c90eab4a805115e1ae3292 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 31 Jul 2011 22:33:02 -0700 Subject: rcu: Improve rcu_assign_pointer() and RCU_INIT_POINTER() documentation The differences between rcu_assign_pointer() and RCU_INIT_POINTER() are subtle, and it is easy to use the the cheaper RCU_INIT_POINTER() when the more-expensive rcu_assign_pointer() should have been used instead. The consequences of this mistake are quite severe. This commit therefore carefully lays out the situations in which it it permissible to use RCU_INIT_POINTER(). Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d7accc5f8892..af186e260c43 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -754,11 +754,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them - * (pretty much all of them other than x86), and also prevents - * the compiler from reordering the code that initializes the - * structure after the pointer assignment. More importantly, this - * call documents which pointers will be dereferenced by RCU read-side - * code. + * (which is most of them), and also prevents the compiler from + * reordering the code that initializes the structure after the pointer + * assignment. More importantly, this call documents which pointers + * will be dereferenced by RCU read-side code. + * + * In some special cases, you may use RCU_INIT_POINTER() instead + * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due + * to the fact that it does not constrain either the CPU or the compiler. + * That said, using RCU_INIT_POINTER() when you should have used + * rcu_assign_pointer() is a very bad thing that results in + * impossible-to-diagnose memory corruption. So please be careful. + * See the RCU_INIT_POINTER() comment header for details. */ #define rcu_assign_pointer(p, v) \ __rcu_assign_pointer((p), (v), __rcu) @@ -766,8 +773,34 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) /** * RCU_INIT_POINTER() - initialize an RCU protected pointer * - * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep - * splats. + * Initialize an RCU-protected pointer in special cases where readers + * do not need ordering constraints on the CPU or the compiler. These + * special cases are: + * + * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or- + * 2. The caller has taken whatever steps are required to prevent + * RCU readers from concurrently accessing this pointer -or- + * 3. The referenced data structure has already been exposed to + * readers either at compile time or via rcu_assign_pointer() -and- + * a. You have not made -any- reader-visible changes to + * this structure since then -or- + * b. It is OK for readers accessing this structure from its + * new location to see the old state of the structure. (For + * example, the changes were to statistical counters or to + * other state where exact synchronization is not required.) + * + * Failure to follow these rules governing use of RCU_INIT_POINTER() will + * result in impossible-to-diagnose memory corruption. As in the structures + * will look OK in crash dumps, but any concurrent RCU readers might + * see pre-initialized values of the referenced data structure. So + * please be very careful how you use RCU_INIT_POINTER()!!! + * + * If you are creating an RCU-protected linked structure that is accessed + * by a single external-to-structure RCU-protected pointer, then you may + * use RCU_INIT_POINTER() to initialize the internal RCU-protected + * pointers, but you must use rcu_assign_pointer() to initialize the + * external-to-structure pointer -after- you have completely initialized + * the reader-accessible portions of the linked structure. */ #define RCU_INIT_POINTER(p, v) \ p = (typeof(*v) __force __rcu *)(v) -- cgit v1.2.3 From 6206ab9bab620fc0fbbed30ce20d145b0b3d1840 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 1 Aug 2011 06:22:11 -0700 Subject: rcu: Move __rcu_read_unlock()'s barrier() within if-statement We only need to constrain the compiler if we are actually exiting the top-level RCU read-side critical section. This commit therefore moves the first barrier() cal in __rcu_read_unlock() to inside the "if" statement, thus avoiding needless register flushes for inner rcu_read_unlock() calls. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index af186e260c43..2cf4226ade7e 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -134,16 +134,6 @@ extern void call_rcu_sched(struct rcu_head *head, extern void synchronize_sched(void); -static inline void __rcu_read_lock_bh(void) -{ - local_bh_disable(); -} - -static inline void __rcu_read_unlock_bh(void) -{ - local_bh_enable(); -} - #ifdef CONFIG_PREEMPT_RCU extern void __rcu_read_lock(void); @@ -686,7 +676,7 @@ static inline void rcu_read_unlock(void) */ static inline void rcu_read_lock_bh(void) { - __rcu_read_lock_bh(); + local_bh_disable(); __acquire(RCU_BH); rcu_read_acquire_bh(); } @@ -700,7 +690,7 @@ static inline void rcu_read_unlock_bh(void) { rcu_read_release_bh(); __release(RCU_BH); - __rcu_read_unlock_bh(); + local_bh_enable(); } /** -- cgit v1.2.3 From 82e78d80fc392ac7e98326bc8beeb8a679913ffd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 4 Aug 2011 07:55:34 -0700 Subject: rcu: Simplify unboosting checks Commit 7765be (Fix RCU_BOOST race handling current->rcu_read_unlock_special) introduced a new ->rcu_boosted field in the task structure. This is redundant because the existing ->rcu_boost_mutex will be non-NULL at any time that ->rcu_boosted is nonzero. Therefore, this commit removes ->rcu_boosted and tests ->rcu_boost_mutex instead. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6ee91e20353b..acca43560805 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1259,9 +1259,6 @@ struct task_struct { #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; -#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) - int rcu_boosted; -#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */ struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TREE_PREEMPT_RCU -- cgit v1.2.3