From 01352fb81658cbf78c55844de8e3d1d606bbf3f8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 11 Dec 2013 13:59:08 -0800 Subject: locking: Add an smp_mb__after_unlock_lock() for UNLOCK+BLOCK barrier The Linux kernel has traditionally required that an UNLOCK+LOCK pair act as a full memory barrier when either (1) that UNLOCK+LOCK pair was executed by the same CPU or task, or (2) the same lock variable was used for the UNLOCK and LOCK. It now seems likely that very few places in the kernel rely on this full-memory-barrier semantic, and with the advent of queued locks, providing this semantic either requires complex reasoning, or for some architectures, added overhead. This commit therefore adds a smp_mb__after_unlock_lock(), which may be placed after a LOCK primitive to restore the full-memory-barrier semantic. All definitions are currently no-ops, but will be upgraded for some architectures when queued locks arrive. Signed-off-by: Paul E. McKenney Reviewed-by: Peter Zijlstra Cc: Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/1386799151-2219-5-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 75f34949d9ab..3f2867ff0ced 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -130,6 +130,16 @@ do { \ #define smp_mb__before_spinlock() smp_wmb() #endif +/* + * Place this after a lock-acquisition primitive to guarantee that + * an UNLOCK+LOCK pair act as a full barrier. This guarantee applies + * if the UNLOCK and LOCK are executed by the same CPU or if the + * UNLOCK and LOCK operate on the same lock variable. + */ +#ifndef smp_mb__after_unlock_lock +#define smp_mb__after_unlock_lock() do { } while (0) +#endif + /** * raw_spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. -- cgit v1.2.3 From 93ea02bb84354370e51de803a9405f171f3edf88 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Nov 2013 14:57:36 +0100 Subject: arch: Clean up asm/barrier.h implementations using asm-generic/barrier.h We're going to be adding a few new barrier primitives, and in order to avoid endless duplication make more agressive use of asm-generic/barrier.h. Change the asm-generic/barrier.h such that it allows partial barrier definitions and fills out the rest with defaults. There are a few architectures (m32r, m68k) that could probably do away with their barrier.h file entirely but are kept for now due to their unconventional nop() implementation. Suggested-by: Geert Uytterhoeven Reviewed-by: "Paul E. McKenney" Reviewed-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra Cc: Michael Ellerman Cc: Michael Neuling Cc: Russell King Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Victor Kaplansky Cc: Tony Luck Cc: Oleg Nesterov Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Link: http://lkml.kernel.org/r/20131213150640.846368594@infradead.org Signed-off-by: Ingo Molnar --- include/asm-generic/barrier.h | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 639d7a4d033b..d12a90f93689 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -1,4 +1,5 @@ -/* Generic barrier definitions, based on MN10300 definitions. +/* + * Generic barrier definitions, originally based on MN10300 definitions. * * It should be possible to use these on really simple architectures, * but it serves more as a starting point for new ports. @@ -16,35 +17,50 @@ #ifndef __ASSEMBLY__ -#define nop() asm volatile ("nop") +#include + +#ifndef nop +#define nop() asm volatile ("nop") +#endif /* - * Force strict CPU ordering. - * And yes, this is required on UP too when we're talking - * to devices. + * Force strict CPU ordering. And yes, this is required on UP too when we're + * talking to devices. * - * This implementation only contains a compiler barrier. + * Fall back to compiler barriers if nothing better is provided. */ -#define mb() asm volatile ("": : :"memory") +#ifndef mb +#define mb() barrier() +#endif + +#ifndef rmb #define rmb() mb() -#define wmb() asm volatile ("": : :"memory") +#endif + +#ifndef wmb +#define wmb() mb() +#endif + +#ifndef read_barrier_depends +#define read_barrier_depends() do { } while (0) +#endif #ifdef CONFIG_SMP #define smp_mb() mb() #define smp_rmb() rmb() #define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() #else #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while (0) #endif -#define set_mb(var, value) do { var = value; mb(); } while (0) -#define set_wmb(var, value) do { var = value; wmb(); } while (0) - -#define read_barrier_depends() do {} while (0) -#define smp_read_barrier_depends() do {} while (0) +#ifndef set_mb +#define set_mb(var, value) do { (var) = (value); mb(); } while (0) +#endif #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ -- cgit v1.2.3 From 47933ad41a86a4a9b50bed7c9b9bd2ba242aac63 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 6 Nov 2013 14:57:36 +0100 Subject: arch: Introduce smp_load_acquire(), smp_store_release() A number of situations currently require the heavyweight smp_mb(), even though there is no need to order prior stores against later loads. Many architectures have much cheaper ways to handle these situations, but the Linux kernel currently has no portable way to make use of them. This commit therefore supplies smp_load_acquire() and smp_store_release() to remedy this situation. The new smp_load_acquire() primitive orders the specified load against any subsequent reads or writes, while the new smp_store_release() primitive orders the specifed store against any prior reads or writes. These primitives allow array-based circular FIFOs to be implemented without an smp_mb(), and also allow a theoretical hole in rcu_assign_pointer() to be closed at no additional expense on most architectures. In addition, the RCU experience transitioning from explicit smp_read_barrier_depends() and smp_wmb() to rcu_dereference() and rcu_assign_pointer(), respectively resulted in substantial improvements in readability. It therefore seems likely that replacing other explicit barriers with smp_load_acquire() and smp_store_release() will provide similar benefits. It appears that roughly half of the explicit barriers in core kernel code might be so replaced. [Changelog by PaulMck] Reviewed-by: "Paul E. McKenney" Signed-off-by: Peter Zijlstra Acked-by: Will Deacon Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Michael Ellerman Cc: Michael Neuling Cc: Russell King Cc: Geert Uytterhoeven Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Victor Kaplansky Cc: Tony Luck Cc: Oleg Nesterov Link: http://lkml.kernel.org/r/20131213150640.908486364@infradead.org Signed-off-by: Ingo Molnar --- include/asm-generic/barrier.h | 15 +++++++++++++++ include/linux/compiler.h | 9 +++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index d12a90f93689..6f692f8ac664 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -62,5 +62,20 @@ #define set_mb(var, value) do { (var) = (value); mb(); } while (0) #endif +#define smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ACCESS_ONCE(*p) = (v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = ACCESS_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + smp_mb(); \ + ___p1; \ +}) + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 92669cd182a6..fe7a686dfd8d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -298,6 +298,11 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) #endif +/* Is this type a native word size -- useful for atomic operations */ +#ifndef __native_word +# define __native_word(t) (sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) +#endif + /* Compile time object size, -1 for unknown */ #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 @@ -337,6 +342,10 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #define compiletime_assert(condition, msg) \ _compiletime_assert(condition, msg, __compiletime_assert_, __LINE__) +#define compiletime_assert_atomic_type(t) \ + compiletime_assert(__native_word(t), \ + "Need native word sized stores/loads for atomicity.") + /* * Prevent the compiler from merging or refetching accesses. The compiler * is also forbidden from reordering successive instances of ACCESS_ONCE(), -- cgit v1.2.3