author     H. Peter Anvin <hpa@zytor.com>         2010-07-27 17:01:49 -0700
committer  Greg Kroah-Hartman <gregkh@suse.de>    2010-09-26 17:21:43 -0700
commit     a1a34b6cceb66a843dfbec0a197b0067fe9a8dc9 (patch)
tree       55e31071c9128ecda297b68f78b1e1d34a5a9c02
parent     5d881186be461f3ccea69fe93f674e3266db7336 (diff)
x86: Add memory modify constraints to xchg() and cmpxchg()
commit 113fc5a6e8c2288619ff7e8187a6f556b7e0d372 upstream.
[ Backport to .32 by Tomáš Janoušek <tomi@nomi.cz> ]
xchg() and cmpxchg() modify their memory operands, not merely read
them. For some versions of gcc the "memory" clobber has apparently
dealt with the situation, but not for all.
Originally-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Cc: Glauber Costa <glommer@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Palfrader <peter@palfrader.org>
Cc: Greg KH <gregkh@suse.de>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Zachary Amsden <zamsden@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
LKML-Reference: <4C4F7277.8050306@zytor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
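The constraint change described in the message above boils down to marking the memory operand of these inline-asm helpers as read-write. The snippet below is a minimal user-space sketch of that idea, not the kernel's actual __xchg() implementation: the helper name my_xchg_long is invented, and it assumes an x86/x86-64 target built with GCC-style inline asm.

```c
/*
 * Hypothetical helper (not the kernel's __xchg()): the memory operand
 * carries a "+m" read-write constraint, so the compiler sees that
 * *ptr is modified, instead of relying only on the "memory" clobber.
 */
static inline unsigned long my_xchg_long(volatile unsigned long *ptr,
                                         unsigned long val)
{
        /* xchg with a memory operand is implicitly locked on x86 */
        asm volatile("xchg %0, %1"
                     : "+r" (val), "+m" (*ptr)
                     :
                     : "memory");
        return val;
}
```

Per the message above, a bare "memory" clobber was apparently enough for some GCC versions but not all; the explicit "+m" operand states the write directly.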
-rw-r--r--   arch/x86/include/asm/cmpxchg_32.h   65
-rw-r--r--   arch/x86/include/asm/cmpxchg_64.h    4
2 files changed, 20 insertions, 49 deletions
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 5af505158515..9873a5f64676 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -17,60 +17,33 @@ struct __xchg_dummy {
 #define __xg(x) ((struct __xchg_dummy *)(x))
 
 /*
- * The semantics of XCHGCMP8B are a bit strange, this is why
- * there is a loop and the loading of %%eax and %%edx has to
- * be inside. This inlines well in most cases, the cached
- * cost is around ~38 cycles. (in the future we might want
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
- * might have an implicit FPU-save as a cost, so it's not
- * clear which path to go.)
+ * CMPXCHG8B only writes to the target if we had the previous
+ * value in registers, otherwise it acts as a read and gives us the
+ * "new previous" value. That is why there is a loop. Preloading
+ * EDX:EAX is a performance optimization: in the common case it means
+ * we need only one locked operation.
  *
- * cmpxchg8b must be used with the lock prefix here to allow
- * the instruction to be executed atomically, see page 3-102
- * of the instruction set reference 24319102.pdf. We need
- * the reader side to see the coherent 64bit value.
+ * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
+ * least an FPU save and/or %cr0.ts manipulation.
+ *
+ * cmpxchg8b must be used with the lock prefix here to allow the
+ * instruction to be executed atomically. We need to have the reader
+ * side to see the coherent 64bit value.
  */
-static inline void __set_64bit(unsigned long long *ptr,
-                               unsigned int low, unsigned int high)
+static inline void set_64bit(volatile u64 *ptr, u64 value)
 {
+        u32 low = value;
+        u32 high = value >> 32;
+        u64 prev = *ptr;
+
         asm volatile("\n1:\t"
-                     "movl (%1), %%eax\n\t"
-                     "movl 4(%1), %%edx\n\t"
                      LOCK_PREFIX "cmpxchg8b %0\n\t"
                      "jnz 1b"
-                     : "=m"(*ptr)
-                     : "D" (ptr),
-                       "b"(low),
-                       "c"(high)
-                     : "ax", "dx", "memory");
-}
-
-static inline void __set_64bit_constant(unsigned long long *ptr,
-                                        unsigned long long value)
-{
-        __set_64bit(ptr, (unsigned int)value, (unsigned int)(value >> 32));
-}
-
-#define ll_low(x)       *(((unsigned int *)&(x)) + 0)
-#define ll_high(x)      *(((unsigned int *)&(x)) + 1)
-
-static inline void __set_64bit_var(unsigned long long *ptr,
-                                   unsigned long long value)
-{
-        __set_64bit(ptr, ll_low(value), ll_high(value));
+                     : "=m" (*ptr), "+A" (prev)
+                     : "b" (low), "c" (high)
+                     : "memory");
 }
 
-#define set_64bit(ptr, value)                   \
-        (__builtin_constant_p((value))          \
-         ? __set_64bit_constant((ptr), (value)) \
-         : __set_64bit_var((ptr), (value)))
-
-#define _set_64bit(ptr, value)                                          \
-        (__builtin_constant_p(value)                                    \
-         ? __set_64bit(ptr, (unsigned int)(value),                      \
-                       (unsigned int)((value) >> 32))                   \
-         : __set_64bit(ptr, ll_low((value)), ll_high((value))))
-
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
  * Note 2: xchg has side effect, so that attribute volatile is necessary,
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 1871cb045b01..e8cb051b8681 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -8,13 +8,11 @@
 #define __xg(x) ((volatile long *)(x))
 
-static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
+static inline void set_64bit(volatile u64 *ptr, u64 val)
 {
         *ptr = val;
 }
 
-#define _set_64bit set_64bit
-
 /*
  * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
  * Note 2: xchg has side effect, so that attribute volatile is necessary,
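For readers who want to experiment with the 32-bit hunk above outside the kernel, here is a hedged user-space rendering of the same cmpxchg8b store loop. It is a sketch under assumptions: an i386 target, GCC inline asm, and no -fPIC restriction on %ebx. The function name atomic_store_u64 is invented, the lock prefix is spelled out instead of LOCK_PREFIX, and the memory operand uses "+m" where the patched set_64bit() uses "=m" plus a "memory" clobber.

```c
#include <stdint.h>

/*
 * User-space sketch of the cmpxchg8b loop in the 32-bit set_64bit()
 * above (hypothetical name, i386 only). "+A" binds prev to the
 * EDX:EAX pair; cmpxchg8b stores ECX:EBX into *ptr if *ptr still
 * equals EDX:EAX, otherwise it reloads EDX:EAX and the loop retries.
 */
static void atomic_store_u64(volatile uint64_t *ptr, uint64_t value)
{
        uint32_t low  = (uint32_t)value;
        uint32_t high = (uint32_t)(value >> 32);
        uint64_t prev = *ptr;   /* preload: usually a single iteration */

        asm volatile("1:\n\t"
                     "lock; cmpxchg8b %0\n\t"
                     "jnz 1b"
                     : "+m" (*ptr), "+A" (prev)
                     : "b" (low), "c" (high)
                     : "memory", "cc");
}
```

The preloaded prev means the loop normally completes after a single locked cmpxchg8b, which is the optimization the new comment in the patch describes.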