diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-11 14:18:38 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-11 14:18:38 -0800 |
commit | 24af98c4cf5f5e69266e270c7f3fb34b82ff6656 (patch) | |
tree | 70d71381c841c92b2d28397bf0c5d6a7d9bbbaac /arch/x86 | |
parent | 9061cbe62adeccf8c986883bcd40f4aeee59ea75 (diff) | |
parent | 337f13046ff03717a9e99675284a817527440a49 (diff) |
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
"So we have a laundry list of locking subsystem changes:
- continuing barrier API and code improvements
- futex enhancements
- atomics API improvements
- pvqspinlock enhancements: in particular lock stealing and adaptive
spinning
- qspinlock micro-enhancements"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
futex: Allow FUTEX_CLOCK_REALTIME with FUTEX_WAIT op
futex: Cleanup the goto confusion in requeue_pi()
futex: Remove pointless put_pi_state calls in requeue()
futex: Document pi_state refcounting in requeue code
futex: Rename free_pi_state() to put_pi_state()
futex: Drop refcount if requeue_pi() acquired the rtmutex
locking/barriers, arch: Remove ambiguous statement in the smp_store_mb() documentation
lcoking/barriers, arch: Use smp barriers in smp_store_release()
locking/cmpxchg, arch: Remove tas() definitions
locking/pvqspinlock: Queue node adaptive spinning
locking/pvqspinlock: Allow limited lock stealing
locking/pvqspinlock: Collect slowpath lock statistics
sched/core, locking: Document Program-Order guarantees
locking, sched: Introduce smp_cond_acquire() and use it
locking/pvqspinlock, x86: Optimize the PV unlock code path
locking/qspinlock: Avoid redundant read of next pointer
locking/qspinlock: Prefetch the next node cacheline
locking/qspinlock: Use _acquire/_release() versions of cmpxchg() & xchg()
atomics: Add test for atomic operations with _relaxed variants
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 8 | ||||
-rw-r--r-- | arch/x86/include/asm/qspinlock_paravirt.h | 59 |
2 files changed, 67 insertions, 0 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index db3622f22b61..965fc4216f76 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -687,6 +687,14 @@ config PARAVIRT_SPINLOCKS If you are unsure how to answer this question, answer Y. +config QUEUED_LOCK_STAT + bool "Paravirt queued spinlock statistics" + depends on PARAVIRT_SPINLOCKS && DEBUG_FS && QUEUED_SPINLOCKS + ---help--- + Enable the collection of statistical data on the slowpath + behavior of paravirtualized queued spinlocks and report + them on debugfs. + source "arch/x86/xen/Kconfig" config KVM_GUEST diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index b002e711ba88..9f92c180ed2f 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -1,6 +1,65 @@ #ifndef __ASM_QSPINLOCK_PARAVIRT_H #define __ASM_QSPINLOCK_PARAVIRT_H +/* + * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit + * registers. For i386, however, only 1 32-bit register needs to be saved + * and restored. So an optimized version of __pv_queued_spin_unlock() is + * hand-coded for 64-bit, but it isn't worthwhile to do it for 32-bit. + */ +#ifdef CONFIG_64BIT + +PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); +#define __pv_queued_spin_unlock __pv_queued_spin_unlock +#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" +#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath" + +/* + * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock + * which combines the registers saving trunk and the body of the following + * C code: + * + * void __pv_queued_spin_unlock(struct qspinlock *lock) + * { + * struct __qspinlock *l = (void *)lock; + * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + * + * if (likely(lockval == _Q_LOCKED_VAL)) + * return; + * pv_queued_spin_unlock_slowpath(lock, lockval); + * } + * + * For x86-64, + * rdi = lock (first argument) + * rsi = lockval (second argument) + * rdx = internal variable (set to 0) + */ +asm (".pushsection .text;" + ".globl " PV_UNLOCK ";" + ".align 4,0x90;" + PV_UNLOCK ": " + "push %rdx;" + "mov $0x1,%eax;" + "xor %edx,%edx;" + "lock cmpxchg %dl,(%rdi);" + "cmp $0x1,%al;" + "jne .slowpath;" + "pop %rdx;" + "ret;" + ".slowpath: " + "push %rsi;" + "movzbl %al,%esi;" + "call " PV_UNLOCK_SLOWPATH ";" + "pop %rsi;" + "pop %rdx;" + "ret;" + ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" + ".popsection"); + +#else /* CONFIG_64BIT */ + +extern void __pv_queued_spin_unlock(struct qspinlock *lock); PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock); +#endif /* CONFIG_64BIT */ #endif |