From b3abd80250c13414bc258b53e57242feb159af91 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2013 11:14:19 -0700 Subject: lockref: add 'lockref_get_or_lock()' helper This behaves like "lockref_get_not_zero()", but instead of doing nothing if the count was zero, it returns with the lock held. This allows callers to revalidate the lockref-protected data structure if required even if the count was zero to begin with, and possibly increment the count if it passes muster. In particular, the dentry code wants this when it wants to turn an RCU-protected dentry into a stable refcounted one: if the dentry count is zero, but the sequence number still validates the dentry, we can take a reference to it. Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 01233e01627a..0ea026092d1d 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -53,6 +53,22 @@ static inline int lockref_get_not_zero(struct lockref *lockref) return retval; } +/** + * lockref_get_or_lock - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count was zero + * and we got the lock instead. + */ +static inline int lockref_get_or_lock(struct lockref *lockref) +{ + spin_lock(&lockref->lock); + if (!lockref->count) + return 0; + lockref->count++; + spin_unlock(&lockref->lock); + return 1; +} + /** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockcnt: pointer to lockref structure -- cgit v1.2.3 From 15570086b590a69d59183b08a7770e316cca20a7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2013 11:38:06 -0700 Subject: vfs: reimplement d_rcu_to_refcount() using lockref_get_or_lock() This moves __d_rcu_to_refcount() from <linux/dcache.h> into fs/namei.c and re-implements it using the lockref infrastructure instead.
It also adds a lot of comments about what is actually going on, because turning a dentry that was looked up using RCU into a long-lived reference counted entry is one of the more subtle parts of the rcu walk. We also used to be _particularly_ subtle in unlazy_walk() where we re-validate both the dentry and its parent using the same sequence count. We used to do it by nesting the locks and then verifying the sequence count just once. That was silly, because nested locking is expensive, but the sequence count check is not. So this just re-validates the dentry and the parent separately, avoiding the nested locking, and making the lockref lookup possible. Acked-by: Waiman Long Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index efdc94434c30..9169b91ea2d2 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -304,28 +304,6 @@ extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seq); -/** - * __d_rcu_to_refcount - take a refcount on dentry if sequence check is ok - * @dentry: dentry to take a ref on - * @seq: seqcount to verify against - * Returns: 0 on failure, else 1. - * - * __d_rcu_to_refcount operates on a dentry,seq pair that was returned - * by __d_lookup_rcu, to get a reference on an rcu-walk dentry. 
- */ -static inline int __d_rcu_to_refcount(struct dentry *dentry, unsigned seq) -{ - int ret = 0; - - assert_spin_locked(&dentry->d_lock); - if (!read_seqcount_retry(&dentry->d_seq, seq)) { - ret = 1; - dentry->d_lockref.count++; - } - - return ret; -} - static inline unsigned d_count(const struct dentry *dentry) { return dentry->d_lockref.count; -- cgit v1.2.3 From 2f4f12e571c4e2f50f3818a3c2544929145f75dd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2013 11:58:20 -0700 Subject: lockref: uninline lockref helper functions They aren't very good to inline, since they already call external functions (the spinlock code), and we're going to create rather more complicated versions of them that can do the reference count updates locklessly. Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 66 +++---------------------------------------------- 1 file changed, 4 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 0ea026092d1d..4c0af31c8d47 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -21,67 +21,9 @@ struct lockref { unsigned int count; }; -/** - * lockref_get - Increments reference count unconditionally - * @lockcnt: pointer to lockref structure - * - * This operation is only valid if you already hold a reference - * to the object, so you know the count cannot be zero. 
- */ -static inline void lockref_get(struct lockref *lockref) -{ - spin_lock(&lockref->lock); - lockref->count++; - spin_unlock(&lockref->lock); -} - -/** - * lockref_get_not_zero - Increments count unless the count is 0 - * @lockcnt: pointer to lockref structure - * Return: 1 if count updated successfully or 0 if count is 0 - */ -static inline int lockref_get_not_zero(struct lockref *lockref) -{ - int retval = 0; - - spin_lock(&lockref->lock); - if (lockref->count) { - lockref->count++; - retval = 1; - } - spin_unlock(&lockref->lock); - return retval; -} - -/** - * lockref_get_or_lock - Increments count unless the count is 0 - * @lockcnt: pointer to lockref structure - * Return: 1 if count updated successfully or 0 if count was zero - * and we got the lock instead. - */ -static inline int lockref_get_or_lock(struct lockref *lockref) -{ - spin_lock(&lockref->lock); - if (!lockref->count) - return 0; - lockref->count++; - spin_unlock(&lockref->lock); - return 1; -} - -/** - * lockref_put_or_lock - decrements count unless count <= 1 before decrement - * @lockcnt: pointer to lockref structure - * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken - */ -static inline int lockref_put_or_lock(struct lockref *lockref) -{ - spin_lock(&lockref->lock); - if (lockref->count <= 1) - return 0; - lockref->count--; - spin_unlock(&lockref->lock); - return 1; -} +extern void lockref_get(struct lockref *); +extern int lockref_get_not_zero(struct lockref *); +extern int lockref_get_or_lock(struct lockref *); +extern int lockref_put_or_lock(struct lockref *); #endif /* __LINUX_LOCKREF_H */ -- cgit v1.2.3 From bc08b449ee14ace4d869adaa1bb35a44ce68d775 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 Sep 2013 12:12:15 -0700 Subject: lockref: implement lockless reference count updates using cmpxchg() Instead of taking the spinlock, the lockless versions atomically check that the lock is not taken, and do the reference count update using a cmpxchg() loop. 
This is semantically identical to doing the reference count update protected by the lock, but avoids the "wait for lock" contention that you get when accesses to the reference count are contended. Note that a "lockref" is absolutely _not_ equivalent to an atomic_t. Even when the lockref reference counts are updated atomically with cmpxchg, the fact that they also verify the state of the spinlock means that the lockless updates can never happen while somebody else holds the spinlock. So while "lockref_put_or_lock()" looks a lot like just another name for "atomic_dec_and_lock()", and both optimize to lockless updates, they are fundamentally different: the decrement done by atomic_dec_and_lock() is truly independent of any lock (as long as it doesn't decrement to zero), so a locked region can still see the count change. The lockref structure, in contrast, really is a *locked* reference count. If you hold the spinlock, the reference count will be stable and you can modify the reference count without using atomics, because even the lockless updates will see and respect the state of the lock. In order to enable the cmpxchg lockless code, the architecture needs to do three things: (1) Make sure that the "arch_spinlock_t" and an "unsigned int" can fit in an aligned u64, and have a "cmpxchg()" implementation that works on such a u64 data type. (2) define a helper function to test for a spinlock being unlocked ("arch_spin_value_unlocked()") (3) select the "ARCH_USE_CMPXCHG_LOCKREF" config variable in its Kconfig file. This enables it for x86-64 (but not 32-bit, we'd need to make sure cmpxchg() turns into the proper cmpxchg8b in order to enable it for 32-bit mode). 
Signed-off-by: Linus Torvalds --- include/linux/lockref.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 4c0af31c8d47..ca07b5028b01 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -17,8 +17,15 @@ #include <linux/spinlock.h> struct lockref { - spinlock_t lock; - unsigned int count; + union { +#ifdef CONFIG_CMPXCHG_LOCKREF + aligned_u64 lock_count; +#endif + struct { + spinlock_t lock; + unsigned int count; + }; + }; }; extern void lockref_get(struct lockref *); -- cgit v1.2.3