From fe15ce446beb3a33583af81ffe6c9d01a75314ed Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:23 +1100 Subject: fs: change d_delete semantics Change d_delete from a dentry deletion notification to a dentry caching advise, more like ->drop_inode. Require it to be constant and idempotent, and not take d_lock. This is how all existing filesystems use the callback anyway. This makes fine grained dentry locking of dput and dentry lru scanning much simpler. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index f088ea2f6ac9..2270d4822c2f 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -276,7 +276,7 @@ smb_dir_open(struct inode *dir, struct file *file) static int smb_lookup_validate(struct dentry *, struct nameidata *); static int smb_hash_dentry(struct dentry *, struct qstr *); static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); -static int smb_delete_dentry(struct dentry *); +static int smb_delete_dentry(const struct dentry *); static const struct dentry_operations smbfs_dentry_operations = { @@ -367,7 +367,7 @@ out: * We use this to unhash dentries with bad inodes. */ static int -smb_delete_dentry(struct dentry * dentry) +smb_delete_dentry(const struct dentry *dentry) { if (dentry->d_inode) { if (is_bad_inode(dentry->d_inode)) { -- cgit v1.2.3 From fb2d5b86aff355a27ebfc132d3c99f4a940cc3fe Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:26 +1100 Subject: fs: name case update method smpfs and ncpfs want to update a live dentry name in-place. Rather than have them open code the locking, provide a documented dcache API. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index dbb98658148b..dbd2e1df3ba9 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -145,8 +145,8 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, goto end_advance; } else { hashed = 1; - memcpy((char *) newdent->d_name.name, qname->name, - newdent->d_name.len); + /* dir i_mutex is locked because we're in readdir */ + dentry_update_name_case(newdent, qname); } if (!newdent->d_inode) { -- cgit v1.2.3 From 621e155a3591962420eacdd39f6f0aa29ceb221e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:27 +1100 Subject: fs: change d_compare for rcu-walk Change d_compare so it may be called from lock-free RCU lookups. This does put significant restrictions on what may be done from the callback, however there don't seem to have been any problems with in-tree fses. If some strange use case pops up that _really_ cannot cope with the rcu-walk rules, we can just add new rcu-unaware callbacks, which would cause name lookup to drop out of rcu-walk mode. For in-tree filesystems, this is just a mechanical change. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/dir.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 2270d4822c2f..bd63229f43f2 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -275,7 +275,10 @@ smb_dir_open(struct inode *dir, struct file *file) */ static int smb_lookup_validate(struct dentry *, struct nameidata *); static int smb_hash_dentry(struct dentry *, struct qstr *); -static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *); +static int smb_compare_dentry(const struct dentry *, + const struct inode *, + const struct dentry *, const struct inode *, + unsigned int, const char *, const struct qstr *); static int smb_delete_dentry(const struct dentry *); static const struct dentry_operations smbfs_dentry_operations = @@ -347,14 +350,17 @@ smb_hash_dentry(struct dentry *dir, struct qstr *this) } static int -smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b) +smb_compare_dentry(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { int i, result = 1; - if (a->len != b->len) + if (len != name->len) goto out; - for (i=0; i < a->len; i++) { - if (tolower(a->name[i]) != tolower(b->name[i])) + for (i=0; i < len; i++) { + if (tolower(str[i]) != tolower(name->name[i])) goto out; } result = 0; -- cgit v1.2.3 From b1e6a015a580ad145689ad1d6b4aa0e03e6c868b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:28 +1100 Subject: fs: change d_hash for rcu-walk Change d_hash so it may be called from lock-free RCU lookups. See similar patch for d_compare for details. For in-tree filesystems, this is just a mechanical change. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/cache.c | 2 +- drivers/staging/smbfs/dir.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index dbd2e1df3ba9..0beded260b00 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -134,7 +134,7 @@ smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, qname->hash = full_name_hash(qname->name, qname->len); if (dentry->d_op && dentry->d_op->d_hash) - if (dentry->d_op->d_hash(dentry, qname) != 0) + if (dentry->d_op->d_hash(dentry, inode, qname) != 0) goto end_advance; newdent = d_lookup(dentry, qname); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index bd63229f43f2..5f79799d5d4a 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -274,7 +274,8 @@ smb_dir_open(struct inode *dir, struct file *file) * Dentry operations routines */ static int smb_lookup_validate(struct dentry *, struct nameidata *); -static int smb_hash_dentry(struct dentry *, struct qstr *); +static int smb_hash_dentry(const struct dentry *, const struct inode *, + struct qstr *); static int smb_compare_dentry(const struct dentry *, const struct inode *, const struct dentry *, const struct inode *, @@ -336,7 +337,8 @@ smb_lookup_validate(struct dentry * dentry, struct nameidata *nd) } static int -smb_hash_dentry(struct dentry *dir, struct qstr *this) +smb_hash_dentry(const struct dentry *dir, const struct inode *inode, + struct qstr *this) { unsigned long hash; int i; -- cgit v1.2.3 From b7ab39f631f505edc2bbdb86620d5493f995c9da Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:32 +1100 Subject: fs: dcache scale dentry refcount Make d_count non-atomic and protect it with d_lock. This allows us to ensure a 0 refcount dentry remains 0 without dcache_lock. It is also fairly natural when we start protecting many other dentry members with d_lock. Signed-off-by: Nick Piggin --- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 8c8afc716b98..18aee04a8411 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -280,7 +280,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); + dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index f99bddc01716..fe4b242f0094 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -456,7 +456,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked(tmp); + dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); spin_unlock(&dcache_lock); -- cgit v1.2.3 From da5029563a0a026c64821b09e8e7b4fd81d3fe1b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:33 +1100 Subject: fs: dcache scale d_unhashed Protect d_unhashed(dentry) condition with d_lock. This means keeping DCACHE_UNHASHED bit in synch with hash manipulations. Signed-off-by: Nick Piggin --- drivers/usb/core/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index b690aa35df9a..e3ab4437ea96 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -347,10 +347,13 @@ static int usbfs_empty (struct dentry *dentry) list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); + spin_lock(&de->d_lock); if (usbfs_positive(de)) { + spin_unlock(&de->d_lock); spin_unlock(&dcache_lock); return 0; } + spin_unlock(&de->d_lock); } spin_unlock(&dcache_lock); -- cgit v1.2.3 From 2fd6b7f50797f2e993eea59e0a0b8c6399c811dc Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:34 +1100 Subject: fs: dcache scale subdirs Protect d_subdirs and d_child with d_lock, except in filesystems that aren't using dcache_lock for these anyway (eg. using i_mutex). Note: if we change the locking rule in future so that ->d_child protection is provided only with ->d_parent->d_lock, it may allow us to reduce some locking. But it would be an exception to an otherwise regular locking scheme, so we'd have to see some good results. Probably not worthwhile. Signed-off-by: Nick Piggin --- drivers/staging/smbfs/cache.c | 4 ++++ drivers/usb/core/inode.c | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 0beded260b00..920434b6c071 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -63,6 +63,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dentry = list_entry(next, struct dentry, d_u.d_child); @@ -70,6 +71,7 @@ smb_invalidate_dircache_entries(struct dentry *parent) smb_age_dentry(server, dentry); next = next->next; } + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); } @@ -97,6 +99,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) /* If a pointer is invalid, we search the dentry. */ spin_lock(&dcache_lock); + spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { dent = list_entry(next, struct dentry, d_u.d_child); @@ -111,6 +114,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } dent = NULL; out_unlock: + spin_unlock(&parent->d_lock); spin_unlock(&dcache_lock); return dent; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index e3ab4437ea96..89a0e8366585 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -344,18 +344,20 @@ static int usbfs_empty (struct dentry *dentry) struct list_head *list; spin_lock(&dcache_lock); - + spin_lock(&dentry->d_lock); list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); - spin_lock(&de->d_lock); + + spin_lock_nested(&de->d_lock, DENTRY_D_LOCK_NESTED); if (usbfs_positive(de)) { spin_unlock(&de->d_lock); + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 0; } spin_unlock(&de->d_lock); } - + spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); return 1; } -- cgit v1.2.3 From 949854d02455080d20cd3e1db28a3a18daf7599d Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:37 +1100 Subject: fs: Use rename lock and RCU for multi-step operations The remaining usages for dcache_lock is to allow atomic, multi-step read-side operations over the directory tree by excluding modifications to the tree. Also, to walk in the leaf->root direction in the tree where we don't have a natural d_lock ordering. This could be accomplished by taking every d_lock, but this would mean a huge number of locks and actually gets very tricky. Solve this instead by using the rename seqlock for multi-step read-side operations, retry in case of a rename so we don't walk up the wrong parent. Concurrent dentry insertions are not serialised against. Concurrent deletes are tricky when walking up the directory: our parent might have been deleted when dropping locks so also need to check and retry for that. We can also use the rename lock in cases where livelock is a worry (and it is introduced in subsequent patch). Signed-off-by: Nick Piggin --- drivers/staging/pohmelfs/path_entry.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index 8ec83d2dffb7..bbe42f42ca8f 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -83,10 +83,11 @@ out: int pohmelfs_path_length(struct pohmelfs_inode *pi) { struct dentry *d, *root, *first; - int len = 1; /* Root slash */ + int len; + unsigned seq; - first = d = d_find_alias(&pi->vfs_inode); - if (!d) { + first = d_find_alias(&pi->vfs_inode); + if (!first) { dprintk("%s: ino: %llu, mode: %o.\n", __func__, pi->ino, pi->vfs_inode.i_mode); return -ENOENT; } @@ -95,6 +96,11 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) root = dget(current->fs->root.dentry); spin_unlock(¤t->fs->lock); +rename_retry: + len = 1; /* Root slash */ + d = first; + seq = read_seqbegin(&rename_lock); + rcu_read_lock(); spin_lock(&dcache_lock); if (!IS_ROOT(d) && d_unhashed(d)) @@ -105,6 +111,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) d = d->d_parent; } spin_unlock(&dcache_lock); + rcu_read_unlock(); + if (read_seqretry(&rename_lock, seq)) + goto rename_retry; dput(root); dput(first); -- cgit v1.2.3 From b5c84bf6f6fa3a7dfdcb556023a62953574b60ee Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:38 +1100 Subject: fs: dcache remove dcache_lock dcache_lock no longer protects anything. remove it. Signed-off-by: Nick Piggin --- drivers/infiniband/hw/ipath/ipath_fs.c | 6 +----- drivers/infiniband/hw/qib/qib_fs.c | 3 --- drivers/staging/pohmelfs/path_entry.c | 2 -- drivers/staging/smbfs/cache.c | 4 ---- drivers/usb/core/inode.c | 3 --- 5 files changed, 1 insertion(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 18aee04a8411..925e88227ded 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -277,18 +277,14 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); - } else { + } else spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); - } ret = 0; bail: diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index fe4b242f0094..49af4a6538ba 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -453,17 +453,14 @@ static int remove_file(struct dentry *parent, char *name) goto bail; } - spin_lock(&dcache_lock); spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { dget_locked_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); simple_unlink(parent->d_inode, tmp); } else { spin_unlock(&tmp->d_lock); - spin_unlock(&dcache_lock); } ret = 0; diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index bbe42f42ca8f..400a9fc386ad 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -101,7 +101,6 @@ rename_retry: d = first; seq = read_seqbegin(&rename_lock); rcu_read_lock(); - spin_lock(&dcache_lock); if (!IS_ROOT(d) && d_unhashed(d)) len += UNHASHED_OBSCURE_STRING_SIZE; /* Obscure " (deleted)" string */ @@ -110,7 +109,6 @@ rename_retry: len += d->d_name.len + 1; /* Plus slash */ d = d->d_parent; } - spin_unlock(&dcache_lock); rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto rename_retry; diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 920434b6c071..75dfd403fb90 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -62,7 +62,6 @@ smb_invalidate_dircache_entries(struct dentry *parent) struct list_head *next; struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -72,7 +71,6 @@ smb_invalidate_dircache_entries(struct dentry *parent) next = next->next; } spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); } /* @@ -98,7 +96,6 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) } /* If a pointer is invalid, we search the dentry. */ - spin_lock(&dcache_lock); spin_lock(&parent->d_lock); next = parent->d_subdirs.next; while (next != &parent->d_subdirs) { @@ -115,7 +112,6 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dent = NULL; out_unlock: spin_unlock(&parent->d_lock); - spin_unlock(&dcache_lock); return dent; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 89a0e8366585..1b125c224dcf 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -343,7 +343,6 @@ static int usbfs_empty (struct dentry *dentry) { struct list_head *list; - spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); list_for_each(list, &dentry->d_subdirs) { struct dentry *de = list_entry(list, struct dentry, d_u.d_child); @@ -352,13 +351,11 @@ static int usbfs_empty (struct dentry *dentry) if (usbfs_positive(de)) { spin_unlock(&de->d_lock); spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 0; } spin_unlock(&de->d_lock); } spin_unlock(&dentry->d_lock); - spin_unlock(&dcache_lock); return 1; } -- cgit v1.2.3 From dc0474be3e27463d4d4a2793f82366eed906f223 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:43 +1100 Subject: fs: dcache rationalise dget variants dget_locked was a shortcut to avoid the lazy lru manipulation when we already held dcache_lock (lru manipulation was relatively cheap at that point). However, how that the lru lock is an innermost one, we never hold it at any caller, so the lock cost can now be avoided. We already have well working lazy dcache LRU, so it should be fine to defer LRU manipulations to scan time. Signed-off-by: Nick Piggin --- drivers/infiniband/hw/ipath/ipath_fs.c | 2 +- drivers/infiniband/hw/qib/qib_fs.c | 2 +- drivers/staging/smbfs/cache.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 925e88227ded..31ae1b108aea 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -279,7 +279,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked_dlock(tmp); + dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(parent->d_inode, tmp); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index 49af4a6538ba..df7fa251dcdc 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -455,7 +455,7 @@ static int remove_file(struct dentry *parent, char *name) spin_lock(&tmp->d_lock); if (!(d_unhashed(tmp) && tmp->d_inode)) { - dget_locked_dlock(tmp); + dget_dlock(tmp); __d_drop(tmp); spin_unlock(&tmp->d_lock); simple_unlink(parent->d_inode, tmp); diff --git a/drivers/staging/smbfs/cache.c b/drivers/staging/smbfs/cache.c index 75dfd403fb90..f2a1323ca827 100644 --- a/drivers/staging/smbfs/cache.c +++ b/drivers/staging/smbfs/cache.c @@ -102,7 +102,7 @@ smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) dent = list_entry(next, struct dentry, d_u.d_child); if ((unsigned long)dent->d_fsdata == fpos) { if (dent->d_inode) - dget_locked(dent); + dget(dent); else dent = NULL; goto out_unlock; -- cgit v1.2.3 From fa0d7e3de6d6fc5004ad9dea0dd6b286af8f03e9 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:49 +1100 Subject: fs: icache RCU free inodes RCU free the struct inode. This will allow: - Subsequent store-free path walking patch. The inode must be consulted for permissions when walking, so an RCU inode reference is a must. - sb_inode_list_lock to be moved inside i_lock because sb list walkers who want to take i_lock no longer need to take sb_inode_list_lock to walk the list in the first place. This will simplify and optimize locking. - Could remove some nested trylock loops in dcache code - Could potentially simplify things a bit in VM land. Do not need to take the page lock to follow page->mapping. The downsides of this is the performance cost of using RCU. In a simple creat/unlink microbenchmark, performance drops by about 10% due to inability to reuse cache-hot slab objects. As iterations increase and RCU freeing starts kicking over, this increases to about 20%. In cases where inode lifetimes are longer (ie. many inodes may be allocated during the average life span of a single inode), a lot of this cache reuse is not applicable, so the regression caused by this patch is smaller. The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU, however this adds some complexity to list walking and store-free path walking, so I prefer to implement this at a later date, if it is shown to be a win in real situations. I haven't found a regression in any non-micro benchmark so I doubt it will be a problem. Signed-off-by: Nick Piggin --- drivers/staging/pohmelfs/inode.c | 9 ++++++++- drivers/staging/smbfs/inode.c | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index 61685ccceda8..cc8d2840f9b6 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -826,6 +826,13 @@ const struct address_space_operations pohmelfs_aops = { .set_page_dirty = __set_page_dirty_nobuffers, }; +static void pohmelfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(pohmelfs_inode_cache, POHMELFS_I(inode)); +} + /* * ->detroy_inode() callback. Deletes inode from the caches * and frees private data. @@ -842,8 +849,8 @@ static void pohmelfs_destroy_inode(struct inode *inode) dprintk("%s: pi: %p, inode: %p, ino: %llu.\n", __func__, pi, &pi->vfs_inode, pi->ino); - kmem_cache_free(pohmelfs_inode_cache, pi); atomic_long_dec(&psb->total_inodes); + call_rcu(&inode->i_rcu, pohmelfs_i_callback); } /* diff --git a/drivers/staging/smbfs/inode.c b/drivers/staging/smbfs/inode.c index 540a984bb516..244319dc9702 100644 --- a/drivers/staging/smbfs/inode.c +++ b/drivers/staging/smbfs/inode.c @@ -62,11 +62,18 @@ static struct inode *smb_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } -static void smb_destroy_inode(struct inode *inode) +static void smb_i_callback(struct rcu_head *head) { + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(smb_inode_cachep, SMB_I(inode)); } +static void smb_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, smb_i_callback); +} + static void init_once(void *foo) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; -- cgit v1.2.3 From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- drivers/staging/autofs/root.c | 2 +- drivers/staging/smbfs/dir.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c index 0fdec4befd84..b09adb57971f 100644 --- a/drivers/staging/autofs/root.c +++ b/drivers/staging/autofs/root.c @@ -237,7 +237,7 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr * * We need to do this before we release the directory semaphore. */ - dentry->d_op = &autofs_dentry_operations; + d_set_d_op(dentry, &autofs_dentry_operations); dentry->d_flags |= DCACHE_AUTOFS_PENDING; d_add(dentry, NULL); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 5f79799d5d4a..78f09412740c 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -398,9 +398,9 @@ smb_new_dentry(struct dentry *dentry) struct smb_sb_info *server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); dentry->d_time = jiffies; } @@ -462,9 +462,9 @@ smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) add_entry: server = server_from_dentry(dentry); if (server->mnt->flags & SMB_MOUNT_CASE) - dentry->d_op = &smbfs_dentry_operations_case; + d_set_d_op(dentry, &smbfs_dentry_operations_case); else - dentry->d_op = &smbfs_dentry_operations; + d_set_d_op(dentry, &smbfs_dentry_operations); d_add(dentry, inode); smb_renew_times(dentry); -- cgit v1.2.3 From 34286d6662308d82aed891852d04c7c3a2649b16 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:57 +1100 Subject: fs: rcu-walk aware d_revalidate method Require filesystems be aware of .d_revalidate being called in rcu-walk mode (nd->flags & LOOKUP_RCU). For now do a simple push down, returning -ECHILD from all implementations. Signed-off-by: Nick Piggin --- drivers/staging/autofs/root.c | 5 ++++- drivers/staging/smbfs/dir.c | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/autofs/root.c b/drivers/staging/autofs/root.c index b09adb57971f..bf0e9755da67 100644 --- a/drivers/staging/autofs/root.c +++ b/drivers/staging/autofs/root.c @@ -154,13 +154,16 @@ static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, str * yet completely filled in, and revalidate has to delay such * lookups.. */ -static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd) +static int autofs_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode * dir; struct autofs_sb_info *sbi; struct autofs_dir_ent *ent; int res; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + lock_kernel(); dir = dentry->d_parent->d_inode; sbi = autofs_sbi(dir->i_sb); diff --git a/drivers/staging/smbfs/dir.c b/drivers/staging/smbfs/dir.c index 78f09412740c..dd612f50749f 100644 --- a/drivers/staging/smbfs/dir.c +++ b/drivers/staging/smbfs/dir.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "smb_fs.h" #include "smb_mount.h" @@ -301,13 +302,20 @@ static const struct dentry_operations smbfs_dentry_operations_case = * This is the callback when the dcache has a lookup hit. */ static int -smb_lookup_validate(struct dentry * dentry, struct nameidata *nd) +smb_lookup_validate(struct dentry *dentry, struct nameidata *nd) { - struct smb_sb_info *server = server_from_dentry(dentry); - struct inode * inode = dentry->d_inode; - unsigned long age = jiffies - dentry->d_time; + struct smb_sb_info *server; + struct inode *inode; + unsigned long age; int valid; + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + server = server_from_dentry(dentry); + inode = dentry->d_inode; + age = jiffies - dentry->d_time; + /* * The default validation is based on dentry age: * we believe in dentries for a few seconds. (But each -- cgit v1.2.3 From b74c79e99389cd79b31fcc08f82c24e492e63c7e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:58 +1100 Subject: fs: provide rcu-walk aware permission i_ops Signed-off-by: Nick Piggin --- drivers/staging/smbfs/file.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/smbfs/file.c b/drivers/staging/smbfs/file.c index 5dcd19c60eb9..31372e7b12de 100644 --- a/drivers/staging/smbfs/file.c +++ b/drivers/staging/smbfs/file.c @@ -407,11 +407,14 @@ smb_file_release(struct inode *inode, struct file * file) * privileges, so we need our own check for this. */ static int -smb_file_permission(struct inode *inode, int mask) +smb_file_permission(struct inode *inode, int mask, unsigned int flags) { int mode = inode->i_mode; int error = 0; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + VERBOSE("mode=%x, mask=%x\n", mode, mask); /* Look at user permissions */ -- cgit v1.2.3 From b3e19d924b6eaf2ca7d22cba99a517c5171007b6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:11 +1100 Subject: fs: scale mntget/mntput The problem that this patch aims to fix is vfsmount refcounting scalability. We need to take a reference on the vfsmount for every successful path lookup, which often go to the same mount point. The fundamental difficulty is that a "simple" reference count can never be made scalable, because any time a reference is dropped, we must check whether that was the last reference. To do that requires communication with all other CPUs that may have taken a reference count. We can make refcounts more scalable in a couple of ways, involving keeping distributed counters, and checking for the global-zero condition less frequently. - check the global sum once every interval (this will delay zero detection for some interval, so it's probably a showstopper for vfsmounts). - keep a local count and only taking the global sum when local reaches 0 (this is difficult for vfsmounts, because we can't hold preempt off for the life of a reference, so a counter would need to be per-thread or tied strongly to a particular CPU which requires more locking). - keep a local difference of increments and decrements, which allows us to sum the total difference and hence find the refcount when summing all CPUs. Then, keep a single integer "long" refcount for slow and long lasting references, and only take the global sum of local counters when the long refcount is 0. This last scheme is what I implemented here. Attached mounts and process root and working directory references are "long" references, and everything else is a short reference. This allows scalable vfsmount references during path walking over mounted subtrees and unattached (lazy umounted) mounts with processes still running in them. This results in one fewer atomic op in the fastpath: mntget is now just a per-CPU inc, rather than an atomic inc; and mntput just requires a spinlock and non-atomic decrement in the common case. However code is otherwise bigger and heavier, so single threaded performance is basically a wash. Signed-off-by: Nick Piggin --- drivers/mtd/mtdchar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c index 4759d827e8c7..f511dd15fd31 100644 --- a/drivers/mtd/mtdchar.c +++ b/drivers/mtd/mtdchar.c @@ -1201,7 +1201,7 @@ err_unregister_chdev: static void __exit cleanup_mtdchar(void) { unregister_mtd_user(&mtdchar_notifier); - mntput(mtd_inode_mnt); + mntput_long(mtd_inode_mnt); unregister_filesystem(&mtd_inodefs_type); __unregister_chrdev(MTD_CHAR_MAJOR, 0, 1 << MINORBITS, "mtd"); } -- cgit v1.2.3