Diffstat (limited to 'fs/namespace.c')
-rw-r--r-- | fs/namespace.c | 135
1 file changed, 87 insertions(+), 48 deletions(-)
diff --git a/fs/namespace.c b/fs/namespace.c
index 500202ce10db..ac2ce8a766e1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(fs_kobj);
  * It should be taken for write in all cases where the vfsmount
  * tree or hash is modified or when a vfsmount structure is modified.
  */
-DEFINE_BRLOCK(vfsmount_lock);
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
 
 static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
 {
@@ -547,16 +547,38 @@ static void free_vfsmnt(struct mount *mnt)
 	kmem_cache_free(mnt_cache, mnt);
 }
 
+/* call under rcu_read_lock */
+bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
+{
+	struct mount *mnt;
+	if (read_seqretry(&mount_lock, seq))
+		return false;
+	if (bastard == NULL)
+		return true;
+	mnt = real_mount(bastard);
+	mnt_add_count(mnt, 1);
+	if (likely(!read_seqretry(&mount_lock, seq)))
+		return true;
+	if (bastard->mnt_flags & MNT_SYNC_UMOUNT) {
+		mnt_add_count(mnt, -1);
+		return false;
+	}
+	rcu_read_unlock();
+	mntput(bastard);
+	rcu_read_lock();
+	return false;
+}
+
 /*
  * find the first mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * call under rcu_read_lock()
  */
 struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 {
 	struct list_head *head = mount_hashtable + hash(mnt, dentry);
 	struct mount *p;
 
-	list_for_each_entry(p, head, mnt_hash)
+	list_for_each_entry_rcu(p, head, mnt_hash)
 		if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
 			return p;
 	return NULL;
@@ -564,7 +586,7 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
 
 /*
  * find the last mount at @dentry on vfsmount @mnt.
- * vfsmount_lock must be held for read or write.
+ * mount_lock must be held.
  */
 struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
 {
@@ -596,17 +618,17 @@ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
 struct vfsmount *lookup_mnt(struct path *path)
 {
 	struct mount *child_mnt;
+	struct vfsmount *m;
+	unsigned seq;
 
-	br_read_lock(&vfsmount_lock);
-	child_mnt = __lookup_mnt(path->mnt, path->dentry);
-	if (child_mnt) {
-		mnt_add_count(child_mnt, 1);
-		br_read_unlock(&vfsmount_lock);
-		return &child_mnt->mnt;
-	} else {
-		br_read_unlock(&vfsmount_lock);
-		return NULL;
-	}
+	rcu_read_lock();
+	do {
+		seq = read_seqbegin(&mount_lock);
+		child_mnt = __lookup_mnt(path->mnt, path->dentry);
+		m = child_mnt ? &child_mnt->mnt : NULL;
+	} while (!legitimize_mnt(m, seq));
+	rcu_read_unlock();
+	return m;
 }
 
 static struct mountpoint *new_mountpoint(struct dentry *dentry)
@@ -874,38 +896,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
 	return ERR_PTR(err);
 }
 
+static void delayed_free(struct rcu_head *head)
+{
+	struct mount *mnt = container_of(head, struct mount, mnt_rcu);
+	kfree(mnt->mnt_devname);
+#ifdef CONFIG_SMP
+	free_percpu(mnt->mnt_pcp);
+#endif
+	kmem_cache_free(mnt_cache, mnt);
+}
+
 static void mntput_no_expire(struct mount *mnt)
 {
 put_again:
-#ifdef CONFIG_SMP
-	br_read_lock(&vfsmount_lock);
-	if (likely(mnt->mnt_ns)) {
-		/* shouldn't be the last one */
-		mnt_add_count(mnt, -1);
-		br_read_unlock(&vfsmount_lock);
+	rcu_read_lock();
+	mnt_add_count(mnt, -1);
+	if (likely(mnt->mnt_ns)) { /* shouldn't be the last one */
+		rcu_read_unlock();
 		return;
 	}
-	br_read_unlock(&vfsmount_lock);
-
 	lock_mount_hash();
-	mnt_add_count(mnt, -1);
 	if (mnt_get_count(mnt)) {
+		rcu_read_unlock();
 		unlock_mount_hash();
 		return;
 	}
-#else
-	mnt_add_count(mnt, -1);
-	if (likely(mnt_get_count(mnt)))
-		return;
-	lock_mount_hash();
-#endif
 	if (unlikely(mnt->mnt_pinned)) {
 		mnt_add_count(mnt, mnt->mnt_pinned + 1);
 		mnt->mnt_pinned = 0;
+		rcu_read_unlock();
 		unlock_mount_hash();
 		acct_auto_close_mnt(&mnt->mnt);
 		goto put_again;
 	}
+	if (unlikely(mnt->mnt.mnt_flags & MNT_DOOMED)) {
+		rcu_read_unlock();
+		unlock_mount_hash();
+		return;
+	}
+	mnt->mnt.mnt_flags |= MNT_DOOMED;
+	rcu_read_unlock();
 
 	list_del(&mnt->mnt_instance);
 	unlock_mount_hash();
@@ -924,7 +954,8 @@ put_again:
 	fsnotify_vfsmount_delete(&mnt->mnt);
 	dput(mnt->mnt.mnt_root);
 	deactivate_super(mnt->mnt.mnt_sb);
-	free_vfsmnt(mnt);
+	mnt_free_id(mnt);
+	call_rcu(&mnt->mnt_rcu, delayed_free);
 }
 
 void mntput(struct vfsmount *mnt)
@@ -1137,6 +1168,8 @@ static void namespace_unlock(void)
 	list_splice_init(&unmounted, &head);
 	up_write(&namespace_sem);
 
+	synchronize_rcu();
+
 	while (!list_empty(&head)) {
 		mnt = list_first_entry(&head, struct mount, mnt_hash);
 		list_del_init(&mnt->mnt_hash);
@@ -1152,10 +1185,13 @@ static inline void namespace_lock(void)
 }
 
 /*
- * vfsmount lock must be held for write
+ * mount_lock must be held
  * namespace_sem must be held for write
+ * how = 0 => just this tree, don't propagate
+ * how = 1 => propagate; we know that nobody else has reference to any victims
+ * how = 2 => lazy umount
  */
-void umount_tree(struct mount *mnt, int propagate)
+void umount_tree(struct mount *mnt, int how)
 {
 	LIST_HEAD(tmp_list);
 	struct mount *p;
@@ -1163,7 +1199,7 @@ void umount_tree(struct mount *mnt, int propagate)
 	for (p = mnt; p; p = next_mnt(p, mnt))
 		list_move(&p->mnt_hash, &tmp_list);
 
-	if (propagate)
+	if (how)
 		propagate_umount(&tmp_list);
 
 	list_for_each_entry(p, &tmp_list, mnt_hash) {
@@ -1171,6 +1207,8 @@ void umount_tree(struct mount *mnt, int propagate)
 		list_del_init(&p->mnt_list);
 		__touch_mnt_namespace(p->mnt_ns);
 		p->mnt_ns = NULL;
+		if (how < 2)
+			p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
 		list_del_init(&p->mnt_child);
 		if (mnt_has_parent(p)) {
 			put_mountpoint(p->mnt_mp);
@@ -1262,14 +1300,18 @@ static int do_umount(struct mount *mnt, int flags)
 	lock_mount_hash();
 	event++;
 
-	if (!(flags & MNT_DETACH))
-		shrink_submounts(mnt);
-
-	retval = -EBUSY;
-	if (flags & MNT_DETACH || !propagate_mount_busy(mnt, 2)) {
+	if (flags & MNT_DETACH) {
 		if (!list_empty(&mnt->mnt_list))
-			umount_tree(mnt, 1);
+			umount_tree(mnt, 2);
 		retval = 0;
+	} else {
+		shrink_submounts(mnt);
+		retval = -EBUSY;
+		if (!propagate_mount_busy(mnt, 2)) {
+			if (!list_empty(&mnt->mnt_list))
+				umount_tree(mnt, 1);
+			retval = 0;
+		}
 	}
 	unlock_mount_hash();
 	namespace_unlock();
@@ -1955,7 +1997,7 @@ static int do_add_mount(struct mount *newmnt, struct path *path, int mnt_flags)
 	struct mount *parent;
 	int err;
 
-	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL);
+	mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | MNT_DOOMED | MNT_SYNC_UMOUNT);
 
 	mp = lock_mount(path);
 	if (IS_ERR(mp))
@@ -2172,7 +2214,7 @@ resume:
  * process a list of expirable mountpoints with the intent of discarding any
  * submounts of a specific parent mountpoint
  *
- * vfsmount_lock must be held for write
+ * mount_lock must be held for write
  */
 static void shrink_submounts(struct mount *mnt)
 {
@@ -2558,7 +2600,7 @@ out_type:
 /*
  * Return true if path is reachable from root
  *
- * namespace_sem or vfsmount_lock is held
+ * namespace_sem or mount_lock is held
 */
 bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 			 const struct path *root)
@@ -2573,9 +2615,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry,
 int path_is_under(struct path *path1, struct path *path2)
 {
 	int res;
-	br_read_lock(&vfsmount_lock);
+	read_seqlock_excl(&mount_lock);
 	res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2);
-	br_read_unlock(&vfsmount_lock);
+	read_sequnlock_excl(&mount_lock);
 	return res;
 }
 EXPORT_SYMBOL(path_is_under);
@@ -2748,8 +2790,6 @@ void __init mnt_init(void)
 	for (u = 0; u < HASH_SIZE; u++)
 		INIT_LIST_HEAD(&mountpoint_hashtable[u]);
 
-	br_lock_init(&vfsmount_lock);
-
 	err = sysfs_init();
 	if (err)
 		printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2788,9 +2828,8 @@ void kern_unmount(struct vfsmount *mnt)
 {
 	/* release long term mount so mount point can be released */
 	if (!IS_ERR_OR_NULL(mnt)) {
-		lock_mount_hash();
 		real_mount(mnt)->mnt_ns = NULL;
-		unlock_mount_hash();
+		synchronize_rcu();	/* yecchhh... */
 		mntput(mnt);
 	}
 }
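
Note: the core of this change is the lockless lookup_mnt() above. Readers sample mount_lock's sequence count, walk the hash chain under rcu_read_lock(), take a speculative reference, and retry via legitimize_mnt() if a writer intervened. Below is a minimal single-threaded userspace sketch of that retry pattern; every name in it (seq_read_begin, struct mnt, the_mount, lookup) is invented for illustration, and the refcounting is deliberately simplified and non-atomic. It is not the kernel API.

/*
 * Toy seqcount + retry loop, in the spirit of lookup_mnt()/legitimize_mnt().
 * Build: cc -std=c11 sketch.c
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy seqcount: a writer would make it odd on entry, even again on exit. */
static atomic_uint mount_seq;

static unsigned seq_read_begin(void)
{
	unsigned seq;
	while ((seq = atomic_load_explicit(&mount_seq,
					   memory_order_acquire)) & 1)
		;	/* writer in progress: wait for an even value */
	return seq;
}

static bool seq_read_retry(unsigned seq)
{
	/* true if any writer ran since seq_read_begin() */
	return atomic_load_explicit(&mount_seq,
				    memory_order_acquire) != seq;
}

/* Toy "mount table": a single slot guarded by the seqcount. */
struct mnt { int id; int refcount; };
static struct mnt *the_mount;

/*
 * Reader: sample the counter, do the lockless lookup, take a speculative
 * reference, then validate.  On a race, undo the reference and retry,
 * mirroring the do { ... } while (!legitimize_mnt(...)) loop in the patch.
 */
static struct mnt *lookup(void)
{
	for (;;) {
		unsigned seq = seq_read_begin();
		struct mnt *m = the_mount;	/* lockless load */
		if (m)
			m->refcount++;		/* like mnt_add_count(mnt, 1) */
		if (!seq_read_retry(seq))
			return m;		/* no writer raced us: done */
		if (m)
			m->refcount--;		/* a writer raced us: undo */
	}
}

int main(void)
{
	struct mnt m = { .id = 1, .refcount = 0 };
	the_mount = &m;
	struct mnt *got = lookup();
	printf("got id=%d, refcount=%d\n", got->id, got->refcount);
	return 0;
}

The kernel version adds a wrinkle this sketch omits: when validation fails, MNT_SYNC_UMOUNT tells legitimize_mnt() whether it may simply decrement the count or must drop the speculative reference through a full mntput().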
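The other half of the change is teardown ordering: a mount is unhashed first, and the memory is only reclaimed after an RCU grace period (synchronize_rcu() in namespace_unlock() and kern_unmount(), call_rcu() with delayed_free() in mntput_no_expire()), so lockless readers never touch freed memory. Here is a userspace analogue of that unhash/wait/free ordering, sketched with the liburcu library; struct mnt and hash_slot are made-up stand-ins, and header/link names vary between liburcu versions.

/*
 * Unhash, wait one grace period, then free, as namespace_unlock() now does.
 * Build (typically): cc sketch.c -lurcu
 */
#include <urcu.h>	/* rcu_read_lock(), synchronize_rcu(), ... */
#include <stdio.h>
#include <stdlib.h>

struct mnt { int id; };

static struct mnt *hash_slot;	/* stands in for a mount_hashtable chain */

int main(void)
{
	struct mnt *m = malloc(sizeof(*m));
	m->id = 7;

	rcu_register_thread();
	rcu_assign_pointer(hash_slot, m);	/* publish */

	/* Reader side: lockless traversal, like __lookup_mnt() under RCU. */
	rcu_read_lock();
	struct mnt *p = rcu_dereference(hash_slot);
	if (p)
		printf("reader sees mount %d\n", p->id);
	rcu_read_unlock();

	/* Writer side: unhash first... */
	rcu_assign_pointer(hash_slot, NULL);
	/* ...wait for all pre-existing readers to drop out... */
	synchronize_rcu();
	/* ...and only now is it safe to free the object. */
	free(m);

	rcu_unregister_thread();
	return 0;
}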