From 202322e6f7cd12e82b5ff0fa92bbdf517fcf0947 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:22 -0700 Subject: [PATCH] namespace.c: fix mnt_namespace clearing This patch clears mnt_namespace on unmount. Not clearing mnt_namespace has two effects: 1) It is possible to attach a new mount to a detached mount, because check_mnt() returns true. This means, that when no other references to the detached mount remain, it still can't be freed. This causes a resource leak, and possibly un-removable modules. 2) If mnt_namespace is dereferenced (only in mark_mounts_for_expiry()) after the namspace has been freed, it can cause an Oops, memory corruption, etc. 1) has been tested before and after the patch, 2) is only speculation. Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 208c079e9fdb..a0d0ef1f1a48 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -345,6 +345,7 @@ static void umount_tree(struct vfsmount *mnt) for (p = mnt; p; p = next_mnt(p, mnt)) { list_del(&p->mnt_list); list_add(&p->mnt_list, &kill); + p->mnt_namespace = NULL; } while (!list_empty(&kill)) { @@ -1449,15 +1450,8 @@ void __init mnt_init(unsigned long mempages) void __put_namespace(struct namespace *namespace) { - struct vfsmount *mnt; - down_write(&namespace->sem); spin_lock(&vfsmount_lock); - - list_for_each_entry(mnt, &namespace->list, mnt_list) { - mnt->mnt_namespace = NULL; - } - umount_tree(namespace->root); spin_unlock(&vfsmount_lock); up_write(&namespace->sem); -- cgit v1.2.3 From 1ce88cf466f7b6078b14d67d186a3d7c19dd5609 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:24 -0700 Subject: [PATCH] namespace.c: fix race in mark_mounts_for_expiry() This patch fixes a race found by Ram in mark_mounts_for_expiry() in fs/namespace.c. The bug can only be triggered with simultaneous exiting of a process having a private namespace, and expiry of a mount from within that namespace. It's practically impossible to trigger, and I haven't even tried. But still, a bug is a bug. The race happens when put_namespace() is called by another task, while mark_mounts_for_expiry() is between atomic_read() and get_namespace(). In that case get_namespace() will be called on an already dead namespace with unforeseeable results. The solution was suggested by Al Viro, with his own words: Instead of screwing with atomic_read() in there, why don't we simply do the following: a) atomic_dec_and_lock() in put_namespace() b) __put_namespace() called without dropping lock c) the first thing done by __put_namespace would be struct vfsmount *root = namespace->root; namespace->root = NULL; spin_unlock(...); .... umount_tree(root); ... d) check in mark_... would be simply namespace && namespace->root. And we are all set; no screwing around with atomic_read(), no magic at all. Dying namespace gets NULL ->root. All changes of ->root happen under spinlock. If under a spinlock we see non-NULL ->mnt_namespace, it won't be freed until we drop the lock (we will set ->mnt_namespace to NULL under that lock before we get to freeing namespace). If under a spinlock we see non-NULL ->mnt_namespace and ->mnt_namespace->root, we can grab a reference to namespace and be sure that it won't go away. Signed-off-by: Miklos Szeredi Acked-by: Al Viro Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index a0d0ef1f1a48..9d17541ebafa 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -869,7 +869,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ namespace = mnt->mnt_namespace; - if (!namespace || atomic_read(&namespace->count) <= 0) + if (!namespace || !namespace->root) continue; get_namespace(namespace); @@ -1450,9 +1450,12 @@ void __init mnt_init(unsigned long mempages) void __put_namespace(struct namespace *namespace) { + struct vfsmount *root = namespace->root; + namespace->root = NULL; + spin_unlock(&vfsmount_lock); down_write(&namespace->sem); spin_lock(&vfsmount_lock); - umount_tree(namespace->root); + umount_tree(root); spin_unlock(&vfsmount_lock); up_write(&namespace->sem); kfree(namespace); -- cgit v1.2.3 From a4d70278610e6bebe44a7b59a469fe7391387da6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:24 -0700 Subject: [PATCH] namespace.c: cleanup in mark_mounts_for_expiry() This patch simplifies mark_mounts_for_expiry() by using detach_mnt() instead of duplicating everything it does. It should be an equivalent transformation except for righting the dput/mntput order. Al Viro said: "Looks sane". Signed-off-by: Miklos Szeredi Cc: Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 9d17541ebafa..ea555a36c314 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -880,24 +880,13 @@ void mark_mounts_for_expiry(struct list_head *mounts) /* check that it is still dead: the count should now be 2 - as * contributed by the vfsmount parent and the mntget above */ if (atomic_read(&mnt->mnt_count) == 2) { - struct vfsmount *xdmnt; - struct dentry *xdentry; + struct nameidata old_nd; /* delete from the namespace */ list_del_init(&mnt->mnt_list); - list_del_init(&mnt->mnt_child); - list_del_init(&mnt->mnt_hash); - mnt->mnt_mountpoint->d_mounted--; - - xdentry = mnt->mnt_mountpoint; - mnt->mnt_mountpoint = mnt->mnt_root; - xdmnt = mnt->mnt_parent; - mnt->mnt_parent = mnt; - + detach_mnt(mnt, &old_nd); spin_unlock(&vfsmount_lock); - - mntput(xdmnt); - dput(xdentry); + path_release(&old_nd); /* now lay it to rest if this was the last ref on the * superblock */ -- cgit v1.2.3 From 24ca2af1e7cff55e71e9f86c61ddc56e894b8b40 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:25 -0700 Subject: [PATCH] namespace.c: split mark_mounts_for_expiry() This patch splits the mark_mounts_for_expiry() function. It's too complex and too deeply nested, even without the bugfix in the following patch. Otherwise code is completely the same. Signed-off-by: Miklos Szeredi Cc: Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 71 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 39 insertions(+), 32 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index ea555a36c314..d82cf18a1a94 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -825,6 +825,44 @@ unlock: EXPORT_SYMBOL_GPL(do_add_mount); +static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) +{ + spin_lock(&vfsmount_lock); + + /* + * Check that it is still dead: the count should now be 2 - as + * contributed by the vfsmount parent and the mntget above + */ + if (atomic_read(&mnt->mnt_count) == 2) { + struct nameidata old_nd; + + /* delete from the namespace */ + list_del_init(&mnt->mnt_list); + detach_mnt(mnt, &old_nd); + spin_unlock(&vfsmount_lock); + path_release(&old_nd); + + /* + * Now lay it to rest if this was the last ref on the superblock + */ + if (atomic_read(&mnt->mnt_sb->s_active) == 1) { + /* last instance - try to be smart */ + lock_kernel(); + DQUOT_OFF(mnt->mnt_sb); + acct_auto_close(mnt->mnt_sb); + unlock_kernel(); + } + mntput(mnt); + } else { + /* + * Someone brought it back to life whilst we didn't have any + * locks held so return it to the expiration list + */ + list_add_tail(&mnt->mnt_fslink, mounts); + spin_unlock(&vfsmount_lock); + } +} + /* * process a list of expirable mountpoints with the intent of discarding any * mountpoints that aren't in use and haven't been touched since last we came @@ -875,38 +913,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) spin_unlock(&vfsmount_lock); down_write(&namespace->sem); - spin_lock(&vfsmount_lock); - - /* check that it is still dead: the count should now be 2 - as - * contributed by the vfsmount parent and the mntget above */ - if (atomic_read(&mnt->mnt_count) == 2) { - struct nameidata old_nd; - - /* delete from the namespace */ - list_del_init(&mnt->mnt_list); - detach_mnt(mnt, &old_nd); - spin_unlock(&vfsmount_lock); - path_release(&old_nd); - - /* now lay it to rest if this was the last ref on the - * superblock */ - if (atomic_read(&mnt->mnt_sb->s_active) == 1) { - /* last instance - try to be smart */ - lock_kernel(); - DQUOT_OFF(mnt->mnt_sb); - acct_auto_close(mnt->mnt_sb); - unlock_kernel(); - } - - mntput(mnt); - } else { - /* someone brought it back to life whilst we didn't - * have any locks held so return it to the expiration - * list */ - list_add_tail(&mnt->mnt_fslink, mounts); - spin_unlock(&vfsmount_lock); - } - + expire_mount(mnt, mounts); up_write(&namespace->sem); mntput(mnt); -- cgit v1.2.3 From ed42c879b7b1463aa7a15fdbbeb2b1914d60be8a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:26 -0700 Subject: [PATCH] namespace.c: fix expiring of detached mount This patch fixes a bug noticed by Al Viro: However, we still have a problem here - just what would happen if vfsmount is detached while we were grabbing namespace semaphore? Refcount alone is not useful here - we might be held by whoever had detached the vfsmount. IOW, we should check that it's still attached (i.e. that mnt->mnt_parent != mnt). If it's not - just leave it alone, do mntput() and let whoever holds it deal with the sucker. No need to put it back on lists. Signed-off-by: Miklos Szeredi Cc: Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index d82cf18a1a94..2b4635e43ae8 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -829,6 +829,15 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) { spin_lock(&vfsmount_lock); + /* + * Check if mount is still attached, if not, let whoever holds it deal + * with the sucker + */ + if (mnt->mnt_parent == mnt) { + spin_unlock(&vfsmount_lock); + return; + } + /* * Check that it is still dead: the count should now be 2 - as * contributed by the vfsmount parent and the mntget above -- cgit v1.2.3 From ac0811538b40bb92d339d22364026ed91dfdd147 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:27 -0700 Subject: [PATCH] namespace.c: fix mnt_namespace zeroing for expired mounts This patch clears mnt_namespace in an expired mount. If mnt_namespace is not cleared, it's possible to attach a new mount to the already detached mount, because check_mnt() can return true. The effect is a resource leak, since the resulting tree will never be freed. Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 2b4635e43ae8..7fd56eeb21bf 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -847,6 +847,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) /* delete from the namespace */ list_del_init(&mnt->mnt_list); + mnt->mnt_namespace = NULL; detach_mnt(mnt, &old_nd); spin_unlock(&vfsmount_lock); path_release(&old_nd); -- cgit v1.2.3 From 484e389c63472a7f8cfb491cf11b047364e59365 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:28 -0700 Subject: [PATCH] set mnt_namespace in the correct place This patch sets ->mnt_namespace where it's actually added to the namespace. Previously mnt_namespace was set in do_kern_mount() even if the filesystem was never added to any process's namespace (most kernel-internal filesystems). This discrepancy doesn't actually cause any problems, but it's cleaner if mnt_namespace is NULL for these non exported filesystems. Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 7fd56eeb21bf..b168dc37eaab 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -808,6 +808,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, goto unlock; newmnt->mnt_flags = mnt_flags; + newmnt->mnt_namespace = current->namespace; err = graft_tree(newmnt, nd); if (err == 0 && fslist) { -- cgit v1.2.3 From 55e700b924f9e0ba24e3a071d1097d050b05abe6 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 7 Jul 2005 17:57:30 -0700 Subject: [PATCH] namespace: rename mnt_fslink to mnt_expire This patch renames vfsmount->mnt_fslink to something a little more descriptive: vfsmount->mnt_expire. Signed-off-by: Mike Waychison Signed-off-by: Miklos Szeredi Acked-by: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index b168dc37eaab..587eb0d707ee 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -61,7 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); - INIT_LIST_HEAD(&mnt->mnt_fslink); + INIT_LIST_HEAD(&mnt->mnt_expire); if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -165,8 +165,8 @@ clone_mnt(struct vfsmount *old, struct dentry *root) /* stick the duplicate mount on the same expiry list * as the original if that was on one */ spin_lock(&vfsmount_lock); - if (!list_empty(&old->mnt_fslink)) - list_add(&mnt->mnt_fslink, &old->mnt_fslink); + if (!list_empty(&old->mnt_expire)) + list_add(&mnt->mnt_expire, &old->mnt_expire); spin_unlock(&vfsmount_lock); } return mnt; @@ -351,7 +351,7 @@ static void umount_tree(struct vfsmount *mnt) while (!list_empty(&kill)) { mnt = list_entry(kill.next, struct vfsmount, mnt_list); list_del_init(&mnt->mnt_list); - list_del_init(&mnt->mnt_fslink); + list_del_init(&mnt->mnt_expire); if (mnt->mnt_parent == mnt) { spin_unlock(&vfsmount_lock); } else { @@ -645,7 +645,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse) if (mnt) { /* stop bind mounts from expiring */ spin_lock(&vfsmount_lock); - list_del_init(&mnt->mnt_fslink); + list_del_init(&mnt->mnt_expire); spin_unlock(&vfsmount_lock); err = graft_tree(mnt, nd); @@ -744,7 +744,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name) /* if the mount is moved, it should no longer be expire * automatically */ - list_del_init(&old_nd.mnt->mnt_fslink); + list_del_init(&old_nd.mnt->mnt_expire); out2: spin_unlock(&vfsmount_lock); out1: @@ -814,7 +814,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, if (err == 0 && fslist) { /* add to the specified expiration list */ spin_lock(&vfsmount_lock); - list_add_tail(&newmnt->mnt_fslink, fslist); + list_add_tail(&newmnt->mnt_expire, fslist); spin_unlock(&vfsmount_lock); } @@ -869,7 +869,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts) * Someone brought it back to life whilst we didn't have any * locks held so return it to the expiration list */ - list_add_tail(&mnt->mnt_fslink, mounts); + list_add_tail(&mnt->mnt_expire, mounts); spin_unlock(&vfsmount_lock); } } @@ -896,13 +896,13 @@ void mark_mounts_for_expiry(struct list_head *mounts) * - still marked for expiry (marked on the last call here; marks are * cleared by mntput()) */ - list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) { + list_for_each_entry_safe(mnt, next, mounts, mnt_expire) { if (!xchg(&mnt->mnt_expiry_mark, 1) || atomic_read(&mnt->mnt_count) != 1) continue; mntget(mnt); - list_move(&mnt->mnt_fslink, &graveyard); + list_move(&mnt->mnt_expire, &graveyard); } /* @@ -912,8 +912,8 @@ void mark_mounts_for_expiry(struct list_head *mounts) * - dispose of the corpse */ while (!list_empty(&graveyard)) { - mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink); - list_del_init(&mnt->mnt_fslink); + mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire); + list_del_init(&mnt->mnt_expire); /* don't do anything if the namespace is dead - all the * vfsmounts from it are going away anyway */ -- cgit v1.2.3