From 202322e6f7cd12e82b5ff0fa92bbdf517fcf0947 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:22 -0700
Subject: [PATCH] namespace.c: fix mnt_namespace clearing

This patch clears mnt_namespace on unmount.

Not clearing mnt_namespace has two effects:

   1) It is possible to attach a new mount to a detached mount,
      because check_mnt() returns true.

      This means that when no other references to the detached mount
      remain, it still can't be freed.  This causes a resource leak,
      and possibly un-removable modules.

   2) If mnt_namespace is dereferenced (only in mark_mounts_for_expiry())
      after the namespace has been freed, it can cause an Oops, memory
      corruption, etc.

1) has been tested before and after the patch, 2) is only speculation.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index 208c079e9fdb..a0d0ef1f1a48 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -345,6 +345,7 @@ static void umount_tree(struct vfsmount *mnt)
 	for (p = mnt; p; p = next_mnt(p, mnt)) {
 		list_del(&p->mnt_list);
 		list_add(&p->mnt_list, &kill);
+		p->mnt_namespace = NULL;
 	}
 
 	while (!list_empty(&kill)) {
@@ -1449,15 +1450,8 @@ void __init mnt_init(unsigned long mempages)
 
 void __put_namespace(struct namespace *namespace)
 {
-	struct vfsmount *mnt;
-
 	down_write(&namespace->sem);
 	spin_lock(&vfsmount_lock);
-
-	list_for_each_entry(mnt, &namespace->list, mnt_list) {
-		mnt->mnt_namespace = NULL;
-	}
-
 	umount_tree(namespace->root);
 	spin_unlock(&vfsmount_lock);
 	up_write(&namespace->sem);
-- 
cgit v1.2.3


From 1ce88cf466f7b6078b14d67d186a3d7c19dd5609 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:24 -0700
Subject: [PATCH] namespace.c: fix race in mark_mounts_for_expiry()

This patch fixes a race found by Ram in mark_mounts_for_expiry() in
fs/namespace.c.

The bug can only be triggered with simultaneous exiting of a process having
a private namespace, and expiry of a mount from within that namespace.
It's practically impossible to trigger, and I haven't even tried.  But
still, a bug is a bug.

The race happens when put_namespace() is called by another task, while
mark_mounts_for_expiry() is between atomic_read() and get_namespace().  In
that case get_namespace() will be called on an already dead namespace with
unforeseeable results.

The solution was suggested by Al Viro, in his own words:

      Instead of screwing with atomic_read() in there, why don't we
      simply do the following:
      	a) atomic_dec_and_lock() in put_namespace()
      	b) __put_namespace() called without dropping lock
      	c) the first thing done by __put_namespace would be
      struct vfsmount *root = namespace->root;
      namespace->root = NULL;
      spin_unlock(...);
      ....
      umount_tree(root);
      ...
      	d) check in mark_... would be simply namespace && namespace->root.

      And we are all set; no screwing around with atomic_read(), no magic
      at all.  Dying namespace gets NULL ->root.
      All changes of ->root happen under spinlock.
      If under a spinlock we see non-NULL ->mnt_namespace, it won't be
      freed until we drop the lock (we will set ->mnt_namespace to NULL
      under that lock before we get to freeing namespace).
      If under a spinlock we see non-NULL ->mnt_namespace and
      ->mnt_namespace->root, we can grab a reference to namespace and be
      sure that it won't go away.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index a0d0ef1f1a48..9d17541ebafa 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -869,7 +869,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		/* don't do anything if the namespace is dead - all the
 		 * vfsmounts from it are going away anyway */
 		namespace = mnt->mnt_namespace;
-		if (!namespace || atomic_read(&namespace->count) <= 0)
+		if (!namespace || !namespace->root)
 			continue;
 		get_namespace(namespace);
 
@@ -1450,9 +1450,12 @@ void __init mnt_init(unsigned long mempages)
 
 void __put_namespace(struct namespace *namespace)
 {
+	struct vfsmount *root = namespace->root;
+	namespace->root = NULL;
+	spin_unlock(&vfsmount_lock);
 	down_write(&namespace->sem);
 	spin_lock(&vfsmount_lock);
-	umount_tree(namespace->root);
+	umount_tree(root);
 	spin_unlock(&vfsmount_lock);
 	up_write(&namespace->sem);
 	kfree(namespace);
-- 
cgit v1.2.3


From a4d70278610e6bebe44a7b59a469fe7391387da6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:24 -0700
Subject: [PATCH] namespace.c: cleanup in mark_mounts_for_expiry()

This patch simplifies mark_mounts_for_expiry() by using detach_mnt() instead
of duplicating everything it does.

It should be an equivalent transformation except for righting the dput/mntput
order.

Al Viro said: "Looks sane".

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: <viro@parcelfarce.linux.theplanet.co.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index 9d17541ebafa..ea555a36c314 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -880,24 +880,13 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 		/* check that it is still dead: the count should now be 2 - as
 		 * contributed by the vfsmount parent and the mntget above */
 		if (atomic_read(&mnt->mnt_count) == 2) {
-			struct vfsmount *xdmnt;
-			struct dentry *xdentry;
+			struct nameidata old_nd;
 
 			/* delete from the namespace */
 			list_del_init(&mnt->mnt_list);
-			list_del_init(&mnt->mnt_child);
-			list_del_init(&mnt->mnt_hash);
-			mnt->mnt_mountpoint->d_mounted--;
-
-			xdentry = mnt->mnt_mountpoint;
-			mnt->mnt_mountpoint = mnt->mnt_root;
-			xdmnt = mnt->mnt_parent;
-			mnt->mnt_parent = mnt;
-
+			detach_mnt(mnt, &old_nd);
 			spin_unlock(&vfsmount_lock);
-
-			mntput(xdmnt);
-			dput(xdentry);
+			path_release(&old_nd);
 
 			/* now lay it to rest if this was the last ref on the
 			 * superblock */
-- 
cgit v1.2.3


From 24ca2af1e7cff55e71e9f86c61ddc56e894b8b40 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:25 -0700
Subject: [PATCH] namespace.c: split mark_mounts_for_expiry()

This patch splits the mark_mounts_for_expiry() function.  It's too complex and
too deeply nested, even without the bugfix in the following patch.

Otherwise the code is completely the same.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: <viro@parcelfarce.linux.theplanet.co.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 71 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 39 insertions(+), 32 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index ea555a36c314..d82cf18a1a94 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -825,6 +825,44 @@ unlock:
 
 EXPORT_SYMBOL_GPL(do_add_mount);
 
+static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
+{
+	spin_lock(&vfsmount_lock);
+
+	/*
+	 * Check that it is still dead: the count should now be 2 - as
+	 * contributed by the vfsmount parent and the mntget above
+	 */
+	if (atomic_read(&mnt->mnt_count) == 2) {
+		struct nameidata old_nd;
+
+		/* delete from the namespace */
+		list_del_init(&mnt->mnt_list);
+		detach_mnt(mnt, &old_nd);
+		spin_unlock(&vfsmount_lock);
+		path_release(&old_nd);
+
+		/*
+		 * Now lay it to rest if this was the last ref on the superblock
+		 */
+		if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
+			/* last instance - try to be smart */
+			lock_kernel();
+			DQUOT_OFF(mnt->mnt_sb);
+			acct_auto_close(mnt->mnt_sb);
+			unlock_kernel();
+		}
+		mntput(mnt);
+	} else {
+		/*
+		 * Someone brought it back to life whilst we didn't have any
+		 * locks held so return it to the expiration list
+		 */
+		list_add_tail(&mnt->mnt_fslink, mounts);
+		spin_unlock(&vfsmount_lock);
+	}
+}
+
 /*
  * process a list of expirable mountpoints with the intent of discarding any
  * mountpoints that aren't in use and haven't been touched since last we came
@@ -875,38 +913,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 
 		spin_unlock(&vfsmount_lock);
 		down_write(&namespace->sem);
-		spin_lock(&vfsmount_lock);
-
-		/* check that it is still dead: the count should now be 2 - as
-		 * contributed by the vfsmount parent and the mntget above */
-		if (atomic_read(&mnt->mnt_count) == 2) {
-			struct nameidata old_nd;
-
-			/* delete from the namespace */
-			list_del_init(&mnt->mnt_list);
-			detach_mnt(mnt, &old_nd);
-			spin_unlock(&vfsmount_lock);
-			path_release(&old_nd);
-
-			/* now lay it to rest if this was the last ref on the
-			 * superblock */
-			if (atomic_read(&mnt->mnt_sb->s_active) == 1) {
-				/* last instance - try to be smart */
-				lock_kernel();
-				DQUOT_OFF(mnt->mnt_sb);
-				acct_auto_close(mnt->mnt_sb);
-				unlock_kernel();
-			}
-
-			mntput(mnt);
-		} else {
-			/* someone brought it back to life whilst we didn't
-			 * have any locks held so return it to the expiration
-			 * list */
-			list_add_tail(&mnt->mnt_fslink, mounts);
-			spin_unlock(&vfsmount_lock);
-		}
-
+		expire_mount(mnt, mounts);
 		up_write(&namespace->sem);
 
 		mntput(mnt);
-- 
cgit v1.2.3


From ed42c879b7b1463aa7a15fdbbeb2b1914d60be8a Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:26 -0700
Subject: [PATCH] namespace.c: fix expiring of detached mount

This patch fixes a bug noticed by Al Viro:

   However, we still have a problem here - just what would
   happen if vfsmount is detached while we were grabbing namespace
   semaphore?  Refcount alone is not useful here - we might be held by
   whoever had detached the vfsmount.  IOW, we should check that it's
   still attached (i.e. that mnt->mnt_parent != mnt).  If it's not -
   just leave it alone, do mntput() and let whoever holds it deal with
   the sucker.  No need to put it back on lists.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Cc: <viro@parcelfarce.linux.theplanet.co.uk>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index d82cf18a1a94..2b4635e43ae8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -829,6 +829,15 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
 {
 	spin_lock(&vfsmount_lock);
 
+	/*
+	 * Check if mount is still attached, if not, let whoever holds it deal
+	 * with the sucker
+	 */
+	if (mnt->mnt_parent == mnt) {
+		spin_unlock(&vfsmount_lock);
+		return;
+	}
+
 	/*
 	 * Check that it is still dead: the count should now be 2 - as
 	 * contributed by the vfsmount parent and the mntget above
-- 
cgit v1.2.3


From ac0811538b40bb92d339d22364026ed91dfdd147 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:27 -0700
Subject: [PATCH] namespace.c: fix mnt_namespace zeroing for expired mounts

This patch clears mnt_namespace in an expired mount.

If mnt_namespace is not cleared, it's possible to attach a new mount to the
already detached mount, because check_mnt() can return true.

The effect is a resource leak, since the resulting tree will never be
freed.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index 2b4635e43ae8..7fd56eeb21bf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -847,6 +847,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
 
 		/* delete from the namespace */
 		list_del_init(&mnt->mnt_list);
+		mnt->mnt_namespace = NULL;
 		detach_mnt(mnt, &old_nd);
 		spin_unlock(&vfsmount_lock);
 		path_release(&old_nd);
-- 
cgit v1.2.3


From 484e389c63472a7f8cfb491cf11b047364e59365 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:28 -0700
Subject: [PATCH] set mnt_namespace in the correct place

This patch sets ->mnt_namespace where it's actually added to the
namespace.

Previously mnt_namespace was set in do_kern_mount() even if the filesystem
was never added to any process's namespace (most kernel-internal
filesystems).

This discrepancy doesn't actually cause any problems, but it's cleaner if
mnt_namespace is NULL for these non-exported filesystems.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7fd56eeb21bf..b168dc37eaab 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -808,6 +808,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
 		goto unlock;
 
 	newmnt->mnt_flags = mnt_flags;
+	newmnt->mnt_namespace = current->namespace;
 	err = graft_tree(newmnt, nd);
 
 	if (err == 0 && fslist) {
-- 
cgit v1.2.3


From 55e700b924f9e0ba24e3a071d1097d050b05abe6 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Thu, 7 Jul 2005 17:57:30 -0700
Subject: [PATCH] namespace: rename mnt_fslink to mnt_expire

This patch renames vfsmount->mnt_fslink to something a little more
descriptive: vfsmount->mnt_expire.

Signed-off-by: Mike Waychison <michael.waychison@sun.com>
Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/namespace.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'fs/namespace.c')

diff --git a/fs/namespace.c b/fs/namespace.c
index b168dc37eaab..587eb0d707ee 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -61,7 +61,7 @@ struct vfsmount *alloc_vfsmnt(const char *name)
 		INIT_LIST_HEAD(&mnt->mnt_child);
 		INIT_LIST_HEAD(&mnt->mnt_mounts);
 		INIT_LIST_HEAD(&mnt->mnt_list);
-		INIT_LIST_HEAD(&mnt->mnt_fslink);
+		INIT_LIST_HEAD(&mnt->mnt_expire);
 		if (name) {
 			int size = strlen(name)+1;
 			char *newname = kmalloc(size, GFP_KERNEL);
@@ -165,8 +165,8 @@ clone_mnt(struct vfsmount *old, struct dentry *root)
 		/* stick the duplicate mount on the same expiry list
 		 * as the original if that was on one */
 		spin_lock(&vfsmount_lock);
-		if (!list_empty(&old->mnt_fslink))
-			list_add(&mnt->mnt_fslink, &old->mnt_fslink);
+		if (!list_empty(&old->mnt_expire))
+			list_add(&mnt->mnt_expire, &old->mnt_expire);
 		spin_unlock(&vfsmount_lock);
 	}
 	return mnt;
@@ -351,7 +351,7 @@ static void umount_tree(struct vfsmount *mnt)
 	while (!list_empty(&kill)) {
 		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
 		list_del_init(&mnt->mnt_list);
-		list_del_init(&mnt->mnt_fslink);
+		list_del_init(&mnt->mnt_expire);
 		if (mnt->mnt_parent == mnt) {
 			spin_unlock(&vfsmount_lock);
 		} else {
@@ -645,7 +645,7 @@ static int do_loopback(struct nameidata *nd, char *old_name, int recurse)
 	if (mnt) {
 		/* stop bind mounts from expiring */
 		spin_lock(&vfsmount_lock);
-		list_del_init(&mnt->mnt_fslink);
+		list_del_init(&mnt->mnt_expire);
 		spin_unlock(&vfsmount_lock);
 
 		err = graft_tree(mnt, nd);
@@ -744,7 +744,7 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
 
 	/* if the mount is moved, it should no longer be expire
 	 * automatically */
-	list_del_init(&old_nd.mnt->mnt_fslink);
+	list_del_init(&old_nd.mnt->mnt_expire);
 out2:
 	spin_unlock(&vfsmount_lock);
 out1:
@@ -814,7 +814,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
 	if (err == 0 && fslist) {
 		/* add to the specified expiration list */
 		spin_lock(&vfsmount_lock);
-		list_add_tail(&newmnt->mnt_fslink, fslist);
+		list_add_tail(&newmnt->mnt_expire, fslist);
 		spin_unlock(&vfsmount_lock);
 	}
 
@@ -869,7 +869,7 @@ static void expire_mount(struct vfsmount *mnt, struct list_head *mounts)
 		 * Someone brought it back to life whilst we didn't have any
 		 * locks held so return it to the expiration list
 		 */
-		list_add_tail(&mnt->mnt_fslink, mounts);
+		list_add_tail(&mnt->mnt_expire, mounts);
 		spin_unlock(&vfsmount_lock);
 	}
 }
@@ -896,13 +896,13 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	 * - still marked for expiry (marked on the last call here; marks are
 	 *   cleared by mntput())
 	 */
-	list_for_each_entry_safe(mnt, next, mounts, mnt_fslink) {
+	list_for_each_entry_safe(mnt, next, mounts, mnt_expire) {
 		if (!xchg(&mnt->mnt_expiry_mark, 1) ||
 		    atomic_read(&mnt->mnt_count) != 1)
 			continue;
 
 		mntget(mnt);
-		list_move(&mnt->mnt_fslink, &graveyard);
+		list_move(&mnt->mnt_expire, &graveyard);
 	}
 
 	/*
@@ -912,8 +912,8 @@ void mark_mounts_for_expiry(struct list_head *mounts)
 	 * - dispose of the corpse
 	 */
 	while (!list_empty(&graveyard)) {
-		mnt = list_entry(graveyard.next, struct vfsmount, mnt_fslink);
-		list_del_init(&mnt->mnt_fslink);
+		mnt = list_entry(graveyard.next, struct vfsmount, mnt_expire);
+		list_del_init(&mnt->mnt_expire);
 
 		/* don't do anything if the namespace is dead - all the
 		 * vfsmounts from it are going away anyway */
-- 
cgit v1.2.3