summaryrefslogtreecommitdiff
path: root/fs/namespace.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-13 11:28:42 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-13 11:28:42 +0200
commit77c688ac87183537ed0fb84ec2cb8fa8ec97c458 (patch)
treed18e117e05c0d71463823536165ddd9ad75b6bc5 /fs/namespace.c
parent5e40d331bd72447197f26525f21711c4a265b6a6 (diff)
parenta457606a6f81cfddfc9da1ef2a8bf2c65a8eb35e (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs updates from Al Viro: "The big thing in this pile is Eric's unmount-on-rmdir series; we finally have everything we need for that. The final piece of prereqs is delayed mntput() - now filesystem shutdown always happens on shallow stack. Other than that, we have several new primitives for iov_iter (Matt Wilcox, culled from his XIP-related series) pushing the conversion to ->read_iter()/ ->write_iter() a bit more, a bunch of fs/dcache.c cleanups and fixes (including the external name refcounting, which gives consistent behaviour of d_move() wrt procfs symlinks for long and short names alike) and assorted cleanups and fixes all over the place. This is just the first pile; there's a lot of stuff from various people that ought to go in this window. Starting with unionmount/overlayfs mess... ;-/" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (60 commits) fs/file_table.c: Update alloc_file() comment vfs: Deduplicate code shared by xattr system calls operating on paths reiserfs: remove pointless forward declaration of struct nameidata don't need that forward declaration of struct nameidata in dcache.h anymore take dname_external() into fs/dcache.c let path_init() failures treated the same way as subsequent link_path_walk() fix misuses of f_count() in ppp and netlink ncpfs: use list_for_each_entry() for d_subdirs walk vfs: move getname() from callers to do_mount() gfs2_atomic_open(): skip lookups on hashed dentry [infiniband] remove pointless assignments gadgetfs: saner API for gadgetfs_create_file() f_fs: saner API for ffs_sb_create_file() jfs: don't hash direct inode [s390] remove pointless assignment of ->f_op in vmlogrdr ->open() ecryptfs: ->f_op is never NULL android: ->f_op is never NULL nouveau: __iomem misannotations missing annotation in fs/file.c fs: namespace: suppress 'may be used uninitialized' warnings ...
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c203
1 files changed, 148 insertions, 55 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index ef42d9bee212..348562f14e93 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -23,6 +23,7 @@
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/bootmem.h>
+#include <linux/task_work.h>
#include "pnode.h"
#include "internal.h"
@@ -224,6 +225,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_share);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
+ INIT_HLIST_NODE(&mnt->mnt_mp_list);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
@@ -666,11 +668,45 @@ struct vfsmount *lookup_mnt(struct path *path)
return m;
}
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+/*
+ * __is_local_mountpoint - Test to see if dentry is a mountpoint in the
+ * current mount namespace.
+ *
+ * The common case is dentries are not mountpoints at all and that
+ * test is handled inline. For the slow case when we are actually
+ * dealing with a mountpoint of some kind, walk through all of the
+ * mounts in the current mount namespace and test to see if the dentry
+ * is a mountpoint.
+ *
+ * The mount_hashtable is not usable in the context because we
+ * need to identify all mounts that may be in the current mount
+ * namespace not just a mount that happens to have some specified
+ * parent mount.
+ */
+bool __is_local_mountpoint(struct dentry *dentry)
+{
+ struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ struct mount *mnt;
+ bool is_covered = false;
+
+ if (!d_mountpoint(dentry))
+ goto out;
+
+ down_read(&namespace_sem);
+ list_for_each_entry(mnt, &ns->list, mnt_list) {
+ is_covered = (mnt->mnt_mountpoint == dentry);
+ if (is_covered)
+ break;
+ }
+ up_read(&namespace_sem);
+out:
+ return is_covered;
+}
+
+static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
{
struct hlist_head *chain = mp_hash(dentry);
struct mountpoint *mp;
- int ret;
hlist_for_each_entry(mp, chain, m_hash) {
if (mp->m_dentry == dentry) {
@@ -681,6 +717,14 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
return mp;
}
}
+ return NULL;
+}
+
+static struct mountpoint *new_mountpoint(struct dentry *dentry)
+{
+ struct hlist_head *chain = mp_hash(dentry);
+ struct mountpoint *mp;
+ int ret;
mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
if (!mp)
@@ -695,6 +739,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
mp->m_dentry = dentry;
mp->m_count = 1;
hlist_add_head(&mp->m_hash, chain);
+ INIT_HLIST_HEAD(&mp->m_list);
return mp;
}
@@ -702,6 +747,7 @@ static void put_mountpoint(struct mountpoint *mp)
{
if (!--mp->m_count) {
struct dentry *dentry = mp->m_dentry;
+ BUG_ON(!hlist_empty(&mp->m_list));
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_MOUNTED;
spin_unlock(&dentry->d_lock);
@@ -748,6 +794,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
list_del_init(&mnt->mnt_child);
hlist_del_init_rcu(&mnt->mnt_hash);
+ hlist_del_init(&mnt->mnt_mp_list);
put_mountpoint(mnt->mnt_mp);
mnt->mnt_mp = NULL;
}
@@ -764,6 +811,7 @@ void mnt_set_mountpoint(struct mount *mnt,
child_mnt->mnt_mountpoint = dget(mp->m_dentry);
child_mnt->mnt_parent = mnt;
child_mnt->mnt_mp = mp;
+ hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}
/*
@@ -957,6 +1005,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
return ERR_PTR(err);
}
+static void cleanup_mnt(struct mount *mnt)
+{
+ /*
+ * This probably indicates that somebody messed
+ * up a mnt_want/drop_write() pair. If this
+ * happens, the filesystem was probably unable
+ * to make r/w->r/o transitions.
+ */
+ /*
+ * The locking used to deal with mnt_count decrement provides barriers,
+ * so mnt_get_writers() below is safe.
+ */
+ WARN_ON(mnt_get_writers(mnt));
+ if (unlikely(mnt->mnt_pins.first))
+ mnt_pin_kill(mnt);
+ fsnotify_vfsmount_delete(&mnt->mnt);
+ dput(mnt->mnt.mnt_root);
+ deactivate_super(mnt->mnt.mnt_sb);
+ mnt_free_id(mnt);
+ call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
+}
+
+static void __cleanup_mnt(struct rcu_head *head)
+{
+ cleanup_mnt(container_of(head, struct mount, mnt_rcu));
+}
+
+static LLIST_HEAD(delayed_mntput_list);
+static void delayed_mntput(struct work_struct *unused)
+{
+ struct llist_node *node = llist_del_all(&delayed_mntput_list);
+ struct llist_node *next;
+
+ for (; node; node = next) {
+ next = llist_next(node);
+ cleanup_mnt(llist_entry(node, struct mount, mnt_llist));
+ }
+}
+static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
+
static void mntput_no_expire(struct mount *mnt)
{
rcu_read_lock();
@@ -982,24 +1070,18 @@ static void mntput_no_expire(struct mount *mnt)
list_del(&mnt->mnt_instance);
unlock_mount_hash();
- /*
- * This probably indicates that somebody messed
- * up a mnt_want/drop_write() pair. If this
- * happens, the filesystem was probably unable
- * to make r/w->r/o transitions.
- */
- /*
- * The locking used to deal with mnt_count decrement provides barriers,
- * so mnt_get_writers() below is safe.
- */
- WARN_ON(mnt_get_writers(mnt));
- if (unlikely(mnt->mnt_pins.first))
- mnt_pin_kill(mnt);
- fsnotify_vfsmount_delete(&mnt->mnt);
- dput(mnt->mnt.mnt_root);
- deactivate_super(mnt->mnt.mnt_sb);
- mnt_free_id(mnt);
- call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
+ if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
+ struct task_struct *task = current;
+ if (likely(!(task->flags & PF_KTHREAD))) {
+ init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
+ if (!task_work_add(task, &mnt->mnt_rcu, true))
+ return;
+ }
+ if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
+ schedule_delayed_work(&delayed_mntput_work, 1);
+ return;
+ }
+ cleanup_mnt(mnt);
}
void mntput(struct vfsmount *mnt)
@@ -1272,6 +1354,7 @@ void umount_tree(struct mount *mnt, int how)
if (how < 2)
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
if (mnt_has_parent(p)) {
+ hlist_del_init(&p->mnt_mp_list);
put_mountpoint(p->mnt_mp);
mnt_add_count(p->mnt_parent, -1);
/* move the reference to mountpoint into ->mnt_ex_mountpoint */
@@ -1385,6 +1468,37 @@ static int do_umount(struct mount *mnt, int flags)
return retval;
}
+/*
+ * __detach_mounts - lazily unmount all mounts on the specified dentry
+ *
+ * During unlink, rmdir, and d_drop it is possible to loose the path
+ * to an existing mountpoint, and wind up leaking the mount.
+ * detach_mounts allows lazily unmounting those mounts instead of
+ * leaking them.
+ *
+ * The caller may hold dentry->d_inode->i_mutex.
+ */
+void __detach_mounts(struct dentry *dentry)
+{
+ struct mountpoint *mp;
+ struct mount *mnt;
+
+ namespace_lock();
+ mp = lookup_mountpoint(dentry);
+ if (!mp)
+ goto out_unlock;
+
+ lock_mount_hash();
+ while (!hlist_empty(&mp->m_list)) {
+ mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
+ umount_tree(mnt, 2);
+ }
+ unlock_mount_hash();
+ put_mountpoint(mp);
+out_unlock:
+ namespace_unlock();
+}
+
/*
* Is the caller allowed to modify his namespace?
*/
@@ -1742,7 +1856,9 @@ retry:
namespace_lock();
mnt = lookup_mnt(path);
if (likely(!mnt)) {
- struct mountpoint *mp = new_mountpoint(dentry);
+ struct mountpoint *mp = lookup_mountpoint(dentry);
+ if (!mp)
+ mp = new_mountpoint(dentry);
if (IS_ERR(mp)) {
namespace_unlock();
mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2398,21 +2514,9 @@ int copy_mount_options(const void __user * data, unsigned long *where)
return 0;
}
-int copy_mount_string(const void __user *data, char **where)
+char *copy_mount_string(const void __user *data)
{
- char *tmp;
-
- if (!data) {
- *where = NULL;
- return 0;
- }
-
- tmp = strndup_user(data, PAGE_SIZE);
- if (IS_ERR(tmp))
- return PTR_ERR(tmp);
-
- *where = tmp;
- return 0;
+ return data ? strndup_user(data, PAGE_SIZE) : NULL;
}
/*
@@ -2429,7 +2533,7 @@ int copy_mount_string(const void __user *data, char **where)
* Therefore, if this magic number is present, it carries no information
* and must be discarded.
*/
-long do_mount(const char *dev_name, const char *dir_name,
+long do_mount(const char *dev_name, const char __user *dir_name,
const char *type_page, unsigned long flags, void *data_page)
{
struct path path;
@@ -2441,15 +2545,11 @@ long do_mount(const char *dev_name, const char *dir_name,
flags &= ~MS_MGC_MSK;
/* Basic sanity checks */
-
- if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
- return -EINVAL;
-
if (data_page)
((char *)data_page)[PAGE_SIZE - 1] = 0;
/* ... and get the mountpoint */
- retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
+ retval = user_path(dir_name, &path);
if (retval)
return retval;
@@ -2674,37 +2774,30 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
{
int ret;
char *kernel_type;
- struct filename *kernel_dir;
char *kernel_dev;
unsigned long data_page;
- ret = copy_mount_string(type, &kernel_type);
- if (ret < 0)
+ kernel_type = copy_mount_string(type);
+ ret = PTR_ERR(kernel_type);
+ if (IS_ERR(kernel_type))
goto out_type;
- kernel_dir = getname(dir_name);
- if (IS_ERR(kernel_dir)) {
- ret = PTR_ERR(kernel_dir);
- goto out_dir;
- }
-
- ret = copy_mount_string(dev_name, &kernel_dev);
- if (ret < 0)
+ kernel_dev = copy_mount_string(dev_name);
+ ret = PTR_ERR(kernel_dev);
+ if (IS_ERR(kernel_dev))
goto out_dev;
ret = copy_mount_options(data, &data_page);
if (ret < 0)
goto out_data;
- ret = do_mount(kernel_dev, kernel_dir->name, kernel_type, flags,
+ ret = do_mount(kernel_dev, dir_name, kernel_type, flags,
(void *) data_page);
free_page(data_page);
out_data:
kfree(kernel_dev);
out_dev:
- putname(kernel_dir);
-out_dir:
kfree(kernel_type);
out_type:
return ret;