From 5ff9d8a65ce80efb509ce4e8051394e9ed2cd942 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Fri, 29 Mar 2013 21:04:39 -0700
Subject: vfs: Lock in place mounts from more privileged users

When creating a less privileged mount namespace or propogating mounts
from a more privileged to a less privileged mount namespace lock the
submounts so they may not be unmounted individually in the child mount
namespace revealing what is under them.

This enforces the reasonable expectation that it is not possible to
see under a mount point.  Most of the time mounts are on empty
directories and revealing that does not matter, however I have seen an
occassionaly sloppy configuration where there were interesting things
concealed under a mount point that probably should not be revealed.

Expirable submounts are not locked because they will eventually
unmount automatically so whatever is under them already needs
to be safe for unprivileged users to access.

From a practical standpoint these restrictions do not appear to be
significant for unprivileged users of the mount namespace.  Recursive
bind mounts and pivot_root continues to work, and mounts that are
created in a mount namespace may be unmounted there.  All of which
means that the common idiom of keeping a directory of interesting
files and using pivot_root to throw everything else away continues to
work just fine.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/mount.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mount.h b/include/linux/mount.h
index 73005f9957ea..38cd98f112a0 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -48,6 +48,7 @@ struct mnt_namespace;
 #define MNT_INTERNAL	0x4000
 
 #define MNT_LOCK_READONLY	0x400000
+#define MNT_LOCKED		0x800000
 
 struct vfsmount {
 	struct dentry *mnt_root;	/* root of the mounted tree */
-- 
cgit v1.2.3


From e51db73532955dc5eaba4235e62b74b460709d5b Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 30 Mar 2013 19:57:41 -0700
Subject: userns: Better restrictions on when proc and sysfs can be mounted

Rely on the fact that another flavor of the filesystem is already
mounted and do not rely on state in the user namespace.

Verify that the mounted filesystem is not covered in any significant
way.  I would love to verify that the previously mounted filesystem
has no mounts on top but there are at least the directories
/proc/sys/fs/binfmt_misc and /sys/fs/cgroup/ that exist explicitly
for other filesystems to mount on top of.

Refactor the test into a function named fs_fully_visible and call that
function from the mount routines of proc and sysfs.  This makes this
test local to the filesystems involved and the results current of when
the mounts take place, removing a weird threading of the user
namespace, the mount namespace and the filesystems themselves.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/fs.h             | 1 +
 include/linux/user_namespace.h | 4 ----
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 981874773e85..3050c620f062 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1897,6 +1897,7 @@ extern int vfs_ustat(dev_t, struct kstatfs *);
 extern int freeze_super(struct super_block *super);
 extern int thaw_super(struct super_block *super);
 extern bool our_mnt(struct vfsmount *mnt);
+extern bool fs_fully_visible(struct file_system_type *);
 
 extern int current_umask(void);
 
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index b6b215f13b45..4ce009324933 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -26,8 +26,6 @@ struct user_namespace {
 	kuid_t			owner;
 	kgid_t			group;
 	unsigned int		proc_inum;
-	bool			may_mount_sysfs;
-	bool			may_mount_proc;
 };
 
 extern struct user_namespace init_user_ns;
@@ -84,6 +82,4 @@ static inline void put_user_ns(struct user_namespace *ns)
 
 #endif
 
-void update_mnt_policy(struct user_namespace *userns);
-
 #endif /* _LINUX_USER_H */
-- 
cgit v1.2.3


From 7dc5dbc879bd0779924b5132a48b731a0bc04a1e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 25 Mar 2013 20:07:01 -0700
Subject: sysfs: Restrict mounting sysfs

Don't allow mounting sysfs unless the caller has CAP_SYS_ADMIN rights
over the net namespace.  The principle here is if you create or have
capabilities over it you can mount it, otherwise you get to live with
what other people have mounted.

Instead of testing this with a straight forward ns_capable call,
perform this check the long and torturous way with kobject helpers,
this keeps direct knowledge of namespaces out of sysfs, and preserves
the existing sysfs abstractions.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/kobject_ns.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
index f66b065a8b5f..df32d2508290 100644
--- a/include/linux/kobject_ns.h
+++ b/include/linux/kobject_ns.h
@@ -39,6 +39,7 @@ enum kobj_ns_type {
  */
 struct kobj_ns_type_operations {
 	enum kobj_ns_type type;
+	bool (*current_may_mount)(void);
 	void *(*grab_current_ns)(void);
 	const void *(*netlink_ns)(struct sock *sk);
 	const void *(*initial_ns)(void);
@@ -50,6 +51,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type);
 const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
 const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
 
+bool kobj_ns_current_may_mount(enum kobj_ns_type type);
 void *kobj_ns_grab_current(enum kobj_ns_type type);
 const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
 const void *kobj_ns_initial(enum kobj_ns_type type);
-- 
cgit v1.2.3


From c7b96acf1456ef127fef461fcfedb54b81fecfbb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 20 Mar 2013 12:49:49 -0700
Subject: userns:  Kill nsown_capable it makes the wrong thing easy

nsown_capable is a special case of ns_capable essentially for just CAP_SETUID and
CAP_SETGID.  For the existing users it doesn't noticably simplify things and
from the suggested patches I have seen it encourages people to do the wrong
thing.  So remove nsown_capable.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/capability.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/capability.h b/include/linux/capability.h
index d9a4f7f40f32..a6ee1f9a5018 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -210,7 +210,6 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
 				      struct user_namespace *ns, int cap);
 extern bool capable(int cap);
 extern bool ns_capable(struct user_namespace *ns, int cap);
-extern bool nsown_capable(int cap);
 extern bool inode_capable(const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 
-- 
cgit v1.2.3