From 0558c1bf5a0811bf5e3753eed911a15b9bd08271 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 21 Jan 2021 14:19:23 +0100 Subject: capability: handle idmapped mounts In order to determine whether a caller holds privilege over a given inode the capability framework exposes the two helpers privileged_wrt_inode_uidgid() and capable_wrt_inode_uidgid(). The former verifies that the inode has a mapping in the caller's user namespace and the latter additionally verifies that the caller has the requested capability in their current user namespace. If the inode is accessed through an idmapped mount map it into the mount's user namespace. Afterwards the checks are identical to non-idmapped inodes. If the initial user namespace is passed all operations are a nop so non-idmapped mounts will not see a change in behavior. Link: https://lore.kernel.org/r/20210121131959.646623-5-christian.brauner@ubuntu.com Cc: Christoph Hellwig Cc: David Howells Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: James Morris Acked-by: Serge Hallyn Signed-off-by: Christian Brauner --- security/commoncap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'security/commoncap.c') diff --git a/security/commoncap.c b/security/commoncap.c index bacc1111d871..88ee345f7bfa 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -489,7 +489,7 @@ int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size) return -EINVAL; if (!validheader(size, cap)) return -EINVAL; - if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) + if (!capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_SETFCAP)) return -EPERM; if (size == XATTR_CAPS_SZ_2) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) @@ -956,7 +956,8 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) struct inode *inode = d_backing_inode(dentry); if (!inode) return -EINVAL; - if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP)) + if (!capable_wrt_inode_uidgid(&init_user_ns, inode, + CAP_SETFCAP)) return -EPERM; return 0; } -- cgit v1.2.3 From e65ce2a50cf6af216bea6fd80d771fcbb4c0aaa1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 21 Jan 2021 14:19:27 +0100 Subject: acl: handle idmapped mounts The posix acl permission checking helpers determine whether a caller is privileged over an inode according to the acls associated with the inode. Add helpers that make it possible to handle acls on idmapped mounts. The vfs and the filesystems targeted by this first iteration make use of posix_acl_fix_xattr_from_user() and posix_acl_fix_xattr_to_user() to translate basic posix access and default permissions such as the ACL_USER and ACL_GROUP type according to the initial user namespace (or the superblock's user namespace) to and from the caller's current user namespace. Adapt these two helpers to handle idmapped mounts whereby we either map from or into the mount's user namespace depending on in which direction we're translating. Similarly, cap_convert_nscap() is used by the vfs to translate user namespace and non-user namespace aware filesystem capabilities from the superblock's user namespace to the caller's user namespace. Enable it to handle idmapped mounts by accounting for the mount's user namespace. In addition the fileystems targeted in the first iteration of this patch series make use of the posix_acl_chmod() and, posix_acl_update_mode() helpers. Both helpers perform permission checks on the target inode. Let them handle idmapped mounts. These two helpers are called when posix acls are set by the respective filesystems to handle this case we extend the ->set() method to take an additional user namespace argument to pass the mount's user namespace down. Link: https://lore.kernel.org/r/20210121131959.646623-9-christian.brauner@ubuntu.com Cc: Christoph Hellwig Cc: David Howells Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- security/commoncap.c | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'security/commoncap.c') diff --git a/security/commoncap.c b/security/commoncap.c index 88ee345f7bfa..c3fd9b86ea9a 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -450,16 +450,33 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, return size; } +/** + * rootid_from_xattr - translate root uid of vfs caps + * + * @value: vfs caps value which may be modified by this function + * @size: size of @ivalue + * @task_ns: user namespace of the caller + * @mnt_userns: user namespace of the mount the inode was found from + * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. + */ static kuid_t rootid_from_xattr(const void *value, size_t size, - struct user_namespace *task_ns) + struct user_namespace *task_ns, + struct user_namespace *mnt_userns) { const struct vfs_ns_cap_data *nscap = value; + kuid_t rootkid; uid_t rootid = 0; if (size == XATTR_CAPS_SZ_3) rootid = le32_to_cpu(nscap->rootid); - return make_kuid(task_ns, rootid); + rootkid = make_kuid(task_ns, rootid); + return kuid_from_mnt(mnt_userns, rootkid); } static bool validheader(size_t size, const struct vfs_cap_data *cap) @@ -467,13 +484,27 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap) return is_v2header(size, cap) || is_v3header(size, cap); } -/* +/** + * cap_convert_nscap - check vfs caps + * + * @mnt_userns: user namespace of the mount the inode was found from + * @dentry: used to retrieve inode to check permissions on + * @ivalue: vfs caps value which may be modified by this function + * @size: size of @ivalue + * * User requested a write of security.capability. If needed, update the * xattr to change from v2 to v3, or to fixup the v3 rootid. * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. + * * If all is ok, we return the new size, on error return < 0. */ -int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size) +int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, + const void **ivalue, size_t size) { struct vfs_ns_cap_data *nscap; uid_t nsrootid; @@ -489,14 +520,14 @@ int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size) return -EINVAL; if (!validheader(size, cap)) return -EINVAL; - if (!capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_SETFCAP)) + if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; - if (size == XATTR_CAPS_SZ_2) + if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns)) if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP)) /* user is privileged, just write the v2 */ return size; - rootid = rootid_from_xattr(*ivalue, size, task_ns); + rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns); if (!uid_valid(rootid)) return -EINVAL; -- cgit v1.2.3 From c7c7a1a18af4c3bb7749d33e3df3acdf0a95bbb5 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Thu, 21 Jan 2021 14:19:28 +0100 Subject: xattr: handle idmapped mounts When interacting with extended attributes the vfs verifies that the caller is privileged over the inode with which the extended attribute is associated. For posix access and posix default extended attributes a uid or gid can be stored on-disk. Let the functions handle posix extended attributes on idmapped mounts. If the inode is accessed through an idmapped mount we need to map it according to the mount's user namespace. Afterwards the checks are identical to non-idmapped mounts. This has no effect for e.g. security xattrs since they don't store uids or gids and don't perform permission checks on them like posix acls do. Link: https://lore.kernel.org/r/20210121131959.646623-10-christian.brauner@ubuntu.com Cc: Christoph Hellwig Cc: David Howells Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: James Morris Signed-off-by: Tycho Andersen Signed-off-by: Christian Brauner --- security/commoncap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'security/commoncap.c') diff --git a/security/commoncap.c b/security/commoncap.c index c3fd9b86ea9a..745dc1f2c97f 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -313,7 +313,7 @@ int cap_inode_killpriv(struct dentry *dentry) { int error; - error = __vfs_removexattr(dentry, XATTR_NAME_CAPS); + error = __vfs_removexattr(&init_user_ns, dentry, XATTR_NAME_CAPS); if (error == -EOPNOTSUPP) error = 0; return error; @@ -386,8 +386,8 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, return -EINVAL; size = sizeof(struct vfs_ns_cap_data); - ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS, - &tmpbuf, size, GFP_NOFS); + ret = (int)vfs_getxattr_alloc(&init_user_ns, dentry, XATTR_NAME_CAPS, + &tmpbuf, size, GFP_NOFS); dput(dentry); if (ret < 0) -- cgit v1.2.3 From 71bc356f93a1c589fad13f7487258f89c417976e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 21 Jan 2021 14:19:29 +0100 Subject: commoncap: handle idmapped mounts When interacting with user namespace and non-user namespace aware filesystem capabilities the vfs will perform various security checks to determine whether or not the filesystem capabilities can be used by the caller, whether they need to be removed and so on. The main infrastructure for this resides in the capability codepaths but they are called through the LSM security infrastructure even though they are not technically an LSM or optional. This extends the existing security hooks security_inode_removexattr(), security_inode_killpriv(), security_inode_getsecurity() to pass down the mount's user namespace and makes them aware of idmapped mounts. In order to actually get filesystem capabilities from disk the capability infrastructure exposes the get_vfs_caps_from_disk() helper. For user namespace aware filesystem capabilities a root uid is stored alongside the capabilities. In order to determine whether the caller can make use of the filesystem capability or whether it needs to be ignored it is translated according to the superblock's user namespace. If it can be translated to uid 0 according to that id mapping the caller can use the filesystem capabilities stored on disk. If we are accessing the inode that holds the filesystem capabilities through an idmapped mount we map the root uid according to the mount's user namespace. Afterwards the checks are identical to non-idmapped mounts: reading filesystem caps from disk enforces that the root uid associated with the filesystem capability must have a mapping in the superblock's user namespace and that the caller is either in the same user namespace or is a descendant of the superblock's user namespace. For filesystems that are mountable inside user namespace the caller can just mount the filesystem and won't usually need to idmap it. If they do want to idmap it they can create an idmapped mount and mark it with a user namespace they created and which is thus a descendant of s_user_ns. For filesystems that are not mountable inside user namespaces the descendant rule is trivially true because the s_user_ns will be the initial user namespace. If the initial user namespace is passed nothing changes so non-idmapped mounts will see identical behavior as before. Link: https://lore.kernel.org/r/20210121131959.646623-11-christian.brauner@ubuntu.com Cc: Christoph Hellwig Cc: David Howells Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Reviewed-by: Christoph Hellwig Acked-by: James Morris Signed-off-by: Christian Brauner --- security/commoncap.c | 62 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 13 deletions(-) (limited to 'security/commoncap.c') diff --git a/security/commoncap.c b/security/commoncap.c index 745dc1f2c97f..234b074c2c58 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -303,17 +303,25 @@ int cap_inode_need_killpriv(struct dentry *dentry) /** * cap_inode_killpriv - Erase the security markings on an inode - * @dentry: The inode/dentry to alter + * + * @mnt_userns: user namespace of the mount the inode was found from + * @dentry: The inode/dentry to alter * * Erase the privilege-enhancing security markings on an inode. * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. + * * Returns 0 if successful, -ve on error. */ -int cap_inode_killpriv(struct dentry *dentry) +int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry) { int error; - error = __vfs_removexattr(&init_user_ns, dentry, XATTR_NAME_CAPS); + error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS); if (error == -EOPNOTSUPP) error = 0; return error; @@ -366,7 +374,8 @@ static bool is_v3header(size_t size, const struct vfs_cap_data *cap) * by the integrity subsystem, which really wants the unconverted values - * so that's good. */ -int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, +int cap_inode_getsecurity(struct user_namespace *mnt_userns, + struct inode *inode, const char *name, void **buffer, bool alloc) { int size, ret; @@ -386,7 +395,7 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, return -EINVAL; size = sizeof(struct vfs_ns_cap_data); - ret = (int)vfs_getxattr_alloc(&init_user_ns, dentry, XATTR_NAME_CAPS, + ret = (int)vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS, &tmpbuf, size, GFP_NOFS); dput(dentry); @@ -412,6 +421,9 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer, root = le32_to_cpu(nscap->rootid); kroot = make_kuid(fs_ns, root); + /* If this is an idmapped mount shift the kuid. */ + kroot = kuid_into_mnt(mnt_userns, kroot); + /* If the root kuid maps to a valid uid in current ns, then return * this as a nscap. */ mappedroot = from_kuid(current_user_ns(), kroot); @@ -595,10 +607,24 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, return *effective ? ret : 0; } -/* +/** + * get_vfs_caps_from_disk - retrieve vfs caps from disk + * + * @mnt_userns: user namespace of the mount the inode was found from + * @dentry: dentry from which @inode is retrieved + * @cpu_caps: vfs capabilities + * * Extract the on-exec-apply capability sets for an executable file. + * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. */ -int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps) +int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, + const struct dentry *dentry, + struct cpu_vfs_cap_data *cpu_caps) { struct inode *inode = d_backing_inode(dentry); __u32 magic_etc; @@ -654,6 +680,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data /* Limit the caps to the mounter of the filesystem * or the more limited uid specified in the xattr. */ + rootkuid = kuid_into_mnt(mnt_userns, rootkuid); if (!rootid_owns_currentns(rootkuid)) return -ENODATA; @@ -699,7 +726,8 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file, if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns)) return 0; - rc = get_vfs_caps_from_disk(file->f_path.dentry, &vcaps); + rc = get_vfs_caps_from_disk(file_mnt_user_ns(file), + file->f_path.dentry, &vcaps); if (rc < 0) { if (rc == -EINVAL) printk(KERN_NOTICE "Invalid argument reading file caps for %s\n", @@ -964,16 +992,25 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name, /** * cap_inode_removexattr - Determine whether an xattr may be removed - * @dentry: The inode/dentry being altered - * @name: The name of the xattr to be changed + * + * @mnt_userns: User namespace of the mount the inode was found from + * @dentry: The inode/dentry being altered + * @name: The name of the xattr to be changed * * Determine whether an xattr may be removed from an inode, returning 0 if * permission is granted, -ve if denied. * + * If the inode has been found through an idmapped mount the user namespace of + * the vfsmount must be passed through @mnt_userns. This function will then + * take care to map the inode according to @mnt_userns before checking + * permissions. On non-idmapped mounts or if permission checking is to be + * performed on the raw inode simply passs init_user_ns. + * * This is used to make sure security xattrs don't get removed by those who * aren't privileged to remove them. */ -int cap_inode_removexattr(struct dentry *dentry, const char *name) +int cap_inode_removexattr(struct user_namespace *mnt_userns, + struct dentry *dentry, const char *name) { struct user_namespace *user_ns = dentry->d_sb->s_user_ns; @@ -987,8 +1024,7 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) struct inode *inode = d_backing_inode(dentry); if (!inode) return -EINVAL; - if (!capable_wrt_inode_uidgid(&init_user_ns, inode, - CAP_SETFCAP)) + if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP)) return -EPERM; return 0; } -- cgit v1.2.3