From 39f60c1ccee72caa0104145b5dbf5d37cce1ea39 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:23 +0100 Subject: fs: port xattr to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/capability.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 65efb74c3585..0a8ba82ef1af 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -42,6 +42,7 @@ struct inode; struct dentry; struct task_struct; struct user_namespace; +struct mnt_idmap; extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_init_eff_set; @@ -271,11 +272,11 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns) } /* audit system wants to get cap info from files as well */ -int get_vfs_caps_from_disk(struct user_namespace *mnt_userns, +int get_vfs_caps_from_disk(struct mnt_idmap *idmap, const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); -int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry, +int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry, const void **ivalue, size_t size); #endif /* !_LINUX_CAPABILITY_H */ -- cgit v1.2.3 From 9452e93e6dae862d7aeff2b11236d79bde6f9b66 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 13 Jan 2023 12:49:27 +0100 Subject: fs: port privilege checking helpers to mnt_idmap Convert to struct mnt_idmap. Last cycle we merged the necessary infrastructure in 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). This is just the conversion to struct mnt_idmap. Currently we still pass around the plain namespace that was attached to a mount. This is in general pretty convenient but it makes it easy to conflate namespaces that are relevant on the filesystem with namespaces that are relevent on the mount level. Especially for non-vfs developers without detailed knowledge in this area this can be a potential source for bugs. Once the conversion to struct mnt_idmap is done all helpers down to the really low-level helpers will take a struct mnt_idmap argument instead of two namespace arguments. This way it becomes impossible to conflate the two eliminating the possibility of any bugs. All of the vfs and all filesystems only operate on struct mnt_idmap. Acked-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner (Microsoft) --- include/linux/capability.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 0a8ba82ef1af..03c2a613ad40 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -249,9 +249,9 @@ static inline bool ns_capable_setid(struct user_namespace *ns, int cap) } #endif /* CONFIG_MULTIUSER */ bool privileged_wrt_inode_uidgid(struct user_namespace *ns, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, const struct inode *inode); -bool capable_wrt_inode_uidgid(struct user_namespace *mnt_userns, +bool capable_wrt_inode_uidgid(struct mnt_idmap *idmap, const struct inode *inode, int cap); extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap); extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns); -- cgit v1.2.3 From a4eecbae092759537748360299de03e434c9a956 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Wed, 25 Jan 2023 16:55:56 +0100 Subject: capability: add cap_isidentical Signed-off-by: Mateusz Guzik Reviewed-by: Serge Hallyn Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/capability.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index 03c2a613ad40..d3c6c2d1ff45 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -157,6 +157,16 @@ static inline bool cap_isclear(const kernel_cap_t a) return true; } +static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) +{ + unsigned __capi; + CAP_FOR_EACH_U32(__capi) { + if (a.cap[__capi] != b.cap[__capi]) + return false; + } + return true; +} + /* * Check if "a" is a subset of "set". * return true if ALL of the capabilities in "a" are also in "set" -- cgit v1.2.3 From f122a08b197d076ccf136c73fae0146875812a88 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 28 Feb 2023 11:39:09 -0800 Subject: capability: just use a 'u64' instead of a 'u32[2]' array Back in 2008 we extended the capability bits from 32 to 64, and we did it by extending the single 32-bit capability word from one word to an array of two words. It was then obfuscated by hiding the "2" behind two macro expansions, with the reasoning being that maybe it gets extended further some day. That reasoning may have been valid at the time, but the last thing we want to do is to extend the capability set any more. And the array of values not only causes source code oddities (with loops to deal with it), but also results in worse code generation. It's a lose-lose situation. So just change the 'u32[2]' into a 'u64' and be done with it. We still have to deal with the fact that the user space interface is designed around an array of these 32-bit values, but that was the case before too, since the array layouts were different (ie user space doesn't use an array of 32-bit values for individual capability masks, but an array of 32-bit slices of multiple masks). So that marshalling of data is actually simplified too, even if it does remain somewhat obscure and odd. This was all triggered by my reaction to the new "cap_isidentical()" introduced recently. By just using a saner data structure, it went from unsigned __capi; CAP_FOR_EACH_U32(__capi) { if (a.cap[__capi] != b.cap[__capi]) return false; } return true; to just being return a.val == b.val; instead. Which is rather more obvious both to humans and to compilers. Cc: Mateusz Guzik Cc: Casey Schaufler Cc: Serge Hallyn Cc: Al Viro Cc: Paul Moore Signed-off-by: Linus Torvalds --- include/linux/capability.h | 131 ++++++++++----------------------------------- 1 file changed, 29 insertions(+), 102 deletions(-) (limited to 'include/linux/capability.h') diff --git a/include/linux/capability.h b/include/linux/capability.h index d3c6c2d1ff45..0c356a517991 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -15,28 +15,25 @@ #include #include +#include #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 -#define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 extern int file_caps_enabled; -typedef struct kernel_cap_struct { - __u32 cap[_KERNEL_CAPABILITY_U32S]; -} kernel_cap_t; +typedef struct { u64 val; } kernel_cap_t; /* same as vfs_ns_cap_data but in cpu endian and always filled completely */ struct cpu_vfs_cap_data { __u32 magic_etc; + kuid_t rootid; kernel_cap_t permitted; kernel_cap_t inheritable; - kuid_t rootid; }; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) - struct file; struct inode; struct dentry; @@ -44,16 +41,6 @@ struct task_struct; struct user_namespace; struct mnt_idmap; -extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_init_eff_set; - -/* - * Internal kernel functions only - */ - -#define CAP_FOR_EACH_U32(__capi) \ - for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi) - /* * CAP_FS_MASK and CAP_NFSD_MASKS: * @@ -67,104 +54,52 @@ extern const kernel_cap_t __cap_init_eff_set; * 2. The security.* and trusted.* xattrs are fs-related MAC permissions */ -# define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \ - | CAP_TO_MASK(CAP_MKNOD) \ - | CAP_TO_MASK(CAP_DAC_OVERRIDE) \ - | CAP_TO_MASK(CAP_DAC_READ_SEARCH) \ - | CAP_TO_MASK(CAP_FOWNER) \ - | CAP_TO_MASK(CAP_FSETID)) - -# define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE)) - -#if _KERNEL_CAPABILITY_U32S != 2 -# error Fix up hand-coded capability macro initializers -#else /* HAND-CODED capability initializers */ +# define CAP_FS_MASK (BIT_ULL(CAP_CHOWN) \ + | BIT_ULL(CAP_MKNOD) \ + | BIT_ULL(CAP_DAC_OVERRIDE) \ + | BIT_ULL(CAP_DAC_READ_SEARCH) \ + | BIT_ULL(CAP_FOWNER) \ + | BIT_ULL(CAP_FSETID) \ + | BIT_ULL(CAP_MAC_OVERRIDE)) +#define CAP_VALID_MASK (BIT_ULL(CAP_LAST_CAP+1)-1) -#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) -#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) +# define CAP_EMPTY_SET ((kernel_cap_t) { 0 }) +# define CAP_FULL_SET ((kernel_cap_t) { CAP_VALID_MASK }) +# define CAP_FS_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) }) +# define CAP_NFSD_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) }) -# define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) -# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) -# define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ - CAP_FS_MASK_B1 } }) -# define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_SYS_RESOURCE), \ - CAP_FS_MASK_B1 } }) +# define cap_clear(c) do { (c).val = 0; } while (0) -#endif /* _KERNEL_CAPABILITY_U32S != 2 */ - -# define cap_clear(c) do { (c) = __cap_empty_set; } while (0) - -#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) -#define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) -#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag)) - -#define CAP_BOP_ALL(c, a, b, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = a.cap[__capi] OP b.cap[__capi]; \ - } \ -} while (0) - -#define CAP_UOP_ALL(c, a, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = OP a.cap[__capi]; \ - } \ -} while (0) +#define cap_raise(c, flag) ((c).val |= BIT_ULL(flag)) +#define cap_lower(c, flag) ((c).val &= ~BIT_ULL(flag)) +#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0) static inline kernel_cap_t cap_combine(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, |); - return dest; + return (kernel_cap_t) { a.val | b.val }; } static inline kernel_cap_t cap_intersect(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, &); - return dest; + return (kernel_cap_t) { a.val & b.val }; } static inline kernel_cap_t cap_drop(const kernel_cap_t a, const kernel_cap_t drop) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, drop, &~); - return dest; -} - -static inline kernel_cap_t cap_invert(const kernel_cap_t c) -{ - kernel_cap_t dest; - CAP_UOP_ALL(dest, c, ~); - return dest; + return (kernel_cap_t) { a.val &~ drop.val }; } static inline bool cap_isclear(const kernel_cap_t a) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != 0) - return false; - } - return true; + return !a.val; } static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != b.cap[__capi]) - return false; - } - return true; + return a.val == b.val; } /* @@ -176,39 +111,31 @@ static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) */ static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set) { - kernel_cap_t dest; - dest = cap_drop(a, set); - return cap_isclear(dest); + return !(a.val & ~set.val); } /* Used to decide between falling back on the old suser() or fsuser(). */ static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_FS_SET); } static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_fs_set)); + return cap_combine(a, cap_intersect(permitted, CAP_FS_SET)); } static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_NFSD_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_NFSD_SET); } static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_nfsd_set)); + return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET)); } #ifdef CONFIG_MULTIUSER -- cgit v1.2.3