summaryrefslogtreecommitdiff
path: root/kernel/user_namespace.c
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2025-09-15 13:45:37 +0200
committerChristian Brauner <brauner@kernel.org>2025-09-19 14:26:17 +0200
commit3ab378cfa793c648d4edf02bbfff3af8715aca91 (patch)
treeb5a57da7ff6c26434569ec95e8d2cf43f9cbe3c6 /kernel/user_namespace.c
parent8f5ae30d69d7543eee0d70083daf4de8fe15d585 (diff)
parent28ef38a9a2c7aa4dd8dfbb1d2b12f9ea84044327 (diff)
Merge patch series "ns: support file handles"
Christian Brauner <brauner@kernel.org> says: For a while now we have supported file handles for pidfds. This has proven to be very useful. Extend the concept to cover namespaces as well. After this patchset it is possible to encode and decode namespace file handles using the commong name_to_handle_at() and open_by_handle_at() apis. Namespaces file descriptors can already be derived from pidfds which means they aren't subject to overmount protection bugs. IOW, it's irrelevant if the caller would not have access to an appropriate /proc/<pid>/ns/ directory as they could always just derive the namespace based on a pidfd already. It has the same advantage as pidfds. It's possible to reliably and for the lifetime of the system refer to a namespace without pinning any resources and to compare them. Permission checking is kept simple. If the caller is located in the namespace the file handle refers to they are able to open it otherwise they must hold privilege over the owning namespace of the relevant namespace. Both the network namespace and the mount namespace already have an associated cookie that isn't recycled and is fully exposed to userspace. Move this into ns_common and use the same id space for all namespaces so they can trivially and reliably be compared. There's more coming based on the iterator infrastructure but the series is large enough and focuses on file handles. Extensive selftests included. * patches from https://lore.kernel.org/20250912-work-namespace-v2-0-1a247645cef5@kernel.org: (33 commits) selftests/namespaces: add file handle selftests selftests/namespaces: add identifier selftests tools: update nsfs.h uapi header nsfs: add missing id retrieval support nsfs: support exhaustive file handles nsfs: support file handles nsfs: add current_in_namespace() ns: add to_<type>_ns() to respective headers uts: support ns lookup user: support ns lookup time: support ns lookup pid: support ns lookup net: support ns lookup ipc: support ns lookup cgroup: support ns lookup mnt: support ns lookup nstree: make iterator generic ns: remove ns_alloc_inum() uts: use ns_common_init() user: use ns_common_init() ... Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'kernel/user_namespace.c')
-rw-r--r--kernel/user_namespace.c17
1 files changed, 8 insertions, 9 deletions
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 682f40d5632d..cfb0e28f2779 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -21,6 +21,7 @@
#include <linux/fs_struct.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
+#include <linux/nstree.h>
static struct kmem_cache *user_ns_cachep __ro_after_init;
static DEFINE_MUTEX(userns_state_mutex);
@@ -124,12 +125,11 @@ int create_user_ns(struct cred *new)
goto fail_dec;
ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP);
- ret = ns_alloc_inum(&ns->ns);
+
+ ret = ns_common_init(&ns->ns, &userns_operations, true);
if (ret)
goto fail_free;
- ns->ns.ops = &userns_operations;
- refcount_set(&ns->ns.count, 1);
/* Leave the new->user_ns reference with the new user namespace. */
ns->parent = parent_ns;
ns->level = parent_ns->level + 1;
@@ -159,6 +159,7 @@ int create_user_ns(struct cred *new)
goto fail_keyring;
set_cred_user_ns(new, ns);
+ ns_tree_add(ns);
return 0;
fail_keyring:
#ifdef CONFIG_PERSISTENT_KEYRINGS
@@ -201,6 +202,7 @@ static void free_user_ns(struct work_struct *work)
do {
struct ucounts *ucounts = ns->ucounts;
parent = ns->parent;
+ ns_tree_remove(ns);
if (ns->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
kfree(ns->gid_map.forward);
kfree(ns->gid_map.reverse);
@@ -219,7 +221,8 @@ static void free_user_ns(struct work_struct *work)
retire_userns_sysctls(ns);
key_free_user_ns(ns);
ns_free_inum(&ns->ns);
- kmem_cache_free(user_ns_cachep, ns);
+ /* Concurrent nstree traversal depends on a grace period. */
+ kfree_rcu(ns, ns.ns_rcu);
dec_user_namespaces(ucounts);
ns = parent;
} while (refcount_dec_and_test(&parent->ns.count));
@@ -1322,11 +1325,6 @@ bool current_in_userns(const struct user_namespace *target_ns)
}
EXPORT_SYMBOL(current_in_userns);
-static inline struct user_namespace *to_user_ns(struct ns_common *ns)
-{
- return container_of(ns, struct user_namespace, ns);
-}
-
static struct ns_common *userns_get(struct task_struct *task)
{
struct user_namespace *user_ns;
@@ -1413,6 +1411,7 @@ const struct proc_ns_operations userns_operations = {
static __init int user_namespaces_init(void)
{
user_ns_cachep = KMEM_CACHE(user_namespace, SLAB_PANIC | SLAB_ACCOUNT);
+ ns_tree_add(&init_user_ns);
return 0;
}
subsys_initcall(user_namespaces_init);