// SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2025 Christian Brauner */ #include #include #include #include #include static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock); static struct rb_root ns_unified_tree = RB_ROOT; /* protected by ns_tree_lock */ static LIST_HEAD(ns_unified_list); /* protected by ns_tree_lock */ /** * struct ns_tree - Namespace tree * @ns_tree: Rbtree of namespaces of a particular type * @ns_list: Sequentially walkable list of all namespaces of this type * @type: type of namespaces in this tree */ struct ns_tree { struct rb_root ns_tree; struct list_head ns_list; int type; }; struct ns_tree mnt_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(mnt_ns_tree.ns_list), .type = CLONE_NEWNS, }; struct ns_tree net_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(net_ns_tree.ns_list), .type = CLONE_NEWNET, }; EXPORT_SYMBOL_GPL(net_ns_tree); struct ns_tree uts_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(uts_ns_tree.ns_list), .type = CLONE_NEWUTS, }; struct ns_tree user_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(user_ns_tree.ns_list), .type = CLONE_NEWUSER, }; struct ns_tree ipc_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(ipc_ns_tree.ns_list), .type = CLONE_NEWIPC, }; struct ns_tree pid_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(pid_ns_tree.ns_list), .type = CLONE_NEWPID, }; struct ns_tree cgroup_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(cgroup_ns_tree.ns_list), .type = CLONE_NEWCGROUP, }; struct ns_tree time_ns_tree = { .ns_tree = RB_ROOT, .ns_list = LIST_HEAD_INIT(time_ns_tree.ns_list), .type = CLONE_NEWTIME, }; static inline struct ns_common *node_to_ns(const struct rb_node *node) { if (!node) return NULL; return rb_entry(node, struct ns_common, ns_tree_node); } static inline struct ns_common *node_to_ns_unified(const struct rb_node *node) { if (!node) return NULL; return rb_entry(node, struct ns_common, ns_unified_tree_node); } static inline struct ns_common *node_to_ns_owner(const struct rb_node *node) { if (!node) return NULL; return rb_entry(node, struct ns_common, ns_owner_tree_node); } static int ns_id_cmp(u64 id_a, u64 id_b) { if (id_a < id_b) return -1; if (id_a > id_b) return 1; return 0; } static int ns_cmp(struct rb_node *a, const struct rb_node *b) { return ns_id_cmp(node_to_ns(a)->ns_id, node_to_ns(b)->ns_id); } static int ns_cmp_unified(struct rb_node *a, const struct rb_node *b) { return ns_id_cmp(node_to_ns_unified(a)->ns_id, node_to_ns_unified(b)->ns_id); } static int ns_cmp_owner(struct rb_node *a, const struct rb_node *b) { return ns_id_cmp(node_to_ns_owner(a)->ns_id, node_to_ns_owner(b)->ns_id); } void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree) { struct rb_node *node, *prev; const struct proc_ns_operations *ops = ns->ops; VFS_WARN_ON_ONCE(!ns->ns_id); VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type); write_seqlock(&ns_tree_lock); node = rb_find_add_rcu(&ns->ns_tree_node, &ns_tree->ns_tree, ns_cmp); /* * If there's no previous entry simply add it after the * head and if there is add it after the previous entry. */ prev = rb_prev(&ns->ns_tree_node); if (!prev) list_add_rcu(&ns->ns_list_node, &ns_tree->ns_list); else list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node); /* Add to unified tree and list */ rb_find_add_rcu(&ns->ns_unified_tree_node, &ns_unified_tree, ns_cmp_unified); prev = rb_prev(&ns->ns_unified_tree_node); if (!prev) list_add_rcu(&ns->ns_unified_list_node, &ns_unified_list); else list_add_rcu(&ns->ns_unified_list_node, &node_to_ns_unified(prev)->ns_unified_list_node); if (ops) { struct user_namespace *user_ns; VFS_WARN_ON_ONCE(!ops->owner); user_ns = ops->owner(ns); if (user_ns) { struct ns_common *owner = &user_ns->ns; VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER); /* Insert into owner's rbtree */ rb_find_add_rcu(&ns->ns_owner_tree_node, &owner->ns_owner_tree, ns_cmp_owner); /* Insert into owner's list in sorted order */ prev = rb_prev(&ns->ns_owner_tree_node); if (!prev) list_add_rcu(&ns->ns_owner_entry, &owner->ns_owner); else list_add_rcu(&ns->ns_owner_entry, &node_to_ns_owner(prev)->ns_owner_entry); } else { /* Only the initial user namespace doesn't have an owner. */ VFS_WARN_ON_ONCE(ns != to_ns_common(&init_user_ns)); } } write_sequnlock(&ns_tree_lock); VFS_WARN_ON_ONCE(node); /* * Take an active reference on the owner namespace. This ensures * that the owner remains visible while any of its child namespaces * are active. For init namespaces this is a no-op as ns_owner() * returns NULL for namespaces owned by init_user_ns. */ __ns_ref_active_get_owner(ns); } void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree) { const struct proc_ns_operations *ops = ns->ops; struct user_namespace *user_ns; VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node)); VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node)); VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type); write_seqlock(&ns_tree_lock); rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree); RB_CLEAR_NODE(&ns->ns_tree_node); list_bidir_del_rcu(&ns->ns_list_node); rb_erase(&ns->ns_unified_tree_node, &ns_unified_tree); RB_CLEAR_NODE(&ns->ns_unified_tree_node); list_bidir_del_rcu(&ns->ns_unified_list_node); /* Remove from owner's rbtree if this namespace has an owner */ if (ops) { user_ns = ops->owner(ns); if (user_ns) { struct ns_common *owner = &user_ns->ns; rb_erase(&ns->ns_owner_tree_node, &owner->ns_owner_tree); RB_CLEAR_NODE(&ns->ns_owner_tree_node); } list_bidir_del_rcu(&ns->ns_owner_entry); } write_sequnlock(&ns_tree_lock); } EXPORT_SYMBOL_GPL(__ns_tree_remove); static int ns_find(const void *key, const struct rb_node *node) { const u64 ns_id = *(u64 *)key; const struct ns_common *ns = node_to_ns(node); if (ns_id < ns->ns_id) return -1; if (ns_id > ns->ns_id) return 1; return 0; } static int ns_find_unified(const void *key, const struct rb_node *node) { const u64 ns_id = *(u64 *)key; const struct ns_common *ns = node_to_ns_unified(node); if (ns_id < ns->ns_id) return -1; if (ns_id > ns->ns_id) return 1; return 0; } static struct ns_tree *ns_tree_from_type(int ns_type) { switch (ns_type) { case CLONE_NEWCGROUP: return &cgroup_ns_tree; case CLONE_NEWIPC: return &ipc_ns_tree; case CLONE_NEWNS: return &mnt_ns_tree; case CLONE_NEWNET: return &net_ns_tree; case CLONE_NEWPID: return &pid_ns_tree; case CLONE_NEWUSER: return &user_ns_tree; case CLONE_NEWUTS: return &uts_ns_tree; case CLONE_NEWTIME: return &time_ns_tree; } return NULL; } static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id) { struct rb_node *node; unsigned int seq; do { seq = read_seqbegin(&ns_tree_lock); node = rb_find_rcu(&ns_id, &ns_unified_tree, ns_find_unified); if (node) break; } while (read_seqretry(&ns_tree_lock, seq)); return node_to_ns_unified(node); } static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type) { struct ns_tree *ns_tree; struct rb_node *node; unsigned int seq; ns_tree = ns_tree_from_type(ns_type); if (!ns_tree) return NULL; do { seq = read_seqbegin(&ns_tree_lock); node = rb_find_rcu(&ns_id, &ns_tree->ns_tree, ns_find); if (node) break; } while (read_seqretry(&ns_tree_lock, seq)); return node_to_ns(node); } struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type) { RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage"); if (ns_type) return __ns_tree_lookup_rcu(ns_id, ns_type); return __ns_unified_tree_lookup_rcu(ns_id); } /** * ns_tree_adjoined_rcu - find the next/previous namespace in the same * tree * @ns: namespace to start from * @previous: if true find the previous namespace, otherwise the next * * Find the next or previous namespace in the same tree as @ns. If * there is no next/previous namespace, -ENOENT is returned. */ struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, struct ns_tree *ns_tree, bool previous) { struct list_head *list; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_adjoined_rcu() usage"); if (previous) list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_list_node)); else list = rcu_dereference(list_next_rcu(&ns->ns_list_node)); if (list_is_head(list, &ns_tree->ns_list)) return ERR_PTR(-ENOENT); VFS_WARN_ON_ONCE(list_entry_rcu(list, struct ns_common, ns_list_node)->ns_type != ns_tree->type); return list_entry_rcu(list, struct ns_common, ns_list_node); } /** * ns_tree_gen_id - generate a new namespace id * @ns: namespace to generate id for * @id: if non-zero, this is the initial namespace and this is a fixed id * * Generates a new namespace id and assigns it to the namespace. All * namespaces types share the same id space and thus can be compared * directly. IOW, when two ids of two namespace are equal, they are * identical. */ u64 __ns_tree_gen_id(struct ns_common *ns, u64 id) { static atomic64_t namespace_cookie = ATOMIC64_INIT(NS_LAST_INIT_ID + 1); if (id) ns->ns_id = id; else ns->ns_id = atomic64_inc_return(&namespace_cookie); return ns->ns_id; }