diff options
| author | Christian Brauner <brauner@kernel.org> | 2025-11-09 22:11:26 +0100 |
|---|---|---|
| committer | Christian Brauner <brauner@kernel.org> | 2025-11-10 10:20:54 +0100 |
| commit | f8d5a8970d2f49411824fb1fdd34bbb3eea22756 (patch) | |
| tree | f5f5db3de81cfad10be972180b2c3286ddf58de4 /include/linux/ns_common.h | |
| parent | a51dce7c3261d26e574b35da71dac3903bb35ae2 (diff) | |
ns: handle setns(pidfd, ...) cleanly
The setns() system call supports:
(1) namespace file descriptors (nsfd)
(2) process file descriptors (pidfd)
When using nsfds the namespaces will remain active because they are
pinned by the vfs. However, when pidfds are used things are more
complicated.
When the target task exits and passes through exit_nsproxy_namespaces()
or is reaped and thus also passes through exit_cred_namespaces() after
the setns()'ing task has called prepare_nsset() but before the active
reference count of the set of namespaces it wants to setns() to might
have been dropped already:
P1 P2
pid_p1 = clone(CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWNS)
pidfd = pidfd_open(pid_p1)
setns(pidfd, CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWNS)
prepare_nsset()
exit(0)
// ns->__ns_active_ref == 1
// parent_ns->__ns_active_ref == 1
-> exit_nsproxy_namespaces()
-> exit_cred_namespaces()
// ns_active_ref_put() will also put
// the reference on the owner of the
// namespace. If the only reason the
// owning namespace was alive was
// because it was a parent of @ns
// it's active reference count now goes
// to zero... --------------------------------
// |
// ns->__ns_active_ref == 0 |
// parent_ns->__ns_active_ref == 0 |
| commit_nsset()
-----------------> // If setns()
// now manages to install the namespaces
// it will call ns_active_ref_get()
// on them thus bumping the active reference
// count from zero again but without also
// taking the required reference on the owner.
// Thus we get:
//
// ns->__ns_active_ref == 1
// parent_ns->__ns_active_ref == 0
When later someone does ns_active_ref_put() on @ns it will underflow
parent_ns->__ns_active_ref leading to a splat from our asserts
thinking there are still active references when in fact the counter
just underflowed.
So resurrect the ownership chain if necessary as well. If the caller
succeeded to grab passive references to the set of namespaces the
setns() should simply succeed even if the target task exists or gets
reaped in the meantime and thus has dropped all active references to its
namespaces.
The race is rare and can only be triggered when using pidfs to setns()
to namespaces. Also note that active reference on initial namespaces are
nops.
Since we now always handle parent references directly we can drop
ns_ref_active_get_owner() when adding a namespace to a namespace tree.
This is now all handled uniformly in the places where the new namespaces
actually become active.
Link: https://patch.msgid.link/20251109-namespace-6-19-fixes-v1-5-ae8a4ad5a3b3@kernel.org
Fixes: 3c9820d5c64a ("ns: add active reference count")
Reported-by: syzbot+1957b26299cf3ff7890c@syzkaller.appspotmail.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'include/linux/ns_common.h')
| -rw-r--r-- | include/linux/ns_common.h | 47 |
1 files changed, 4 insertions, 43 deletions
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 791b18dc77d0..3aaba2ca31d7 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -287,47 +287,8 @@ static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns #define ns_ref_active_read(__ns) \ ((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0) -void __ns_ref_active_get_owner(struct ns_common *ns); +void __ns_ref_active_put(struct ns_common *ns); -static __always_inline void __ns_ref_active_get(struct ns_common *ns) -{ - /* Initial namespaces are always active. */ - if (!is_ns_init_id(ns)) - WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active)); -} -#define ns_ref_active_get(__ns) \ - do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) - -static __always_inline bool __ns_ref_active_get_not_zero(struct ns_common *ns) -{ - /* Initial namespaces are always active. */ - if (is_ns_init_id(ns)) - return true; - - if (atomic_inc_not_zero(&ns->__ns_ref_active)) { - VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); - return true; - } - return false; -} - -#define ns_ref_active_get_owner(__ns) \ - do { if (__ns) __ns_ref_active_get_owner(to_ns_common(__ns)); } while (0) - -void __ns_ref_active_put_owner(struct ns_common *ns); - -static __always_inline void __ns_ref_active_put(struct ns_common *ns) -{ - /* Initial namespaces are always active. */ - if (is_ns_init_id(ns)) - return; - - if (atomic_dec_and_test(&ns->__ns_ref_active)) { - VFS_WARN_ON_ONCE(is_initial_namespace(ns)); - VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); - __ns_ref_active_put_owner(ns); - } -} #define ns_ref_active_put(__ns) \ do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0) @@ -343,9 +304,9 @@ static __always_inline struct ns_common *__must_check ns_get_unless_inactive(str return ns; } -void __ns_ref_active_resurrect(struct ns_common *ns); +void __ns_ref_active_get(struct ns_common *ns); -#define ns_ref_active_resurrect(__ns) \ - do { if (__ns) __ns_ref_active_resurrect(to_ns_common(__ns)); } while (0) +#define ns_ref_active_get(__ns) \ + do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) #endif |
