summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-03-24 10:16:37 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-03-24 10:16:37 -0700
commitdf00ded23a6b4df888237333b1f86067d24113b2 (patch)
tree36591a7b9cd112c6525e8fa20fdf49c8011113f0 /include
parent71ee2fde57c707ac8f221321f3e951288f00f04b (diff)
parentd40dc30c7b7c80db2100b73ac26d39c362643a39 (diff)
Merge tag 'vfs-6.15-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs pidfs updates from Christian Brauner: - Allow retrieving exit information after a process has been reaped through pidfds via the new PIDFD_INTO_EXIT extension for the PIDFD_GET_INFO ioctl. Various tools need access to information about a process/task even after it has already been reaped. Pidfd polling allows waiting on either task exit or for a task to have been reaped. The contract for PIDFD_INFO_EXIT is simply that EPOLLHUP must be observed before exit information can be retrieved, i.e., exit information is only provided once the task has been reaped and then can be retrieved as long as the pidfd is open. - Add PIDFD_SELF_{THREAD,THREAD_GROUP} sentinels allowing userspace to forgo allocating a file descriptor for their own process. This is useful in scenarios where users want to act on their own process through pidfds and is akin to AT_FDCWD. - Improve premature thread-group leader and subthread exec behavior when polling on pidfds: (1) During a multi-threaded exec by a subthread, i.e., non-thread-group leader thread, all other threads in the thread-group including the thread-group leader are killed and the struct pid of the thread-group leader will be taken over by the subthread that called exec. IOW, two tasks change their TIDs. (2) A premature thread-group leader exit means that the thread-group leader exited before all of the other subthreads in the thread-group have exited. Both cases lead to inconsistencies for pidfd polling with PIDFD_THREAD. Any caller that holds a PIDFD_THREAD pidfd to the current thread-group leader may or may not see an exit notification on the file descriptor depending on when poll is performed. If the poll is performed before the exec of the subthread has concluded an exit notification is generated for the old thread-group leader. If the poll is performed after the exec of the subthread has concluded no exit notification is generated for the old thread-group leader. The correct behavior is to simply not generate an exit notification on the struct pid of a subhthread exec because the struct pid is taken over by the subthread and thus remains alive. But this is difficult to handle because a thread-group may exit premature as mentioned in (2). In that case an exit notification is reliably generated but the subthreads may continue to run for an indeterminate amount of time and thus also may exec at some point. After this pull no exit notifications will be generated for a PIDFD_THREAD pidfd for a thread-group leader until all subthreads have been reaped. If a subthread should exec before no exit notification will be generated until that task exits or it creates subthreads and repeates the cycle. This means an exit notification indicates the ability for the father to reap the child. * tag 'vfs-6.15-rc1.pidfs' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (25 commits) selftests/pidfd: third test for multi-threaded exec polling selftests/pidfd: second test for multi-threaded exec polling selftests/pidfd: first test for multi-threaded exec polling pidfs: improve multi-threaded exec and premature thread-group leader exit polling pidfs: ensure that PIDFS_INFO_EXIT is available selftests/pidfd: add seventh PIDFD_INFO_EXIT selftest selftests/pidfd: add sixth PIDFD_INFO_EXIT selftest selftests/pidfd: add fifth PIDFD_INFO_EXIT selftest selftests/pidfd: add fourth PIDFD_INFO_EXIT selftest selftests/pidfd: add third PIDFD_INFO_EXIT selftest selftests/pidfd: add second PIDFD_INFO_EXIT selftest selftests/pidfd: add first PIDFD_INFO_EXIT selftest selftests/pidfd: expand common pidfd header pidfs/selftests: ensure correct headers for ioctl handling selftests/pidfd: fix header inclusion pidfs: allow to retrieve exit information pidfs: record exit code and cgroupid at exit pidfs: use private inode slab cache pidfs: move setting flags into pidfs_alloc_file() pidfd: rely on automatic cleanup in __pidfd_prepare() ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/pidfs.h1
-rw-r--r--include/uapi/linux/pidfd.h31
2 files changed, 31 insertions, 1 deletions
diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h
index 7c830d0dec9a..05e6f8f4a026 100644
--- a/include/linux/pidfs.h
+++ b/include/linux/pidfs.h
@@ -6,6 +6,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags);
void __init pidfs_init(void);
void pidfs_add_pid(struct pid *pid);
void pidfs_remove_pid(struct pid *pid);
+void pidfs_exit(struct task_struct *tsk);
extern const struct dentry_operations pidfs_dentry_operations;
#endif /* _LINUX_PID_FS_H */
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 4540f6301b8c..2970ef44655a 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -10,6 +10,10 @@
/* Flags for pidfd_open(). */
#define PIDFD_NONBLOCK O_NONBLOCK
#define PIDFD_THREAD O_EXCL
+#ifdef __KERNEL__
+#include <linux/sched.h>
+#define PIDFD_CLONE CLONE_PIDFD
+#endif
/* Flags for pidfd_send_signal(). */
#define PIDFD_SIGNAL_THREAD (1UL << 0)
@@ -20,9 +24,34 @@
#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */
#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
+#define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested. */
#define PIDFD_INFO_SIZE_VER0 64 /* sizeof first published struct */
+/*
+ * The concept of process and threads in userland and the kernel is a confusing
+ * one - within the kernel every thread is a 'task' with its own individual PID,
+ * however from userland's point of view threads are grouped by a single PID,
+ * which is that of the 'thread group leader', typically the first thread
+ * spawned.
+ *
+ * To cut the Gideon knot, for internal kernel usage, we refer to
+ * PIDFD_SELF_THREAD to refer to the current thread (or task from a kernel
+ * perspective), and PIDFD_SELF_THREAD_GROUP to refer to the current thread
+ * group leader...
+ */
+#define PIDFD_SELF_THREAD -10000 /* Current thread. */
+#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */
+
+/*
+ * ...and for userland we make life simpler - PIDFD_SELF refers to the current
+ * thread, PIDFD_SELF_PROCESS refers to the process thread group leader.
+ *
+ * For nearly all practical uses, a user will want to use PIDFD_SELF.
+ */
+#define PIDFD_SELF PIDFD_SELF_THREAD
+#define PIDFD_SELF_PROCESS PIDFD_SELF_THREAD_GROUP
+
struct pidfd_info {
/*
* This mask is similar to the request_mask in statx(2).
@@ -62,7 +91,7 @@ struct pidfd_info {
__u32 sgid;
__u32 fsuid;
__u32 fsgid;
- __u32 spare0[1];
+ __s32 exit_code;
};
#define PIDFS_IOCTL_MAGIC 0xFF