From 12b5b138d111db0588492002fdd8089af61b80e5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 3 Jun 2025 15:31:55 +0200 Subject: coredump: allow for flexible coredump handling Extend the coredump socket to allow the coredump server to tell the kernel how to process individual coredumps. When the crashing task connects to the coredump socket the kernel will send a struct coredump_req to the coredump server. The kernel will set the size member of struct coredump_req allowing the coredump server how much data can be read. The coredump server uses MSG_PEEK to peek the size of struct coredump_req. If the kernel uses a newer struct coredump_req the coredump server just reads the size it knows and discard any remaining bytes in the buffer. If the kernel uses an older struct coredump_req the coredump server just reads the size the kernel knows. The returned struct coredump_req will inform the coredump server what features the kernel supports. The coredump_req->mask member is set to the currently know features. The coredump server may only use features whose bits were raised by the kernel in coredump_req->mask. In response to a coredump_req from the kernel the coredump server sends a struct coredump_ack to the kernel. The kernel informs the coredump server what version of struct coredump_ack it supports by setting struct coredump_req->size_ack to the size it knows about. The coredump server may only send as many bytes as coredump_req->size_ack indicates (a smaller size is fine of course). The coredump server must set coredump_ack->size accordingly. The coredump server sets the features it wants to use in struct coredump_ack->mask. Only bits returned in struct coredump_req->mask may be used. In case an invalid struct coredump_ack is sent to the kernel a non-zero u32 integer is sent indicating the reason for the failure. If it was successful a zero u32 integer is sent. In the initial version the following features are supported in coredump_{req,ack}->mask: * COREDUMP_KERNEL The kernel will write the coredump data to the socket. * COREDUMP_USERSPACE The kernel will not write coredump data but will indicate to the parent that a coredump has been generated. This is used when userspace generates its own coredumps. * COREDUMP_REJECT The kernel will skip generating a coredump for this task. * COREDUMP_WAIT The kernel will prevent the task from exiting until the coredump server has shutdown the socket connection. The flexible coredump socket can be enabled by using the "@@" prefix instead of the single "@" prefix for the regular coredump socket: @@/run/systemd/coredump.socket will enable flexible coredump handling. Current kernels already enforce that "@" must be followed by "/" and will reject anything else. So extending this is backward and forward compatible. Link: https://lore.kernel.org/20250603-work-coredump-socket-protocol-v2-1-05a5f0c18ecc@kernel.org Acked-by: Lennart Poettering Reviewed-by: Alexander Mikhalitsyn Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/coredump.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 168 insertions(+), 27 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index f217ebf2b3b6..b3eaa8c27ced 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -83,15 +84,17 @@ static int core_name_size = CORENAME_MAX_SIZE; unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; enum coredump_type_t { - COREDUMP_FILE = 1, - COREDUMP_PIPE = 2, - COREDUMP_SOCK = 3, + COREDUMP_FILE = 1, + COREDUMP_PIPE = 2, + COREDUMP_SOCK = 3, + COREDUMP_SOCK_REQ = 4, }; struct core_name { char *corename; int used, size; enum coredump_type_t core_type; + u64 mask; }; static int expand_corename(struct core_name *cn, int size) @@ -235,6 +238,9 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, int pid_in_pattern = 0; int err = 0; + cn->mask = COREDUMP_KERNEL; + if (core_pipe_limit) + cn->mask |= COREDUMP_WAIT; cn->used = 0; cn->corename = NULL; if (*pat_ptr == '|') @@ -264,6 +270,13 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm, pat_ptr++; if (!(*pat_ptr)) return -ENOMEM; + if (*pat_ptr == '@') { + pat_ptr++; + if (!(*pat_ptr)) + return -ENOMEM; + + cn->core_type = COREDUMP_SOCK_REQ; + } err = cn_printf(cn, "%s", pat_ptr); if (err) @@ -632,6 +645,135 @@ static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) return 0; } +#ifdef CONFIG_UNIX +static inline bool coredump_sock_recv(struct file *file, struct coredump_ack *ack, size_t size, int flags) +{ + struct msghdr msg = {}; + struct kvec iov = { .iov_base = ack, .iov_len = size }; + ssize_t ret; + + memset(ack, 0, size); + ret = kernel_recvmsg(sock_from_file(file), &msg, &iov, 1, size, flags); + return ret == size; +} + +static inline bool coredump_sock_send(struct file *file, struct coredump_req *req) +{ + struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; + struct kvec iov = { .iov_base = req, .iov_len = sizeof(*req) }; + ssize_t ret; + + ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(*req)); + return ret == sizeof(*req); +} + +static_assert(sizeof(enum coredump_mark) == sizeof(__u32)); + +static inline bool coredump_sock_mark(struct file *file, enum coredump_mark mark) +{ + struct msghdr msg = { .msg_flags = MSG_NOSIGNAL }; + struct kvec iov = { .iov_base = &mark, .iov_len = sizeof(mark) }; + ssize_t ret; + + ret = kernel_sendmsg(sock_from_file(file), &msg, &iov, 1, sizeof(mark)); + return ret == sizeof(mark); +} + +static inline void coredump_sock_wait(struct file *file) +{ + ssize_t n; + + /* + * We use a simple read to wait for the coredump processing to + * finish. Either the socket is closed or we get sent unexpected + * data. In both cases, we're done. + */ + n = __kernel_read(file, &(char){ 0 }, 1, NULL); + if (n > 0) + coredump_report_failure("Coredump socket had unexpected data"); + else if (n < 0) + coredump_report_failure("Coredump socket failed"); +} + +static inline void coredump_sock_shutdown(struct file *file) +{ + struct socket *socket; + + socket = sock_from_file(file); + if (!socket) + return; + + /* Let userspace know we're done processing the coredump. */ + kernel_sock_shutdown(socket, SHUT_WR); +} + +static bool coredump_request(struct core_name *cn, struct coredump_params *cprm) +{ + struct coredump_req req = { + .size = sizeof(struct coredump_req), + .mask = COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT, + .size_ack = sizeof(struct coredump_ack), + }; + struct coredump_ack ack = {}; + ssize_t usize; + + if (cn->core_type != COREDUMP_SOCK_REQ) + return true; + + /* Let userspace know what we support. */ + if (!coredump_sock_send(cprm->file, &req)) + return false; + + /* Peek the size of the coredump_ack. */ + if (!coredump_sock_recv(cprm->file, &ack, sizeof(ack.size), + MSG_PEEK | MSG_WAITALL)) + return false; + + /* Refuse unknown coredump_ack sizes. */ + usize = ack.size; + if (usize < COREDUMP_ACK_SIZE_VER0) { + coredump_sock_mark(cprm->file, COREDUMP_MARK_MINSIZE); + return false; + } + + if (usize > sizeof(ack)) { + coredump_sock_mark(cprm->file, COREDUMP_MARK_MAXSIZE); + return false; + } + + /* Now retrieve the coredump_ack. */ + if (!coredump_sock_recv(cprm->file, &ack, usize, MSG_WAITALL)) + return false; + if (ack.size != usize) + return false; + + /* Refuse unknown coredump_ack flags. */ + if (ack.mask & ~req.mask) { + coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED); + return false; + } + + /* Refuse mutually exclusive options. */ + if (hweight64(ack.mask & (COREDUMP_USERSPACE | COREDUMP_KERNEL | + COREDUMP_REJECT)) != 1) { + coredump_sock_mark(cprm->file, COREDUMP_MARK_CONFLICTING); + return false; + } + + if (ack.spare) { + coredump_sock_mark(cprm->file, COREDUMP_MARK_UNSUPPORTED); + return false; + } + + cn->mask = ack.mask; + return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK); +} +#else +static inline void coredump_sock_wait(struct file *file) { } +static inline void coredump_sock_shutdown(struct file *file) { } +#endif + void do_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -850,6 +992,8 @@ void do_coredump(const kernel_siginfo_t *siginfo) } break; } + case COREDUMP_SOCK_REQ: + fallthrough; case COREDUMP_SOCK: { #ifdef CONFIG_UNIX struct file *file __free(fput) = NULL; @@ -918,6 +1062,9 @@ void do_coredump(const kernel_siginfo_t *siginfo) cprm.limit = RLIM_INFINITY; cprm.file = no_free_ptr(file); + + if (!coredump_request(&cn, &cprm)) + goto close_fail; #else coredump_report_failure("Core dump socket support %s disabled", cn.corename); goto close_fail; @@ -929,12 +1076,17 @@ void do_coredump(const kernel_siginfo_t *siginfo) goto close_fail; } + /* Don't even generate the coredump. */ + if (cn.mask & COREDUMP_REJECT) + goto close_fail; + /* get us an unshared descriptor table; almost always a no-op */ /* The cell spufs coredump code reads the file descriptor tables */ retval = unshare_files(); if (retval) goto close_fail; - if (!dump_interrupted()) { + + if ((cn.mask & COREDUMP_KERNEL) && !dump_interrupted()) { /* * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would * have this set to NULL. @@ -962,38 +1114,27 @@ void do_coredump(const kernel_siginfo_t *siginfo) free_vma_snapshot(&cprm); } -#ifdef CONFIG_UNIX - /* Let userspace know we're done processing the coredump. */ - if (sock_from_file(cprm.file)) - kernel_sock_shutdown(sock_from_file(cprm.file), SHUT_WR); -#endif + coredump_sock_shutdown(cprm.file); + + /* Let the parent know that a coredump was generated. */ + if (cn.mask & COREDUMP_USERSPACE) + core_dumped = true; /* * When core_pipe_limit is set we wait for the coredump server * or usermodehelper to finish before exiting so it can e.g., * inspect /proc/. */ - if (core_pipe_limit) { + if (cn.mask & COREDUMP_WAIT) { switch (cn.core_type) { case COREDUMP_PIPE: wait_for_dump_helpers(cprm.file); break; -#ifdef CONFIG_UNIX - case COREDUMP_SOCK: { - ssize_t n; - - /* - * We use a simple read to wait for the coredump - * processing to finish. Either the socket is - * closed or we get sent unexpected data. In - * both cases, we're done. - */ - n = __kernel_read(cprm.file, &(char){ 0 }, 1, NULL); - if (n != 0) - coredump_report_failure("Unexpected data on coredump socket"); + case COREDUMP_SOCK_REQ: + fallthrough; + case COREDUMP_SOCK: + coredump_sock_wait(cprm.file); break; - } -#endif default: break; } @@ -1249,8 +1390,8 @@ static inline bool check_coredump_socket(void) if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns) return false; - /* Must be an absolute path. */ - if (*(core_pattern + 1) != '/') + /* Must be an absolute path or the socket request. */ + if (*(core_pattern + 1) != '/' && *(core_pattern + 1) != '@') return false; return true; -- cgit v1.2.3 From e04f97c8be29523bae2576fceee84a4b030406fb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 5 Jun 2025 11:53:15 +0200 Subject: coredump: cleanup coredump socket functions We currently use multiple CONFIG_UNIX guards. This looks messy and makes the code harder to follow and maintain. Use a helper function coredump_sock_connect() that handles the connect portion. This allows us to remove the CONFIG_UNIX guard in the main do_coredump() function. Link: https://lore.kernel.org/20250605-schlamm-touren-720ba2b60a85@brauner Signed-off-by: Christian Brauner --- fs/coredump.c | 157 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 83 insertions(+), 74 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index b3eaa8c27ced..3568c300623c 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -646,6 +646,77 @@ static int umh_coredump_setup(struct subprocess_info *info, struct cred *new) } #ifdef CONFIG_UNIX +static bool coredump_sock_connect(struct core_name *cn, struct coredump_params *cprm) +{ + struct file *file __free(fput) = NULL; + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + ssize_t addr_len; + int retval; + struct socket *socket; + + addr_len = strscpy(addr.sun_path, cn->corename); + if (addr_len < 0) + return false; + addr_len += offsetof(struct sockaddr_un, sun_path) + 1; + + /* + * It is possible that the userspace process which is supposed + * to handle the coredump and is listening on the AF_UNIX socket + * coredumps. Userspace should just mark itself non dumpable. + */ + + retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket); + if (retval < 0) + return false; + + file = sock_alloc_file(socket, 0, NULL); + if (IS_ERR(file)) + return false; + + /* + * Set the thread-group leader pid which is used for the peer + * credentials during connect() below. Then immediately register + * it in pidfs... + */ + cprm->pid = task_tgid(current); + retval = pidfs_register_pid(cprm->pid); + if (retval) + return false; + + /* + * ... and set the coredump information so userspace has it + * available after connect()... + */ + pidfs_coredump(cprm); + + retval = kernel_connect(socket, (struct sockaddr *)(&addr), addr_len, + O_NONBLOCK | SOCK_COREDUMP); + /* + * ... Make sure to only put our reference after connect() took + * its own reference keeping the pidfs entry alive ... + */ + pidfs_put_pid(cprm->pid); + + if (retval) { + if (retval == -EAGAIN) + coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path); + else + coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval); + return false; + } + + /* ... and validate that @sk_peer_pid matches @cprm.pid. */ + if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm->pid)) + return false; + + cprm->limit = RLIM_INFINITY; + cprm->file = no_free_ptr(file); + + return true; +} + static inline bool coredump_sock_recv(struct file *file, struct coredump_ack *ack, size_t size, int flags) { struct msghdr msg = {}; @@ -707,7 +778,7 @@ static inline void coredump_sock_shutdown(struct file *file) kernel_sock_shutdown(socket, SHUT_WR); } -static bool coredump_request(struct core_name *cn, struct coredump_params *cprm) +static bool coredump_sock_request(struct core_name *cn, struct coredump_params *cprm) { struct coredump_req req = { .size = sizeof(struct coredump_req), @@ -770,6 +841,14 @@ static bool coredump_request(struct core_name *cn, struct coredump_params *cprm) return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK); } #else +static bool coredump_sock_connect(struct core_name *cn, + struct coredump_params *cprm) +{ + coredump_report_failure("Core dump socket support %s disabled", cn->corename); + return false; +} +static bool coredump_sock_request(struct core_name *cn, + struct coredump_params *cprm) { return false; } static inline void coredump_sock_wait(struct file *file) { } static inline void coredump_sock_shutdown(struct file *file) { } #endif @@ -994,83 +1073,13 @@ void do_coredump(const kernel_siginfo_t *siginfo) } case COREDUMP_SOCK_REQ: fallthrough; - case COREDUMP_SOCK: { -#ifdef CONFIG_UNIX - struct file *file __free(fput) = NULL; - struct sockaddr_un addr = { - .sun_family = AF_UNIX, - }; - ssize_t addr_len; - struct socket *socket; - - addr_len = strscpy(addr.sun_path, cn.corename); - if (addr_len < 0) - goto close_fail; - addr_len += offsetof(struct sockaddr_un, sun_path) + 1; - - /* - * It is possible that the userspace process which is - * supposed to handle the coredump and is listening on - * the AF_UNIX socket coredumps. Userspace should just - * mark itself non dumpable. - */ - - retval = sock_create_kern(&init_net, AF_UNIX, SOCK_STREAM, 0, &socket); - if (retval < 0) - goto close_fail; - - file = sock_alloc_file(socket, 0, NULL); - if (IS_ERR(file)) - goto close_fail; - - /* - * Set the thread-group leader pid which is used for the - * peer credentials during connect() below. Then - * immediately register it in pidfs... - */ - cprm.pid = task_tgid(current); - retval = pidfs_register_pid(cprm.pid); - if (retval) - goto close_fail; - - /* - * ... and set the coredump information so userspace - * has it available after connect()... - */ - pidfs_coredump(&cprm); - - retval = kernel_connect(socket, (struct sockaddr *)(&addr), - addr_len, O_NONBLOCK | SOCK_COREDUMP); - - /* - * ... Make sure to only put our reference after connect() took - * its own reference keeping the pidfs entry alive ... - */ - pidfs_put_pid(cprm.pid); - - if (retval) { - if (retval == -EAGAIN) - coredump_report_failure("Coredump socket %s receive queue full", addr.sun_path); - else - coredump_report_failure("Coredump socket connection %s failed %d", addr.sun_path, retval); + case COREDUMP_SOCK: + if (!coredump_sock_connect(&cn, &cprm)) goto close_fail; - } - /* ... and validate that @sk_peer_pid matches @cprm.pid. */ - if (WARN_ON_ONCE(unix_peer(socket->sk)->sk_peer_pid != cprm.pid)) + if (!coredump_sock_request(&cn, &cprm)) goto close_fail; - - cprm.limit = RLIM_INFINITY; - cprm.file = no_free_ptr(file); - - if (!coredump_request(&cn, &cprm)) - goto close_fail; -#else - coredump_report_failure("Core dump socket support %s disabled", cn.corename); - goto close_fail; -#endif break; - } default: WARN_ON_ONCE(true); goto close_fail; -- cgit v1.2.3 From fb4366ba8f1c86a72ffcb2b6f349e05cf77897d0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:15 +0200 Subject: coredump: rename format_corename() It's not really about the name anymore. It parses very distinct information. Reflect that in the name. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-1-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 3568c300623c..79a3c8141e8c 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -225,12 +225,13 @@ put_exe_file: return ret; } -/* format_corename will inspect the pattern parameter, and output a - * name into corename, which must have space for at least - * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. +/* + * coredump_parse will inspect the pattern parameter, and output a name + * into corename, which must have space for at least CORENAME_MAX_SIZE + * bytes plus one byte for the zero terminator. */ -static int format_corename(struct core_name *cn, struct coredump_params *cprm, - size_t **argv, int *argc) +static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, + size_t **argv, int *argc) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; @@ -910,7 +911,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) old_cred = override_creds(cred); - retval = format_corename(&cn, &cprm, &argv, &argc); + retval = coredump_parse(&cn, &cprm, &argv, &argc); if (retval < 0) { coredump_report_failure("format_corename failed, aborting core"); goto fail_unlock; -- cgit v1.2.3 From a5715af549b2ee0139ff965d337cfd1a5f7ee615 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:16 +0200 Subject: coredump: make coredump_parse() return bool There's no point in returning negative error values. They will never be seen by anyone. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-2-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 79a3c8141e8c..42ceb9db2a5a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -230,8 +230,8 @@ put_exe_file: * into corename, which must have space for at least CORENAME_MAX_SIZE * bytes plus one byte for the zero terminator. */ -static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, - size_t **argv, int *argc) +static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, + size_t **argv, int *argc) { const struct cred *cred = current_cred(); const char *pat_ptr = core_pattern; @@ -251,7 +251,7 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, else cn->core_type = COREDUMP_FILE; if (expand_corename(cn, core_name_size)) - return -ENOMEM; + return false; cn->corename[0] = '\0'; switch (cn->core_type) { @@ -259,33 +259,33 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, int argvs = sizeof(core_pattern) / 2; (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL); if (!(*argv)) - return -ENOMEM; + return false; (*argv)[(*argc)++] = 0; ++pat_ptr; if (!(*pat_ptr)) - return -ENOMEM; + return false; break; } case COREDUMP_SOCK: { /* skip the @ */ pat_ptr++; if (!(*pat_ptr)) - return -ENOMEM; + return false; if (*pat_ptr == '@') { pat_ptr++; if (!(*pat_ptr)) - return -ENOMEM; + return false; cn->core_type = COREDUMP_SOCK_REQ; } err = cn_printf(cn, "%s", pat_ptr); if (err) - return err; + return false; /* Require absolute paths. */ if (cn->corename[0] != '/') - return -EINVAL; + return false; /* * Ensure we can uses spaces to indicate additional @@ -293,7 +293,7 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, */ if (strchr(cn->corename, ' ')) { coredump_report_failure("Coredump socket may not %s contain spaces", cn->corename); - return -EINVAL; + return false; } /* @@ -303,13 +303,13 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, * via /proc/, using the SO_PEERPIDFD to guard * against pid recycling when opening /proc/. */ - return 0; + return true; } case COREDUMP_FILE: break; default: WARN_ON_ONCE(true); - return -EINVAL; + return false; } /* Repeat as long as we have more pattern to process and more output @@ -447,7 +447,7 @@ static int coredump_parse(struct core_name *cn, struct coredump_params *cprm, } if (err) - return err; + return false; } out: @@ -457,9 +457,9 @@ out: * and core_uses_pid is set, then .%pid will be appended to * the filename. Do not do this for piped commands. */ if (cn->core_type == COREDUMP_FILE && !pid_in_pattern && core_uses_pid) - return cn_printf(cn, ".%d", task_tgid_vnr(current)); + return cn_printf(cn, ".%d", task_tgid_vnr(current)) == 0; - return 0; + return true; } static int zap_process(struct signal_struct *signal, int exit_code) @@ -911,8 +911,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) old_cred = override_creds(cred); - retval = coredump_parse(&cn, &cprm, &argv, &argc); - if (retval < 0) { + if (!coredump_parse(&cn, &cprm, &argv, &argc)) { coredump_report_failure("format_corename failed, aborting core"); goto fail_unlock; } -- cgit v1.2.3 From 67c3a0b0ad1a78d7ee9c3aadaed22561f7f85466 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:17 +0200 Subject: coredump: fix socket path validation Make sure that we keep it extensible and well-formed. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-3-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 42ceb9db2a5a..70e37435eca9 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1399,9 +1399,17 @@ static inline bool check_coredump_socket(void) if (current->nsproxy->mnt_ns != init_task.nsproxy->mnt_ns) return false; - /* Must be an absolute path or the socket request. */ - if (*(core_pattern + 1) != '/' && *(core_pattern + 1) != '@') + /* Must be an absolute path... */ + if (core_pattern[1] != '/') { + /* ... or the socket request protocol... */ + if (core_pattern[1] != '@') + return false; + /* ... and if so must be an absolute path. */ + if (core_pattern[2] != '/') + return false; + /* Anything else is unsupported. */ return false; + } return true; } -- cgit v1.2.3 From 3a2c977c463c68bf6fcd0138d15efa5f3adc743c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:18 +0200 Subject: coredump: validate that path doesn't exceed UNIX_PATH_MAX so we don't pointlessly accepts things that go over the limit. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-4-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 70e37435eca9..a64b87878ab3 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1388,6 +1388,8 @@ void validate_coredump_safety(void) static inline bool check_coredump_socket(void) { + const char *p; + if (core_pattern[0] != '@') return true; @@ -1407,10 +1409,15 @@ static inline bool check_coredump_socket(void) /* ... and if so must be an absolute path. */ if (core_pattern[2] != '/') return false; - /* Anything else is unsupported. */ - return false; + p = &core_pattern[2]; + } else { + p = &core_pattern[1]; } + /* The path obviously cannot exceed UNIX_PATH_MAX. */ + if (strlen(p) >= UNIX_PATH_MAX) + return false; + return true; } -- cgit v1.2.3 From edfe3bdbbb52339cd8c2366402f2702c5ebc15c7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:20 +0200 Subject: coredump: don't allow ".." in coredump socket path There's no point in allowing to walk upwards for the coredump socket. We already force userspace to give use a sane path, no symlinks, no magiclinks, and also block "..". Use an absolute path without any shenanigans. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-6-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index a64b87878ab3..8437bdc26d08 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1418,6 +1418,10 @@ static inline bool check_coredump_socket(void) if (strlen(p) >= UNIX_PATH_MAX) return false; + /* Must not contain ".." in the path. */ + if (name_contains_dotdot(core_pattern)) + return false; + return true; } -- cgit v1.2.3 From 6dfc06d328b70af22c577bb908c97f8841b9f4fc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:21 +0200 Subject: coredump: validate socket path in coredump_parse() properly again. Someone might have modified the buffer concurrently. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-7-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 8437bdc26d08..52efd1b34261 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -296,6 +296,17 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, return false; } + /* Must not contain ".." in the path. */ + if (name_contains_dotdot(cn->corename)) { + coredump_report_failure("Coredump socket may not %s contain '..' spaces", cn->corename); + return false; + } + + if (strlen(cn->corename) >= UNIX_PATH_MAX) { + coredump_report_failure("Coredump socket path %s too long", cn->corename); + return false; + } + /* * Currently no need to parse any other options. * Relevant information can be retrieved from the peer -- cgit v1.2.3 From 70e3ee31282d293c794fb5bbec8efe495c32044b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:23 +0200 Subject: coredump: rename do_coredump() to vfs_coredump() Align the naming with the rest of our helpers exposed outside of core vfs. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-9-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 52efd1b34261..8a401eeee940 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -865,7 +865,7 @@ static inline void coredump_sock_wait(struct file *file) { } static inline void coredump_sock_shutdown(struct file *file) { } #endif -void do_coredump(const kernel_siginfo_t *siginfo) +void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; struct core_name cn; -- cgit v1.2.3 From 7bbb05dbea38e56d9f6ac7ac1040f93b0808cbce Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:24 +0200 Subject: coredump: split file coredumping into coredump_file() * Move that whole mess into a separate helper instead of having all that hanging around in vfs_coredump() directly. * Stop using that need_suid_safe variable and add an inline helper that clearly communicates what's going on everywhere consistently. The mm flag snapshot is stable and can't change so nothing's gained with that boolean. * Only setup cprm->file once everything else succeeded, using RAII for the coredump file before. That allows to don't care to what goto label we jump in vfs_coredump(). Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-10-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 207 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 106 insertions(+), 101 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 8a401eeee940..9f9d8ae29359 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -865,6 +865,108 @@ static inline void coredump_sock_wait(struct file *file) { } static inline void coredump_sock_shutdown(struct file *file) { } #endif +/* cprm->mm_flags contains a stable snapshot of dumpability flags. */ +static inline bool coredump_force_suid_safe(const struct coredump_params *cprm) +{ + /* Require nonrelative corefile path and be extra careful. */ + return __get_dumpable(cprm->mm_flags) == SUID_DUMP_ROOT; +} + +static bool coredump_file(struct core_name *cn, struct coredump_params *cprm, + const struct linux_binfmt *binfmt) +{ + struct mnt_idmap *idmap; + struct inode *inode; + struct file *file __free(fput) = NULL; + int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | O_LARGEFILE | O_EXCL; + + if (cprm->limit < binfmt->min_coredump) + return false; + + if (coredump_force_suid_safe(cprm) && cn->corename[0] != '/') { + coredump_report_failure("this process can only dump core to a fully qualified path, skipping core dump"); + return false; + } + + /* + * Unlink the file if it exists unless this is a SUID + * binary - in that case, we're running around with root + * privs and don't want to unlink another user's coredump. + */ + if (!coredump_force_suid_safe(cprm)) { + /* + * If it doesn't exist, that's fine. If there's some + * other problem, we'll catch it at the filp_open(). + */ + do_unlinkat(AT_FDCWD, getname_kernel(cn->corename)); + } + + /* + * There is a race between unlinking and creating the + * file, but if that causes an EEXIST here, that's + * fine - another process raced with us while creating + * the corefile, and the other process won. To userspace, + * what matters is that at least one of the two processes + * writes its coredump successfully, not which one. + */ + if (coredump_force_suid_safe(cprm)) { + /* + * Using user namespaces, normal user tasks can change + * their current->fs->root to point to arbitrary + * directories. Since the intention of the "only dump + * with a fully qualified path" rule is to control where + * coredumps may be placed using root privileges, + * current->fs->root must not be used. Instead, use the + * root directory of init_task. + */ + struct path root; + + task_lock(&init_task); + get_fs_root(init_task.fs, &root); + task_unlock(&init_task); + file = file_open_root(&root, cn->corename, open_flags, 0600); + path_put(&root); + } else { + file = filp_open(cn->corename, open_flags, 0600); + } + if (IS_ERR(file)) + return false; + + inode = file_inode(file); + if (inode->i_nlink > 1) + return false; + if (d_unhashed(file->f_path.dentry)) + return false; + /* + * AK: actually i see no reason to not allow this for named + * pipes etc, but keep the previous behaviour for now. + */ + if (!S_ISREG(inode->i_mode)) + return false; + /* + * Don't dump core if the filesystem changed owner or mode + * of the file during file creation. This is an issue when + * a process dumps core while its cwd is e.g. on a vfat + * filesystem. + */ + idmap = file_mnt_idmap(file); + if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), current_fsuid())) { + coredump_report_failure("Core dump to %s aborted: cannot preserve file owner", cn->corename); + return false; + } + if ((inode->i_mode & 0677) != 0600) { + coredump_report_failure("Core dump to %s aborted: cannot preserve file permissions", cn->corename); + return false; + } + if (!(file->f_mode & FMODE_CAN_WRITE)) + return false; + if (do_truncate(idmap, file->f_path.dentry, 0, 0, file)) + return false; + + cprm->file = no_free_ptr(file); + return true; +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -876,8 +978,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) int retval = 0; size_t *argv = NULL; int argc = 0; - /* require nonrelative corefile path and be extra careful */ - bool need_suid_safe = false; bool core_dumped = false; static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { @@ -910,11 +1010,8 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) * so we dump it as root in mode 2, and only into a controlled * environment (pipe handler or fully qualified path). */ - if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) { - /* Setuid core dump mode */ - cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */ - need_suid_safe = true; - } + if (coredump_force_suid_safe(&cprm)) + cred->fsuid = GLOBAL_ROOT_UID; retval = coredump_wait(siginfo->si_signo, &core_state); if (retval < 0) @@ -928,102 +1025,10 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) } switch (cn.core_type) { - case COREDUMP_FILE: { - struct mnt_idmap *idmap; - struct inode *inode; - int open_flags = O_CREAT | O_WRONLY | O_NOFOLLOW | - O_LARGEFILE | O_EXCL; - - if (cprm.limit < binfmt->min_coredump) - goto fail_unlock; - - if (need_suid_safe && cn.corename[0] != '/') { - coredump_report_failure( - "this process can only dump core to a fully qualified path, skipping core dump"); - goto fail_unlock; - } - - /* - * Unlink the file if it exists unless this is a SUID - * binary - in that case, we're running around with root - * privs and don't want to unlink another user's coredump. - */ - if (!need_suid_safe) { - /* - * If it doesn't exist, that's fine. If there's some - * other problem, we'll catch it at the filp_open(). - */ - do_unlinkat(AT_FDCWD, getname_kernel(cn.corename)); - } - - /* - * There is a race between unlinking and creating the - * file, but if that causes an EEXIST here, that's - * fine - another process raced with us while creating - * the corefile, and the other process won. To userspace, - * what matters is that at least one of the two processes - * writes its coredump successfully, not which one. - */ - if (need_suid_safe) { - /* - * Using user namespaces, normal user tasks can change - * their current->fs->root to point to arbitrary - * directories. Since the intention of the "only dump - * with a fully qualified path" rule is to control where - * coredumps may be placed using root privileges, - * current->fs->root must not be used. Instead, use the - * root directory of init_task. - */ - struct path root; - - task_lock(&init_task); - get_fs_root(init_task.fs, &root); - task_unlock(&init_task); - cprm.file = file_open_root(&root, cn.corename, - open_flags, 0600); - path_put(&root); - } else { - cprm.file = filp_open(cn.corename, open_flags, 0600); - } - if (IS_ERR(cprm.file)) - goto fail_unlock; - - inode = file_inode(cprm.file); - if (inode->i_nlink > 1) - goto close_fail; - if (d_unhashed(cprm.file->f_path.dentry)) - goto close_fail; - /* - * AK: actually i see no reason to not allow this for named - * pipes etc, but keep the previous behaviour for now. - */ - if (!S_ISREG(inode->i_mode)) - goto close_fail; - /* - * Don't dump core if the filesystem changed owner or mode - * of the file during file creation. This is an issue when - * a process dumps core while its cwd is e.g. on a vfat - * filesystem. - */ - idmap = file_mnt_idmap(cprm.file); - if (!vfsuid_eq_kuid(i_uid_into_vfsuid(idmap, inode), - current_fsuid())) { - coredump_report_failure("Core dump to %s aborted: " - "cannot preserve file owner", cn.corename); - goto close_fail; - } - if ((inode->i_mode & 0677) != 0600) { - coredump_report_failure("Core dump to %s aborted: " - "cannot preserve file permissions", cn.corename); - goto close_fail; - } - if (!(cprm.file->f_mode & FMODE_CAN_WRITE)) - goto close_fail; - if (do_truncate(idmap, cprm.file->f_path.dentry, - 0, 0, cprm.file)) + case COREDUMP_FILE: + if (!coredump_file(&cn, &cprm, binfmt)) goto close_fail; break; - } case COREDUMP_PIPE: { int argi; int dump_count; -- cgit v1.2.3 From a961c737cda8f172e108da881691cadafb9a061e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:25 +0200 Subject: coredump: prepare to simplify exit paths The exit path is currently entangled with core pipe limit accounting which is really unpleasant. Use a local variable in struct core_name that remembers whether the count was incremented and if so to clean decrement in once the coredump is done. Assert that this only happens for pipes. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-11-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 9f9d8ae29359..4afaf792a12e 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -93,6 +93,7 @@ enum coredump_type_t { struct core_name { char *corename; int used, size; + unsigned int core_pipe_limit; enum coredump_type_t core_type; u64 mask; }; @@ -244,6 +245,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, cn->mask |= COREDUMP_WAIT; cn->used = 0; cn->corename = NULL; + cn->core_pipe_limit = 0; if (*pat_ptr == '|') cn->core_type = COREDUMP_PIPE; else if (*pat_ptr == '@') @@ -1031,7 +1033,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) break; case COREDUMP_PIPE: { int argi; - int dump_count; char **helper_argv; struct subprocess_info *sub_info; @@ -1052,21 +1053,21 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) * core_pattern process dies. */ coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); - goto fail_unlock; + goto close_fail; } cprm.limit = RLIM_INFINITY; - dump_count = atomic_inc_return(&core_dump_count); - if (core_pipe_limit && (core_pipe_limit < dump_count)) { + cn.core_pipe_limit = atomic_inc_return(&core_dump_count); + if (core_pipe_limit && (core_pipe_limit < cn.core_pipe_limit)) { coredump_report_failure("over core_pipe_limit, skipping core dump"); - goto fail_dropcount; + goto close_fail; } helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL); if (!helper_argv) { coredump_report_failure("%s failed to allocate memory", __func__); - goto fail_dropcount; + goto close_fail; } for (argi = 0; argi < argc; argi++) helper_argv[argi] = cn.corename + argv[argi]; @@ -1168,9 +1169,10 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) close_fail: if (cprm.file) filp_close(cprm.file, NULL); -fail_dropcount: - if (cn.core_type == COREDUMP_PIPE) + if (cn.core_pipe_limit) { + VFS_WARN_ON_ONCE(cn.core_type != COREDUMP_PIPE); atomic_dec(&core_dump_count); + } fail_unlock: kfree(argv); kfree(cn.corename); -- cgit v1.2.3 From 4f599219f71399ac2092d2e06b2cc38e50c45c53 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:26 +0200 Subject: coredump: move core_pipe_count to global variable The pipe coredump counter is a static local variable instead of a global variable like all of the rest. Move it to a global variable so it's all consistent. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-12-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 4afaf792a12e..c863e053b1f8 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -82,6 +82,7 @@ static unsigned int core_sort_vma; static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; unsigned int core_file_note_size_limit = CORE_FILE_NOTE_SIZE_DEFAULT; +static atomic_t core_pipe_count = ATOMIC_INIT(0); enum coredump_type_t { COREDUMP_FILE = 1, @@ -981,7 +982,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) size_t *argv = NULL; int argc = 0; bool core_dumped = false; - static atomic_t core_dump_count = ATOMIC_INIT(0); struct coredump_params cprm = { .siginfo = siginfo, .limit = rlimit(RLIMIT_CORE), @@ -1057,7 +1057,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) } cprm.limit = RLIM_INFINITY; - cn.core_pipe_limit = atomic_inc_return(&core_dump_count); + cn.core_pipe_limit = atomic_inc_return(&core_pipe_count); if (core_pipe_limit && (core_pipe_limit < cn.core_pipe_limit)) { coredump_report_failure("over core_pipe_limit, skipping core dump"); goto close_fail; @@ -1171,7 +1171,7 @@ close_fail: filp_close(cprm.file, NULL); if (cn.core_pipe_limit) { VFS_WARN_ON_ONCE(cn.core_type != COREDUMP_PIPE); - atomic_dec(&core_dump_count); + atomic_dec(&core_pipe_count); } fail_unlock: kfree(argv); -- cgit v1.2.3 From 9f29a347d7b1b2022dfc6e5a93d4f2a7b34f5d4d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:27 +0200 Subject: coredump: split pipe coredumping into coredump_pipe() * Move that whole mess into a separate helper instead of having all that hanging around in vfs_coredump() directly. Cleanup paths are already centralized. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-13-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 114 ++++++++++++++++++++++++++++++---------------------------- 1 file changed, 59 insertions(+), 55 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index c863e053b1f8..f4f7f0a0ae40 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -970,6 +970,63 @@ static bool coredump_file(struct core_name *cn, struct coredump_params *cprm, return true; } +static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm, + size_t *argv, int argc) +{ + int argi; + char **helper_argv __free(kfree) = NULL; + struct subprocess_info *sub_info; + + if (cprm->limit == 1) { + /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. + * + * Normally core limits are irrelevant to pipes, since + * we're not writing to the file system, but we use + * cprm.limit of 1 here as a special value, this is a + * consistent way to catch recursive crashes. + * We can still crash if the core_pattern binary sets + * RLIM_CORE = !1, but it runs as root, and can do + * lots of stupid things. + * + * Note that we use task_tgid_vnr here to grab the pid + * of the process group leader. That way we get the + * right pid if a thread in a multi-threaded + * core_pattern process dies. + */ + coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); + return false; + } + cprm->limit = RLIM_INFINITY; + + cn->core_pipe_limit = atomic_inc_return(&core_pipe_count); + if (core_pipe_limit && (core_pipe_limit < cn->core_pipe_limit)) { + coredump_report_failure("over core_pipe_limit, skipping core dump"); + return false; + } + + helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), GFP_KERNEL); + if (!helper_argv) { + coredump_report_failure("%s failed to allocate memory", __func__); + return false; + } + for (argi = 0; argi < argc; argi++) + helper_argv[argi] = cn->corename + argv[argi]; + helper_argv[argi] = NULL; + + sub_info = call_usermodehelper_setup(helper_argv[0], helper_argv, NULL, + GFP_KERNEL, umh_coredump_setup, + NULL, cprm); + if (!sub_info) + return false; + + if (call_usermodehelper_exec(sub_info, UMH_WAIT_EXEC)) { + coredump_report_failure("|%s pipe failed", cn->corename); + return false; + } + + return true; +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -1031,63 +1088,10 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (!coredump_file(&cn, &cprm, binfmt)) goto close_fail; break; - case COREDUMP_PIPE: { - int argi; - char **helper_argv; - struct subprocess_info *sub_info; - - if (cprm.limit == 1) { - /* See umh_coredump_setup() which sets RLIMIT_CORE = 1. - * - * Normally core limits are irrelevant to pipes, since - * we're not writing to the file system, but we use - * cprm.limit of 1 here as a special value, this is a - * consistent way to catch recursive crashes. - * We can still crash if the core_pattern binary sets - * RLIM_CORE = !1, but it runs as root, and can do - * lots of stupid things. - * - * Note that we use task_tgid_vnr here to grab the pid - * of the process group leader. That way we get the - * right pid if a thread in a multi-threaded - * core_pattern process dies. - */ - coredump_report_failure("RLIMIT_CORE is set to 1, aborting core"); - goto close_fail; - } - cprm.limit = RLIM_INFINITY; - - cn.core_pipe_limit = atomic_inc_return(&core_pipe_count); - if (core_pipe_limit && (core_pipe_limit < cn.core_pipe_limit)) { - coredump_report_failure("over core_pipe_limit, skipping core dump"); + case COREDUMP_PIPE: + if (!coredump_pipe(&cn, &cprm, argv, argc)) goto close_fail; - } - - helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv), - GFP_KERNEL); - if (!helper_argv) { - coredump_report_failure("%s failed to allocate memory", __func__); - goto close_fail; - } - for (argi = 0; argi < argc; argi++) - helper_argv[argi] = cn.corename + argv[argi]; - helper_argv[argi] = NULL; - - retval = -ENOMEM; - sub_info = call_usermodehelper_setup(helper_argv[0], - helper_argv, NULL, GFP_KERNEL, - umh_coredump_setup, NULL, &cprm); - if (sub_info) - retval = call_usermodehelper_exec(sub_info, - UMH_WAIT_EXEC); - - kfree(helper_argv); - if (retval) { - coredump_report_failure("|%s pipe failed", cn.corename); - goto close_fail; - } break; - } case COREDUMP_SOCK_REQ: fallthrough; case COREDUMP_SOCK: -- cgit v1.2.3 From 5153053692987035a82bb4a6714ea12a5bd2bfdc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:28 +0200 Subject: coredump: move pipe specific file check into coredump_pipe() There's no point in having this eyesore in the middle of vfs_coredump(). Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-14-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index f4f7f0a0ae40..1e05d831cda8 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1024,6 +1024,15 @@ static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm, return false; } + /* + * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would + * have this set to NULL. + */ + if (!cprm->file) { + coredump_report_failure("Core dump to |%s disabled", cn->corename); + return false; + } + return true; } @@ -1117,14 +1126,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) goto close_fail; if ((cn.mask & COREDUMP_KERNEL) && !dump_interrupted()) { - /* - * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would - * have this set to NULL. - */ - if (!cprm.file) { - coredump_report_failure("Core dump to |%s disabled", cn.corename); - goto close_fail; - } if (!dump_vma_snapshot(&cprm)) goto close_fail; -- cgit v1.2.3 From d6527db34d08d7d411c377a25c05fff30eeba9ea Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:29 +0200 Subject: coredump: use a single helper for the socket Don't split it into multiple functions. Just use a single one like we do for coredump_file() and coredump_pipe() now. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-15-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 1e05d831cda8..48d90ec8c86a 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -855,17 +855,18 @@ static bool coredump_sock_request(struct core_name *cn, struct coredump_params * cn->mask = ack.mask; return coredump_sock_mark(cprm->file, COREDUMP_MARK_REQACK); } -#else -static bool coredump_sock_connect(struct core_name *cn, - struct coredump_params *cprm) + +static bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) { - coredump_report_failure("Core dump socket support %s disabled", cn->corename); - return false; + if (!coredump_sock_connect(cn, cprm)) + return false; + + return coredump_sock_request(cn, cprm); } -static bool coredump_sock_request(struct core_name *cn, - struct coredump_params *cprm) { return false; } +#else static inline void coredump_sock_wait(struct file *file) { } static inline void coredump_sock_shutdown(struct file *file) { } +static inline bool coredump_socket(struct core_name *cn, struct coredump_params *cprm) { return false; } #endif /* cprm->mm_flags contains a stable snapshot of dumpability flags. */ @@ -1104,10 +1105,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) case COREDUMP_SOCK_REQ: fallthrough; case COREDUMP_SOCK: - if (!coredump_sock_connect(&cn, &cprm)) - goto close_fail; - - if (!coredump_sock_request(&cn, &cprm)) + if (!coredump_socket(&cn, &cprm)) goto close_fail; break; default: -- cgit v1.2.3 From 3a4db72d0368c5f618e18a12580d5c5dca7b2839 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:30 +0200 Subject: coredump: add coredump_write() to encapsulate that logic simplifying vfs_coredump() even further. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-16-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 56 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 22 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 48d90ec8c86a..dea4823b55b8 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -95,6 +95,7 @@ struct core_name { char *corename; int used, size; unsigned int core_pipe_limit; + bool core_dumped; enum coredump_type_t core_type; u64 mask; }; @@ -247,6 +248,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, cn->used = 0; cn->corename = NULL; cn->core_pipe_limit = 0; + cn->core_dumped = false; if (*pat_ptr == '|') cn->core_type = COREDUMP_PIPE; else if (*pat_ptr == '@') @@ -1037,6 +1039,34 @@ static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm, return true; } +static bool coredump_write(struct core_name *cn, + struct coredump_params *cprm, + struct linux_binfmt *binfmt) +{ + + if (dump_interrupted()) + return true; + + if (!dump_vma_snapshot(cprm)) + return false; + + file_start_write(cprm->file); + cn->core_dumped = binfmt->core_dump(cprm); + /* + * Ensures that file size is big enough to contain the current + * file postion. This prevents gdb from complaining about + * a truncated file if the last "write" to the file was + * dump_skip. + */ + if (cprm->to_skip) { + cprm->to_skip--; + dump_emit(cprm, "", 1); + } + file_end_write(cprm->file); + free_vma_snapshot(cprm); + return true; +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -1048,7 +1078,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) int retval = 0; size_t *argv = NULL; int argc = 0; - bool core_dumped = false; struct coredump_params cprm = { .siginfo = siginfo, .limit = rlimit(RLIMIT_CORE), @@ -1123,31 +1152,14 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (retval) goto close_fail; - if ((cn.mask & COREDUMP_KERNEL) && !dump_interrupted()) { - if (!dump_vma_snapshot(&cprm)) - goto close_fail; - - file_start_write(cprm.file); - core_dumped = binfmt->core_dump(&cprm); - /* - * Ensures that file size is big enough to contain the current - * file postion. This prevents gdb from complaining about - * a truncated file if the last "write" to the file was - * dump_skip. - */ - if (cprm.to_skip) { - cprm.to_skip--; - dump_emit(&cprm, "", 1); - } - file_end_write(cprm.file); - free_vma_snapshot(&cprm); - } + if ((cn.mask & COREDUMP_KERNEL) && !coredump_write(&cn, &cprm, binfmt)) + goto close_fail; coredump_sock_shutdown(cprm.file); /* Let the parent know that a coredump was generated. */ if (cn.mask & COREDUMP_USERSPACE) - core_dumped = true; + cn.core_dumped = true; /* * When core_pipe_limit is set we wait for the coredump server @@ -1179,7 +1191,7 @@ close_fail: fail_unlock: kfree(argv); kfree(cn.corename); - coredump_finish(core_dumped); + coredump_finish(cn.core_dumped); revert_creds(old_cred); fail_creds: put_cred(cred); -- cgit v1.2.3 From 4a9f5d7fb6649af534c36aa8cc9c1aa51b172ad9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:31 +0200 Subject: coredump: auto cleanup argv to prepare for a simpler exit path. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-17-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index dea4823b55b8..68da77d00170 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1076,7 +1076,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) const struct cred *old_cred; struct cred *cred; int retval = 0; - size_t *argv = NULL; + size_t *argv __free(kfree) = NULL; int argc = 0; struct coredump_params cprm = { .siginfo = siginfo, @@ -1189,7 +1189,6 @@ close_fail: atomic_dec(&core_pipe_count); } fail_unlock: - kfree(argv); kfree(cn.corename); coredump_finish(cn.core_dumped); revert_creds(old_cred); -- cgit v1.2.3 From 8434fac512d35597488b13e23cc85e2903f5c8ae Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:32 +0200 Subject: coredump: directly return instead of jumping to a pointless cleanup label. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-18-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 68da77d00170..b2e9ac34d9a3 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1095,13 +1095,13 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) binfmt = mm->binfmt; if (!binfmt || !binfmt->core_dump) - goto fail; + return; if (!__get_dumpable(cprm.mm_flags)) - goto fail; + return; cred = prepare_creds(); if (!cred) - goto fail; + return; /* * We cannot trust fsuid as being the "true" uid of the process * nor do we know its entire history. We only know it was tainted @@ -1194,7 +1194,6 @@ fail_unlock: revert_creds(old_cred); fail_creds: put_cred(cred); -fail: return; } -- cgit v1.2.3 From 7a568fcdad7c75a1ee196921cf651de607c2c5d5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:34 +0200 Subject: coredump: auto cleanup prepare_creds() which will allow us to simplify the exit path in further patches. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-20-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index b2e9ac34d9a3..e9c8696283f6 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1074,7 +1074,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; const struct cred *old_cred; - struct cred *cred; + struct cred *cred __free(put_cred) = NULL; int retval = 0; size_t *argv __free(kfree) = NULL; int argc = 0; @@ -1113,7 +1113,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) retval = coredump_wait(siginfo->si_signo, &core_state); if (retval < 0) - goto fail_creds; + return; old_cred = override_creds(cred); @@ -1192,8 +1192,6 @@ fail_unlock: kfree(cn.corename); coredump_finish(cn.core_dumped); revert_creds(old_cred); -fail_creds: - put_cred(cred); return; } -- cgit v1.2.3 From cfd6c12293d7cc257f27770399a3d8f11bf7d25c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:35 +0200 Subject: coredump: add coredump_cleanup() Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-21-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index e9c8696283f6..9c028ef087d4 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1067,6 +1067,18 @@ static bool coredump_write(struct core_name *cn, return true; } +static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm) +{ + if (cprm->file) + filp_close(cprm->file, NULL); + if (cn->core_pipe_limit) { + VFS_WARN_ON_ONCE(cn->core_type != COREDUMP_PIPE); + atomic_dec(&core_pipe_count); + } + kfree(cn->corename); + coredump_finish(cn->core_dumped); +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct core_state core_state; @@ -1119,7 +1131,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (!coredump_parse(&cn, &cprm, &argv, &argc)) { coredump_report_failure("format_corename failed, aborting core"); - goto fail_unlock; + goto close_fail; } switch (cn.core_type) { @@ -1182,15 +1194,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) } close_fail: - if (cprm.file) - filp_close(cprm.file, NULL); - if (cn.core_pipe_limit) { - VFS_WARN_ON_ONCE(cn.core_type != COREDUMP_PIPE); - atomic_dec(&core_pipe_count); - } -fail_unlock: - kfree(cn.corename); - coredump_finish(cn.core_dumped); + coredump_cleanup(&cn, &cprm); revert_creds(old_cred); return; } -- cgit v1.2.3 From ae20958b37acf82da4928910ca6351719b5ddba7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:36 +0200 Subject: coredump: order auto cleanup variables at the top so they're easy to spot. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-22-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 9c028ef087d4..d3f09bf71f5f 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1081,14 +1081,14 @@ static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm) void vfs_coredump(const kernel_siginfo_t *siginfo) { + struct cred *cred __free(put_cred) = NULL; + size_t *argv __free(kfree) = NULL; struct core_state core_state; struct core_name cn; struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; const struct cred *old_cred; - struct cred *cred __free(put_cred) = NULL; int retval = 0; - size_t *argv __free(kfree) = NULL; int argc = 0; struct coredump_params cprm = { .siginfo = siginfo, -- cgit v1.2.3 From 9dd88f3626462e4ffd008196e053d4004e44427b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:37 +0200 Subject: coredump: avoid pointless variable we don't use that value at all so don't bother with it in the first place. Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-23-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index d3f09bf71f5f..178eddbcd6ad 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1088,7 +1088,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) struct mm_struct *mm = current->mm; struct linux_binfmt * binfmt; const struct cred *old_cred; - int retval = 0; int argc = 0; struct coredump_params cprm = { .siginfo = siginfo, @@ -1123,8 +1122,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (coredump_force_suid_safe(&cprm)) cred->fsuid = GLOBAL_ROOT_UID; - retval = coredump_wait(siginfo->si_signo, &core_state); - if (retval < 0) + if (coredump_wait(siginfo->si_signo, &core_state) < 0) return; old_cred = override_creds(cred); @@ -1160,8 +1158,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) /* get us an unshared descriptor table; almost always a no-op */ /* The cell spufs coredump code reads the file descriptor tables */ - retval = unshare_files(); - if (retval) + if (unshare_files()) goto close_fail; if ((cn.mask & COREDUMP_KERNEL) && !coredump_write(&cn, &cprm, binfmt)) -- cgit v1.2.3 From da9029b47d790675dcd82a6d9e332bc41e1a17f1 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 12 Jun 2025 15:25:38 +0200 Subject: coredump: add coredump_skip() helper Link: https://lore.kernel.org/20250612-work-coredump-massage-v1-24-315c0c34ba94@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/coredump.c b/fs/coredump.c index 178eddbcd6ad..fadf9d4be2e1 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1079,6 +1079,18 @@ static void coredump_cleanup(struct core_name *cn, struct coredump_params *cprm) coredump_finish(cn->core_dumped); } +static inline bool coredump_skip(const struct coredump_params *cprm, + const struct linux_binfmt *binfmt) +{ + if (!binfmt) + return true; + if (!binfmt->core_dump) + return true; + if (!__get_dumpable(cprm->mm_flags)) + return true; + return false; +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct cred *cred __free(put_cred) = NULL; @@ -1086,7 +1098,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) struct core_state core_state; struct core_name cn; struct mm_struct *mm = current->mm; - struct linux_binfmt * binfmt; + struct linux_binfmt *binfmt = mm->binfmt; const struct cred *old_cred; int argc = 0; struct coredump_params cprm = { @@ -1104,10 +1116,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) audit_core_dumps(siginfo->si_signo); - binfmt = mm->binfmt; - if (!binfmt || !binfmt->core_dump) - return; - if (!__get_dumpable(cprm.mm_flags)) + if (coredump_skip(&cprm, binfmt)) return; cred = prepare_creds(); -- cgit v1.2.3