From f14c234b4bc5184fd40d9a47830e5b32c3b36d49 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Tue, 1 Oct 2019 11:10:53 +1000 Subject: clone3: switch to copy_struct_from_user() Switch clone3() syscall from it's own copying struct clone_args from userspace to the new dedicated copy_struct_from_user() helper. The change is very straightforward, and helps unify the syscall interface for struct-from-userspace syscalls. Additionally, explicitly define CLONE_ARGS_SIZE_VER0 to match the other users of the struct-extension pattern. Signed-off-by: Aleksa Sarai Reviewed-by: Kees Cook Reviewed-by: Christian Brauner [christian.brauner@ubuntu.com: improve commit message] Link: https://lore.kernel.org/r/20191001011055.19283-3-cyphar@cyphar.com Signed-off-by: Christian Brauner --- kernel/fork.c | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index f9572f416126..2ef529869c64 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2525,39 +2525,19 @@ SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, #ifdef __ARCH_WANT_SYS_CLONE3 noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs, struct clone_args __user *uargs, - size_t size) + size_t usize) { + int err; struct clone_args args; - if (unlikely(size > PAGE_SIZE)) + if (unlikely(usize > PAGE_SIZE)) return -E2BIG; - - if (unlikely(size < sizeof(struct clone_args))) + if (unlikely(usize < CLONE_ARGS_SIZE_VER0)) return -EINVAL; - if (unlikely(!access_ok(uargs, size))) - return -EFAULT; - - if (size > sizeof(struct clone_args)) { - unsigned char __user *addr; - unsigned char __user *end; - unsigned char val; - - addr = (void __user *)uargs + sizeof(struct clone_args); - end = (void __user *)uargs + size; - - for (; addr < end; addr++) { - if (get_user(val, addr)) - return -EFAULT; - if (val) - return -E2BIG; - } - - size = sizeof(struct clone_args); - } - - if (copy_from_user(&args, uargs, size)) - return -EFAULT; + err = copy_struct_from_user(&args, sizeof(args), uargs, usize); + if (err) + return err; /* * Verify that higher 32bits of exit_signal are unset and that -- cgit v1.2.3 From 501bd0166eb949820c8e4204e62aaee5c89c84d9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 27 Sep 2019 17:28:42 +0200 Subject: fork: add kernel-doc for clone3 Add kernel-doc for the clone3() syscall. Link: https://lore.kernel.org/r/20191001114701.24661-2-christian.brauner@ubuntu.com Signed-off-by: Christian Brauner --- kernel/fork.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index f9572f416126..bf11cf39579a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2604,6 +2604,17 @@ static bool clone3_args_valid(const struct kernel_clone_args *kargs) return true; } +/** + * clone3 - create a new process with specific properties + * @uargs: argument structure + * @size: size of @uargs + * + * clone3() is the extensible successor to clone()/clone2(). + * It takes a struct as argument that is versioned by its size. + * + * Return: On success, a positive PID for the child process. + * On error, a negative errno number. + */ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size) { int err; -- cgit v1.2.3 From b0f53dbc4bc4c371f38b14c391095a3bb8a0bb40 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Sun, 6 Oct 2019 17:58:19 -0700 Subject: kernel/sysctl.c: do not override max_threads provided by userspace Partially revert 16db3d3f1170 ("kernel/sysctl.c: threads-max observe limits") because the patch is causing a regression to any workload which needs to override the auto-tuning of the limit provided by kernel. set_max_threads is implementing a boot time guesstimate to provide a sensible limit of the concurrently running threads so that runaways will not deplete all the memory. This is a good thing in general but there are workloads which might need to increase this limit for an application to run (reportedly WebSpher MQ is affected) and that is simply not possible after the mentioned change. It is also very dubious to override an admin decision by an estimation that doesn't have any direct relation to correctness of the kernel operation. Fix this by dropping set_max_threads from sysctl_max_threads so any value is accepted as long as it fits into MAX_THREADS which is important to check because allowing more threads could break internal robust futex restriction. While at it, do not use MIN_THREADS as the lower boundary because it is also only a heuristic for automatic estimation and admin might have a good reason to stop new threads to be created even when below this limit. This became more severe when we switched x86 from 4k to 8k kernel stacks. Starting since 6538b8ea886e ("x86_64: expand kernel stack to 16K") (3.16) we use THREAD_SIZE_ORDER = 2 and that halved the auto-tuned value. In the particular case 3.12 kernel.threads-max = 515561 4.4 kernel.threads-max = 200000 Neither of the two values is really insane on 32GB machine. I am not sure we want/need to tune the max_thread value further. If anything the tuning should be removed altogether if proven not useful in general. But we definitely need a way to override this auto-tuning. Link: http://lkml.kernel.org/r/20190922065801.GB18814@dhcp22.suse.cz Fixes: 16db3d3f1170 ("kernel/sysctl.c: threads-max observe limits") Signed-off-by: Michal Hocko Reviewed-by: "Eric W. Biederman" Cc: Heinrich Schuchardt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 1f6c45f6a734..bcdf53125210 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2925,7 +2925,7 @@ int sysctl_max_threads(struct ctl_table *table, int write, struct ctl_table t; int ret; int threads = max_threads; - int min = MIN_THREADS; + int min = 1; int max = MAX_THREADS; t = *table; @@ -2937,7 +2937,7 @@ int sysctl_max_threads(struct ctl_table *table, int write, if (ret || !write) return ret; - set_max_threads(threads); + max_threads = threads; return 0; } -- cgit v1.2.3