From 6b65c268df798c5c3533ff2e4962533d020a37d9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 7 Mar 2019 16:29:43 -0800 Subject: sysctl: handle overflow for file-max [ Upstream commit 32a5ad9c22852e6bd9e74bdec5934ef9d1480bc5 ] Currently, when writing echo 18446744073709551616 > /proc/sys/fs/file-max /proc/sys/fs/file-max will overflow and be set to 0. That quickly crashes the system. This commit sets the max and min value for file-max. The max value is set to long int. Any higher value cannot currently be used as the percpu counters are long ints and not unsigned integers. Note that the file-max value is ultimately parsed via __do_proc_doulongvec_minmax(). This function does not report error when min or max are exceeded. Which means if a value largen that long int is written userspace will not receive an error instead the old value will be kept. There is an argument to be made that this should be changed and __do_proc_doulongvec_minmax() should return an error when a dedicated min or max value are exceeded. However this has the potential to break userspace so let's defer this to an RFC patch. Link: http://lkml.kernel.org/r/20190107222700.15954-3-christian@brauner.io Signed-off-by: Christian Brauner Acked-by: Kees Cook Cc: Alexey Dobriyan Cc: Al Viro Cc: Dominik Brodowski Cc: "Eric W. Biederman" Cc: Joe Lawrence Cc: Luis Chamberlain Cc: Waiman Long [christian@brauner.io: v4] Link: http://lkml.kernel.org/r/20190210203943.8227-3-christian@brauner.io Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/sysctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index efd340a510a9..5515d578095b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -125,6 +125,7 @@ static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; static unsigned long one_ul = 1; +static unsigned long long_max = LONG_MAX; static int one_hundred = 100; static int one_thousand = 1000; #ifdef CONFIG_PRINTK @@ -1682,6 +1683,8 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, + .extra1 = &zero, + .extra2 = &long_max, }, { .procname = "nr_open", -- cgit v1.2.3 From 3141fcc89f6695e75633e434fe93784cd31d0d22 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Apr 2019 18:39:38 -0700 Subject: kernel/sysctl.c: fix out-of-bounds access when setting file-max commit 9002b21465fa4d829edfc94a5a441005cffaa972 upstream. Commit 32a5ad9c2285 ("sysctl: handle overflow for file-max") hooked up min/max values for the file-max sysctl parameter via the .extra1 and .extra2 fields in the corresponding struct ctl_table entry. Unfortunately, the minimum value points at the global 'zero' variable, which is an int. This results in a KASAN splat when accessed as a long by proc_doulongvec_minmax on 64-bit architectures: | BUG: KASAN: global-out-of-bounds in __do_proc_doulongvec_minmax+0x5d8/0x6a0 | Read of size 8 at addr ffff2000133d1c20 by task systemd/1 | | CPU: 0 PID: 1 Comm: systemd Not tainted 5.1.0-rc3-00012-g40b114779944 #2 | Hardware name: linux,dummy-virt (DT) | Call trace: | dump_backtrace+0x0/0x228 | show_stack+0x14/0x20 | dump_stack+0xe8/0x124 | print_address_description+0x60/0x258 | kasan_report+0x140/0x1a0 | __asan_report_load8_noabort+0x18/0x20 | __do_proc_doulongvec_minmax+0x5d8/0x6a0 | proc_doulongvec_minmax+0x4c/0x78 | proc_sys_call_handler.isra.19+0x144/0x1d8 | proc_sys_write+0x34/0x58 | __vfs_write+0x54/0xe8 | vfs_write+0x124/0x3c0 | ksys_write+0xbc/0x168 | __arm64_sys_write+0x68/0x98 | el0_svc_common+0x100/0x258 | el0_svc_handler+0x48/0xc0 | el0_svc+0x8/0xc | | The buggy address belongs to the variable: | zero+0x0/0x40 | | Memory state around the buggy address: | ffff2000133d1b00: 00 00 00 00 00 00 00 00 fa fa fa fa 04 fa fa fa | ffff2000133d1b80: fa fa fa fa 04 fa fa fa fa fa fa fa 04 fa fa fa | >ffff2000133d1c00: fa fa fa fa 04 fa fa fa fa fa fa fa 00 00 00 00 | ^ | ffff2000133d1c80: fa fa fa fa 00 fa fa fa fa fa fa fa 00 00 00 00 | ffff2000133d1d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Fix the splat by introducing a unsigned long 'zero_ul' and using that instead. Link: http://lkml.kernel.org/r/20190403153409.17307-1-will.deacon@arm.com Fixes: 32a5ad9c2285 ("sysctl: handle overflow for file-max") Signed-off-by: Will Deacon Acked-by: Christian Brauner Cc: Kees Cook Cc: Alexey Dobriyan Cc: Matteo Croce Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5515d578095b..cf0aeaae567e 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -124,6 +124,7 @@ static int zero; static int __maybe_unused one = 1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; +static unsigned long zero_ul; static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; static int one_hundred = 100; @@ -1683,7 +1684,7 @@ static struct ctl_table fs_table[] = { .maxlen = sizeof(files_stat.max_files), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero, + .extra1 = &zero_ul, .extra2 = &long_max, }, { -- cgit v1.2.3 From 726f69d32719db4543409b637bd163e30efe07e5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 14 May 2019 15:44:55 -0700 Subject: sysctl: return -EINVAL if val violates minmax [ Upstream commit e260ad01f0aa9e96b5386d5cd7184afd949dc457 ] Currently when userspace gives us a values that overflow e.g. file-max and other callers of __do_proc_doulongvec_minmax() we simply ignore the new value and leave the current value untouched. This can be problematic as it gives the illusion that the limit has indeed be bumped when in fact it failed. This commit makes sure to return EINVAL when an overflow is detected. Please note that this is a userspace facing change. Link: http://lkml.kernel.org/r/20190210203943.8227-4-christian@brauner.io Signed-off-by: Christian Brauner Acked-by: Luis Chamberlain Cc: Kees Cook Cc: Alexey Dobriyan Cc: Al Viro Cc: Dominik Brodowski Cc: "Eric W. Biederman" Cc: Joe Lawrence Cc: Waiman Long Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/sysctl.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index cf0aeaae567e..6af1ac551ea3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2527,8 +2527,10 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int if (neg) continue; val = convmul * val / convdiv; - if ((min && val < *min) || (max && val > *max)) - continue; + if ((min && val < *min) || (max && val > *max)) { + err = -EINVAL; + break; + } *i = val; } else { val = convdiv * (*i) / convmul; -- cgit v1.2.3 From b231f9db5e9d240f6d44439020c22d7cd2694799 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Sat, 30 Nov 2019 17:56:08 -0800 Subject: kernel: sysctl: make drop_caches write-only [ Upstream commit 204cb79ad42f015312a5bbd7012d09c93d9b46fb ] Currently, the drop_caches proc file and sysctl read back the last value written, suggesting this is somehow a stateful setting instead of a one-time command. Make it write-only, like e.g. compact_memory. While mitigating a VM problem at scale in our fleet, there was confusion about whether writing to this file will permanently switch the kernel into a non-caching mode. This influences the decision making in a tense situation, where tens of people are trying to fix tens of thousands of affected machines: Do we need a rollback strategy? What are the performance implications of operating in a non-caching state for several days? It also caused confusion when the kernel team said we may need to write the file several times to make sure it's effective ("But it already reads back 3?"). Link: http://lkml.kernel.org/r/20191031221602.9375-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Chris Down Acked-by: Vlastimil Babka Acked-by: David Hildenbrand Acked-by: Michal Hocko Acked-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- kernel/sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 6af1ac551ea3..34449ec0689d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1398,7 +1398,7 @@ static struct ctl_table vm_table[] = { .procname = "drop_caches", .data = &sysctl_drop_caches, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = &one, .extra2 = &four, -- cgit v1.2.3