From 2fbe7b25c8edaf2d10e6c1a4cc9f8afe714c4764 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] i386/x86-64: Remove un/set_nmi_callback and reserve/release_lapic_nmi functions Removes the un/set_nmi_callback and reserve/release_lapic_nmi functions as they are no longer needed. The various subsystems are modified to register with the die_notifier instead. Also includes compile fixes by Andrew Morton. Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- kernel/sysctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc37138..83f168361624 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -76,8 +76,6 @@ extern int compat_log; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; -extern int proc_unknown_nmi_panic(ctl_table *, int, struct file *, - void __user *, size_t *, loff_t *); #endif /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -628,7 +626,7 @@ static ctl_table kern_table[] = { .data = &unknown_nmi_panic, .maxlen = sizeof (int), .mode = 0644, - .proc_handler = &proc_unknown_nmi_panic, + .proc_handler = &proc_dointvec, }, #endif #if defined(CONFIG_X86) -- cgit v1.2.3 From 407984f1af259b31957c7c05075a454a751bb801 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Add ability to enable/disable nmi watchdog with sysctl Adds a new /proc/sys/kernel/nmi call that will enable/disable the nmi watchdog.
Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- kernel/sysctl.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 83f168361624..040de6bd74dd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -76,6 +76,9 @@ extern int compat_log; #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) int unknown_nmi_panic; +int nmi_watchdog_enabled; +extern int proc_nmi_enabled(struct ctl_table *, int , struct file *, + void __user *, size_t *, loff_t *); #endif /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ @@ -628,6 +631,14 @@ static ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = KERN_NMI_WATCHDOG, + .procname = "nmi_watchdog", + .data = &nmi_watchdog_enabled, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_nmi_enabled, + }, #endif #if defined(CONFIG_X86) { -- cgit v1.2.3 From 8da5adda91df3d2fcc5300e68da491694c9af019 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Tue, 26 Sep 2006 10:52:27 +0200 Subject: [PATCH] x86: Allow users to force a panic on NMI To quote Alan Cox: The default Linux behaviour on an NMI of either memory or unknown is to continue operation. For many environments such as scientific computing it is preferable that the box is taken out and the error dealt with than an uncorrected parity/ECC error get propagated. A small number of systems do generate NMIs for bizarre random reasons such as power management so the default is unchanged. In other respects the new proc/sys entry works like the existing panic controls already in that directory. This is separate to the edac support - EDAC allows supported chipsets to handle ECC errors well, this change allows unsupported cases to at least panic rather than cause problems further down the line.
Signed-off-by: Don Zickus Signed-off-by: Andi Kleen --- kernel/sysctl.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 040de6bd74dd..220e20564124 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -641,6 +641,14 @@ static ctl_table kern_table[] = { }, #endif #if defined(CONFIG_X86) + { + .ctl_name = KERN_PANIC_ON_NMI, + .procname = "panic_on_unrecovered_nmi", + .data = &panic_on_unrecovered_nmi, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, { .ctl_name = KERN_BOOTLOADER_TYPE, .procname = "bootloader_type", -- cgit v1.2.3 From 0ff38490c836dc379ff7ec45b10a15a662f4e5f6 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 25 Sep 2006 23:31:52 -0700 Subject: [PATCH] zone_reclaim: dynamic slab reclaim Currently one can enable slab reclaim by setting an explicit option in /proc/sys/vm/zone_reclaim_mode. Slab reclaim is then used as a final option if the freeing of unmapped file backed pages is not enough to free enough pages to allow a local allocation. However, that means that the slab can grow excessively and that most memory of a node may be used by slabs. We have had a case where a machine with 46GB of memory was using 40-42GB for slab. Zone reclaim was effective in dealing with pagecache pages. However, slab reclaim was only done during global reclaim (which is a bit rare on NUMA systems). This patch implements slab reclaim during zone reclaim. Zone reclaim occurs if there is a danger of an off node allocation. At that point we 1. Shrink the per node page cache if the number of pagecache pages is more than min_unmapped_ratio percent of pages in a zone. 2. Shrink the slab cache if the number of the node's reclaimable slab pages (patch depends on earlier one that implements that counter) is more than min_slab_ratio (a new /proc/sys/vm tunable). The shrinking of the slab cache is a bit problematic since it is not node specific.
So we simply calculate what point in the slab we want to reach (current per node slab use minus the number of pages that need to be allocated) and then repeatedly run the global reclaim until that is unsuccessful or we have reached the limit. I hope we will have zone based slab reclaim at some point which will make that easier. The default for the min_slab_ratio is 5%. Also remove the slab option from /proc/sys/vm/zone_reclaim_mode. [akpm@osdl.org: cleanups] Signed-off-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 362a0cc37138..fd43c3e6786b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -943,6 +943,17 @@ static ctl_table vm_table[] = { .extra1 = &zero, .extra2 = &one_hundred, }, + { + .ctl_name = VM_MIN_SLAB, + .procname = "min_slab_ratio", + .data = &sysctl_min_slab_ratio, + .maxlen = sizeof(sysctl_min_slab_ratio), + .mode = 0644, + .proc_handler = &sysctl_min_slab_ratio_sysctl_handler, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &one_hundred, + }, #endif #ifdef CONFIG_X86_32 { -- cgit v1.2.3 From b89a81712f486e4f7a606987413e387605fdeaf4 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 27 Sep 2006 01:51:04 -0700 Subject: [PATCH] sysctl: Allow /proc/sys without sys_sysctl Since sys_sysctl is deprecated, start allowing it to be compiled out. This should catch any remaining user space code that cares, and paves the way for further sysctl cleanups. [akpm@osdl.org: If sys_sysctl() is not compiled-in, emit a warning] Signed-off-by: Eric W.
Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sysctl.c | 113 +++++++++++++++++++------------------------------------- 1 file changed, 38 insertions(+), 75 deletions(-) (limited to 'kernel/sysctl.c') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bcb3a181dbb2..8bfa7d117c54 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -137,8 +137,11 @@ extern int no_unaligned_warning; extern int max_lock_depth; #endif -static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t, - ctl_table *, void **); +#ifdef CONFIG_SYSCTL_SYSCALL +static int parse_table(int __user *, int, void __user *, size_t __user *, + void __user *, size_t, ctl_table *, void **); +#endif + static int proc_doutsstring(ctl_table *table, int write, struct file *filp, void __user *buffer, size_t *lenp, loff_t *ppos); @@ -165,7 +168,7 @@ int sysctl_legacy_va_layout; /* /proc declarations: */ -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL static ssize_t proc_readsys(struct file *, char __user *, size_t, loff_t *); static ssize_t proc_writesys(struct file *, const char __user *, size_t, loff_t *); @@ -1166,12 +1169,13 @@ static void start_unregistering(struct ctl_table_header *p) void __init sysctl_init(void) { -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL register_proc_table(root_table, proc_sys_root, &root_table_header); init_irq_proc(); #endif } +#ifdef CONFIG_SYSCTL_SYSCALL int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1225,6 +1229,7 @@ asmlinkage long sys_sysctl(struct __sysctl_args __user *args) unlock_kernel(); return error; } +#endif /* CONFIG_SYSCTL_SYSCALL */ /* * ctl_perm does NOT grant the superuser all rights automatically, because @@ -1251,6 +1256,7 @@ static inline int ctl_perm(ctl_table *table, int op) return test_perm(table->mode, op); } +#ifdef CONFIG_SYSCTL_SYSCALL static int parse_table(int __user *name, int nlen, void __user 
*oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, @@ -1340,6 +1346,7 @@ int do_sysctl_strategy (ctl_table *table, } return 0; } +#endif /* CONFIG_SYSCTL_SYSCALL */ /** * register_sysctl_table - register a sysctl hierarchy @@ -1427,7 +1434,7 @@ struct ctl_table_header *register_sysctl_table(ctl_table * table, else list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry); spin_unlock(&sysctl_lock); -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL register_proc_table(table, proc_sys_root, tmp); #endif return tmp; @@ -1445,18 +1452,31 @@ void unregister_sysctl_table(struct ctl_table_header * header) might_sleep(); spin_lock(&sysctl_lock); start_unregistering(header); -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL unregister_proc_table(header->ctl_table, proc_sys_root); #endif spin_unlock(&sysctl_lock); kfree(header); } +#else /* !CONFIG_SYSCTL */ +struct ctl_table_header * register_sysctl_table(ctl_table * table, + int insert_at_head) +{ + return NULL; +} + +void unregister_sysctl_table(struct ctl_table_header * table) +{ +} + +#endif /* CONFIG_SYSCTL */ + /* * /proc/sys support */ -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_PROC_SYSCTL /* Scan the sysctl entries in table and add them all into /proc */ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, void *set) @@ -2318,6 +2338,7 @@ int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_SYSCTL_SYSCALL /* * General sysctl support routines */ @@ -2460,11 +2481,19 @@ int sysctl_ms_jiffies(ctl_table *table, int __user *name, int nlen, return 1; } -#else /* CONFIG_SYSCTL */ +#else /* CONFIG_SYSCTL_SYSCALL */ asmlinkage long sys_sysctl(struct __sysctl_args __user *args) { + static int msg_count; + + if (msg_count < 5) { + msg_count++; + printk(KERN_INFO + "warning: process `%s' used the removed sysctl " + "system call\n", current->comm); + } return -ENOSYS; } @@ -2496,73 +2525,7 @@ int sysctl_ms_jiffies(ctl_table 
*table, int __user *name, int nlen, return -ENOSYS; } -int proc_dostring(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_bset(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_dointvec_ms_jiffies(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_doulongvec_minmax(ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -int proc_doulongvec_ms_jiffies_minmax(ctl_table *table, int write, - struct file *filp, - void __user *buffer, - size_t *lenp, loff_t *ppos) -{ - return -ENOSYS; -} - -struct ctl_table_header * register_sysctl_table(ctl_table * table, - int insert_at_head) -{ - return NULL; -} - -void unregister_sysctl_table(struct ctl_table_header * table) -{ -} - -#endif /* CONFIG_SYSCTL */ +#endif /* CONFIG_SYSCTL_SYSCALL */ /* * No sense putting this after each symbol definition, twice, -- cgit v1.2.3