From 84e478c6f1eb9c4bfa1fff2f8108e9a061b46428 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 5 Feb 2010 21:47:05 -0500 Subject: nmi_watchdog: Config option to enable new nmi_watchdog These are the bits that enable the new nmi_watchdog and safely isolate the old nmi_watchdog. Only one or the other can run, not both at the same time. Signed-off-by: Don Zickus Cc: Linus Torvalds Cc: Andrew Morton Cc: gorcunov@gmail.com Cc: aris@redhat.com Cc: peterz@infradead.org LKML-Reference: <1265424425-31562-4-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index b752e807adde..a42ff0bef708 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -47,4 +47,8 @@ static inline bool trigger_all_cpu_backtrace(void) } #endif +#ifdef CONFIG_NMI_WATCHDOG +int hw_nmi_is_cpu_stuck(struct pt_regs *); +#endif + #endif -- cgit v1.2.3 From 504d7cf10ee42bb76b9556859f23d4121dee0a77 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 12 Feb 2010 17:19:19 -0500 Subject: nmi_watchdog: Compile and portability fixes The original patch was x86_64 centric. Changed the code to make it less so. ested by building and running on a powerpc. Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266013161-31197-2-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar --- include/linux/nmi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index a42ff0bef708..794e7354c5bf 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void); extern void acpi_nmi_disable(void); extern void acpi_nmi_enable(void); #else +#ifndef CONFIG_NMI_WATCHDOG static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); } +#else +extern void touch_nmi_watchdog(void); +#endif static inline void acpi_nmi_disable(void) { } static inline void acpi_nmi_enable(void) { } #endif @@ -49,6 +53,11 @@ static inline bool trigger_all_cpu_backtrace(void) #ifdef CONFIG_NMI_WATCHDOG int hw_nmi_is_cpu_stuck(struct pt_regs *); +u64 hw_nmi_get_sample_period(void); +extern int nmi_watchdog_enabled; +struct ctl_table; +extern int proc_nmi_enabled(struct ctl_table *, int , + void __user *, size_t *, loff_t *); #endif #endif -- cgit v1.2.3 From 47195d57636604ff6048b0d7aa3e4ed9643f6073 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Mon, 22 Feb 2010 18:09:03 -0500 Subject: nmi_watchdog: Clean up various small details Mostly copy/paste whitespace damage with a couple of nitpicks by the checkpatch script. Fix the struct definition as requested by Ingo too. Signed-off-by: Don Zickus Cc: peterz@infradead.org Cc: gorcunov@gmail.com Cc: aris@redhat.com LKML-Reference: <1266880143-24943-1-git-send-email-dzickus@redhat.com> Signed-off-by: Ingo Molnar -- arch/x86/kernel/apic/hw_nmi.c | 14 +++++------ arch/x86/kernel/traps.c | 6 ++-- include/linux/nmi.h | 2 - kernel/nmi_watchdog.c | 51 ++++++++++++++++++++---------------------- 4 files changed, 36 insertions(+), 37 deletions(-) --- include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 794e7354c5bf..22cc7960b649 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -57,7 +57,7 @@ u64 hw_nmi_get_sample_period(void); extern int nmi_watchdog_enabled; struct ctl_table; extern int proc_nmi_enabled(struct ctl_table *, int , - void __user *, size_t *, loff_t *); + void __user *, size_t *, loff_t *); #endif #endif -- cgit v1.2.3 From 58687acba59266735adb8ccd9b5b9aa2c7cd205b Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:44 -0400 Subject: lockup_detector: Combine nmi_watchdog and softlockup detector The new nmi_watchdog (which uses the perf event subsystem) is very similar in structure to the softlockup detector. Using Ingo's suggestion, I combined the two functionalities into one file: kernel/watchdog.c. Now both the nmi_watchdog (or hardlockup detector) and softlockup detector sit on top of the perf event subsystem, which is run every 60 seconds or so to see if there are any lockups. To detect hardlockups, cpus not responding to interrupts, I implemented an hrtimer that runs 5 times for every perf event overflow event. If that stops counting on a cpu, then the cpu is most likely in trouble. To detect softlockups, tasks not yielding to the scheduler, I used the previous kthread idea that now gets kicked every time the hrtimer fires. If the kthread isn't being scheduled neither is anyone else and the warning is printed to the console. I tested this on x86_64 and both the softlockup and hardlockup paths work. V2: - cleaned up the Kconfig and softlockup combination - surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI - seperated out the softlockup case from perf event subsystem - re-arranged the enabling/disabling nmi watchdog from proc space - added cpumasks for hardlockup failure cases - removed fallback to soft events if no PMU exists for hard events V3: - comment cleanups - drop support for older softlockup code - per_cpu cleanups - completely remove software clock base hardlockup detector - use per_cpu masking on hard/soft lockup detection - #ifdef cleanups - rename config option NMI_WATCHDOG to LOCKUP_DETECTOR - documentation additions V4: - documentation fixes - convert per_cpu to __get_cpu_var - powerpc compile fixes V5: - split apart warn flags for hard and soft lockups TODO: - figure out how to make an arch-agnostic clock2cycles call (if possible) to feed into perf events as a sample period [fweisbec: merged conflict patch] Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- include/linux/nmi.h | 8 ++++---- include/linux/sched.h | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 22cc7960b649..abd48aacaf79 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -20,7 +20,7 @@ extern void touch_nmi_watchdog(void); extern void acpi_nmi_disable(void); extern void acpi_nmi_enable(void); #else -#ifndef CONFIG_NMI_WATCHDOG +#ifndef CONFIG_LOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); @@ -51,12 +51,12 @@ static inline bool trigger_all_cpu_backtrace(void) } #endif -#ifdef CONFIG_NMI_WATCHDOG +#ifdef CONFIG_LOCKUP_DETECTOR int hw_nmi_is_cpu_stuck(struct pt_regs *); u64 hw_nmi_get_sample_period(void); -extern int nmi_watchdog_enabled; +extern int watchdog_enabled; struct ctl_table; -extern int proc_nmi_enabled(struct ctl_table *, int , +extern int proc_dowatchdog_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index dad7f668ebf7..37efe8fa5306 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -346,6 +346,12 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); #endif +#ifdef CONFIG_LOCKUP_DETECTOR +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); +#endif + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) -- cgit v1.2.3 From 332fbdbca3f7716c5620970755ae054d213bcc4e Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 7 May 2010 17:11:45 -0400 Subject: lockup_detector: Touch_softlockup cleanups and softlockup_tick removal Just some code cleanup to make touch_softlockup clearer and remove the softlockup_tick function as it is no longer needed. Also remove the /proc softlockup_thres call as it has been changed to watchdog_thres. Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap LKML-Reference: <1273266711-18706-3-git-send-email-dzickus@redhat.com> Signed-off-by: Frederic Weisbecker --- include/linux/sched.h | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 37efe8fa5306..33f9b2ad0bbb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -312,19 +312,15 @@ extern void scheduler_tick(void); extern void sched_show_task(struct task_struct *p); #ifdef CONFIG_DETECT_SOFTLOCKUP -extern void softlockup_tick(void); extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); +extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; extern int softlockup_thresh; #else -static inline void softlockup_tick(void) -{ -} static inline void touch_softlockup_watchdog(void) { } @@ -346,12 +342,6 @@ extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, size_t *lenp, loff_t *ppos); #endif -#ifdef CONFIG_LOCKUP_DETECTOR -extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); -#endif - /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) -- cgit v1.2.3 From 19cc36c0f0457e5c6629ec24036fbbe8255c88ec Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 13 May 2010 02:30:49 +0200 Subject: lockup_detector: Fix forgotten config conversion Fix forgotten CONFIG_DETECT_SOFTLOCKUP -> CONFIG_LOCKUP_DETECTOR in sched.h Fixes: arch/x86/built-in.o: In function `touch_nmi_watchdog': (.text+0x1bd59): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `show_state_filter': (.text+0x10d01): undefined reference to `touch_all_softlockup_watchdogs' kernel/built-in.o: In function `sched_clock_idle_wakeup_event': (.text+0x362f9): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `timekeeping_resume': timekeeping.c:(.text+0x38757): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_nohz_handler': tick-sched.c:(.text+0x3e5b9): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_sched_timer': tick-sched.c:(.text+0x3e671): undefined reference to `touch_softlockup_watchdog' kernel/built-in.o: In function `tick_check_idle': (.text+0x3e90b): undefined reference to `touch_softlockup_watchdog' Signed-off-by: Frederic Weisbecker Cc: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Cyrill Gorcunov Cc: Eric Paris Cc: Randy Dunlap --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33f9b2ad0bbb..3958e0cd24f7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -311,7 +311,7 @@ extern void scheduler_tick(void); extern void sched_show_task(struct task_struct *p); -#ifdef CONFIG_DETECT_SOFTLOCKUP +#ifdef CONFIG_LOCKUP_DETECTOR extern void touch_softlockup_watchdog(void); extern void touch_softlockup_watchdog_sync(void); extern void touch_all_softlockup_watchdogs(void); -- cgit v1.2.3 From cafcd80d216bc2136b8edbb794327e495792c666 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Fri, 14 May 2010 11:11:21 -0400 Subject: lockup_detector: Cross arch compile fixes Combining the softlockup and hardlockup code causes watchdog.c to build even without the hardlockup detection support. So if an arch, that has the previous and the new nmi watchdog implementations cohabiting, wants to know if the generic one is in use, CONFIG_LOCKUP_DETECTOR is not a reliable check. We need to use CONFIG_HARDLOCKUP_DETECTOR instead. Fixes: kernel/built-in.o: In function `touch_nmi_watchdog': (.text+0x449bc): multiple definition of `touch_nmi_watchdog' arch/sparc/kernel/built-in.o:(.text+0x11b28): first defined here Signed-off-by: Don Zickus Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Don Zickus Cc: Cyrill Gorcunov LKML-Reference: <20100514151121.GR15159@redhat.com> [ use CONFIG_HARDLOCKUP_DETECTOR instead of CONFIG_PERF_EVENTS_NMI] Signed-off-by: Frederic Weisbecker --- include/linux/nmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index abd48aacaf79..06aab5eee134 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -20,7 +20,7 @@ extern void touch_nmi_watchdog(void); extern void acpi_nmi_disable(void); extern void acpi_nmi_enable(void); #else -#ifndef CONFIG_LOCKUP_DETECTOR +#ifndef CONFIG_HARDLOCKUP_DETECTOR static inline void touch_nmi_watchdog(void) { touch_softlockup_watchdog(); -- cgit v1.2.3