From d4d25deca49ec2527a634557bf5a6cf449f85deb Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 26 Nov 2007 20:42:19 +0100 Subject: x86: fix NMI watchdog & 'stopped time' problem More than 3 years ago Niclas Gustafsson reported a 'stopped time' problem: > Watching the /proc/interrupts with 10s apart after the "stop". > > [root@s151 root]# more /proc/interrupts > CPU0 > 0: 66413955 local-APIC-edge timer [...] > LOC: 67355837 > ERR: 0 > MIS: 0 > [root@s151 root]# more /proc/interrupts > CPU0 > 0: 66413955 local-APIC-edge timer [...] > LOC: 67379568 > ERR: 0 > MIS: 0 This may be because buggy SMM firmware messes with the 8259A (configured for a transparent mode -- yes that rare "local-APIC-edge" mode is tricky ;-) ) insanely. this should resolve: http://bugzilla.kernel.org/show_bug.cgi?id=2544 http://bugzilla.kernel.org/show_bug.cgi?id=6296 Patch-dusted-off-by: Ingo Molnar Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/io_apic_32.c | 12 ++++++++++-- arch/x86/kernel/nmi_32.c | 9 +++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index f35c6eb33da9..fc4bbc1d1331 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -2166,6 +2166,10 @@ static inline void __init check_timer(void) { int apic1, pin1, apic2, pin2; int vector; + unsigned int ver; + + ver = apic_read(APIC_LVR); + ver = GET_APIC_VERSION(ver); /* * get/set the timer IRQ vector: @@ -2179,11 +2183,15 @@ static inline void __init check_timer(void) * mode for the 8259A whenever interrupts are routed * through I/O APICs. Also IRQ0 has to be enabled in * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. + * disabled in the local APIC. Finally timer interrupts + * need to be acknowledged manually in the 8259A for + * timer_interrupt() and for the i82489DX when using + * the NMI watchdog. 
*/ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = 1; + timer_ack = !cpu_has_tsc; + timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); if (timer_over_8254 > 0) enable_8259A_irq(0); diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 600fd404e440..f5cc47c60b13 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -25,6 +25,7 @@ #include #include +#include #include "mach_traps.h" @@ -83,7 +84,7 @@ static int __init check_nmi_watchdog(void) prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); if (!prev_nmi_count) - return -1; + goto error; printk(KERN_INFO "Testing NMI watchdog ... "); @@ -117,7 +118,7 @@ static int __init check_nmi_watchdog(void) if (!atomic_read(&nmi_active)) { kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - return -1; + goto error; } printk("OK.\n"); @@ -128,6 +129,10 @@ static int __init check_nmi_watchdog(void) kfree(prev_nmi_count); return 0; +error: + timer_ack = !cpu_has_tsc; + + return -1; } /* This needs to happen later in boot so counters are working */ late_initcall(check_nmi_watchdog); -- cgit v1.2.3 From 57c351de715458f8fbee1e92e8cc65ddc00da04c Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Mon, 26 Nov 2007 20:42:19 +0100 Subject: x86: printk kernel version in WARN_ON and other dump_stack users today, all oopses contain a version number of the kernel, which is nice because the people who actually do bother to read the oops get this vital bit of information always without having to ask the reporter in another round trip. However, WARN_ON() and many other dump_stack() users right now lack this information; the patch below adds this. This information is essential for getting people to use their time effectively when looking at these things; in addition, it's essential for tools that try to collect statistics about defects. Please consider, since its so simple and important for long term kernel quality processes. 
The code is identical between 32/64 bit; a lot of this code should be unified over time, the patch keeps the identical-ness intact. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/traps_32.c | 5 +++++ arch/x86/kernel/traps_64.c | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 298d13ed3ab3..0a4c89382479 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -283,6 +283,11 @@ void dump_stack(void) { unsigned long stack; + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); show_trace(current, NULL, &stack); } diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 4a6bd4965f56..1384e34a65a7 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -31,6 +31,7 @@ #include #include #include +#include #if defined(CONFIG_EDAC) #include @@ -400,6 +401,12 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp) void dump_stack(void) { unsigned long dummy; + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); show_trace(NULL, NULL, &dummy); } -- cgit v1.2.3 From bc84cf17b50ca5b49bec0a5fef63c58c1526d46b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Nov 2007 20:42:19 +0100 Subject: x86: turn off iommu merge by default revert this commit for now: commit 948062683004d13ca21c8c05ac052d387978a449 Author: Andi Kleen Date: Fri Oct 19 20:35:03 2007 +0200 x86: enable iommu_merge by default it's causing regressions: http://bugzilla.kernel.org/show_bug.cgi?id=9412 Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-dma_64.c | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma_64.c index aa805b11b24f..5552d23d23c2 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma_64.c @@ -12,7 +12,7 @@ #include #include -int iommu_merge __read_mostly = 1; +int iommu_merge __read_mostly = 0; EXPORT_SYMBOL(iommu_merge); dma_addr_t bad_dma_address __read_mostly; -- cgit v1.2.3 From 000f4a9e718a665fd706500199a3f0c11fea30c1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 26 Nov 2007 20:42:19 +0100 Subject: lockdep: annotate do_debug() trap handler Ensure the hardirq state is consistent before using locks. Use the rare trace_hardirqs_fixup() because the trap can happen in any context. resolves this rare lockdep warning: WARNING: at kernel/lockdep.c:2658 check_flags() [] check_flags+0x90/0x140 [] lock_release+0x4b/0x1d0 [] notifier_call_chain+0x2a/0x47 [] __atomic_notifier_call_chain+0x64/0x6d [] __atomic_notifier_call_chain+0x0/0x6d [] atomic_notifier_call_chain+0x17/0x1a [] notify_die+0x30/0x34 [] do_debug+0x3e/0xd4 [] debug_stack_correct+0x27/0x2c [] tcp_rcv_established+0x1/0x620 [] tcp_v4_do_rcv+0x2b/0x313 [] tcp_v4_rcv+0x467/0x85d [] _spin_lock_nested+0x27/0x32 [] tcp_v4_rcv+0x7fe/0x85d [] tcp_v4_rcv+0x3bf/0x85d [] ip_local_deliver_finish+0x11b/0x1b0 [] ip_local_deliver_finish+0x2e/0x1b0 [] ip_rcv_finish+0x27b/0x29a [] netif_receive_skb+0xfb/0x2a6 [] ip_rcv+0x0/0x1fb [] netif_receive_skb+0x26a/0x2a6 [] netif_receive_skb+0xfb/0x2a6 [] process_backlog+0x7f/0xc6 [] net_rx_action+0xb9/0x1ac [] net_rx_action+0x47/0x1ac [] trace_hardirqs_on+0x118/0x16b [] __do_softirq+0x49/0xa2 [] do_softirq+0x60/0xdd [] _spin_unlock_irq+0x20/0x2c [] restore_nocheck+0x12/0x15 [] handle_fasteoi_irq+0x0/0x9b [] do_IRQ+0x94/0xaa [] _spin_unlock_irq+0x20/0x2c [] common_interrupt+0x2e/0x34 [] native_safe_halt+0x2/0x3 [] default_idle+0x44/0x65 [] cpu_idle+0x42/0x50 [] start_kernel+0x26b/0x270 [] unknown_bootoption+0x0/0x196 
======================= irq event stamp: 559190 hardirqs last enabled at (559190): [] kprobe_exceptions_notify+0x299/0x305 hardirqs last disabled at (559189): [] do_int3+0x1d/0x95 softirqs last enabled at (559172): [] do_softirq+0x60/0xdd softirqs last disabled at (559181): [] do_softirq+0x60/0xdd Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/traps_32.c | 2 ++ arch/x86/kernel/traps_64.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 0a4c89382479..ef6010262597 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -833,6 +833,8 @@ fastcall void __kprobes do_debug(struct pt_regs * regs, long error_code) unsigned int condition; struct task_struct *tsk = current; + trace_hardirqs_fixup(); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 1384e34a65a7..d11525ad81b4 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -853,6 +853,8 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, struct task_struct *tsk = current; siginfo_t info; + trace_hardirqs_fixup(); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, -- cgit v1.2.3 From 43517854da512ae7daf15e7044e040edb15b8b23 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Mon, 26 Nov 2007 20:42:19 +0100 Subject: x86: correctly set UTS_MACHINE for "make ARCH=x86" x86: correctly set UTS_MACHINE for "make ARCH=x86" For a kernel built with "make ARCH=x86" the following system information is displayed when running the new kernel $ uname -m x86 On some i386 systems (e.g. K7) we even have the following information $ uname -m x66 This is weird. The usual information for "uname -m" should be "x86_64" on 64-bit and "i386" or "i686" on 32-bit. 
This patch fixes the issue by setting UTS_MACHINE to "i386" for 32-bit kernel builds and to "x86_64" for 64-bit kernel builds. I.e., "x86" won't be used for UTS_MACHINE anymore. Acked-by: Sam Ravnborg Signed-off-by: Andreas Herrmann Cc: "H. Peter Anvin" Cc: Roman Zippel Cc: Andrew Morton Cc: Sam Ravnborg Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 116b03a45636..7aa1dc6d67c8 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -11,10 +11,9 @@ endif $(srctree)/arch/x86/Makefile%: ; ifeq ($(CONFIG_X86_32),y) + UTS_MACHINE := i386 include $(srctree)/arch/x86/Makefile_32 else + UTS_MACHINE := x86_64 include $(srctree)/arch/x86/Makefile_64 endif - - - -- cgit v1.2.3 From c82bc5ad5411ccd61d26355088e2f5c9e95ef54b Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Mon, 26 Nov 2007 20:42:19 +0100 Subject: pci: use pci=bfsort for HP DL385 G2, DL585 G2 HP ProLiant systems DL385 G2 and DL585 G2 need pci=bfsort to enumerate PCI devices in the expected order. Matt sayeth: biosdevname is a userspace app I wrote to help solve this so we don't need to patch the kernel for future systems. It's not integrated into any distributions properly yet, but is included in openSUSE 10.3 and Fedora 8 for people who want to download and install it there. It acts as a udev helper. For the time being, patching the kernel is necessary. I really hope biosdevname eliminates that need in future distributions. 
http://linux.dell.com/biosdevname/ Signed-off-by: Michal Schmidt Acked-by: Andy Gospodarek Cc: mingo@elte.hu Cc: andy@greyhouse.net Cc: john.cagle@hp.com Cc: Matt Domsch Cc: Greg KH Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/pci/common.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index f4386990b150..862746390666 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -315,6 +315,22 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { }, }, #endif + { + .callback = set_bf_sort, + .ident = "HP ProLiant DL385 G2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL385 G2"), + }, + }, + { + .callback = set_bf_sort, + .ident = "HP ProLiant DL585 G2", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "ProLiant DL585 G2"), + }, + }, {} }; -- cgit v1.2.3 From 8645419cdb588a743060175c1efbe23f6e44b29b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 26 Nov 2007 20:42:19 +0100 Subject: x86: fix kprobes_64.c inlining borkage fix: arch/x86/kernel/kprobes_64.c: In function 'set_current_kprobe': arch/x86/kernel/kprobes_64.c:152: sorry, unimplemented: inlining failed in call to 'is_IF_modifier': recursive inlining arch/x86/kernel/kprobes_64.c:166: sorry, unimplemented: called from here Cc: Thomas Gleixner Signed-off-by: Andrew Morton Cc: mingo@elte.hu Cc: akpm@linux-foundation.org Cc: tglx@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/kprobes_64.c b/arch/x86/kernel/kprobes_64.c index 3db3611933d8..0c467644589c 100644 --- a/arch/x86/kernel/kprobes_64.c +++ b/arch/x86/kernel/kprobes_64.c @@ -58,7 +58,7 @@ const int kretprobe_blacklist_size = 
ARRAY_SIZE(kretprobe_blacklist); /* * returns non-zero if opcode modifies the interrupt flag. */ -static __always_inline int is_IF_modifier(kprobe_opcode_t *insn) +static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) { switch (*insn) { case 0xfa: /* cli */ -- cgit v1.2.3 From 8232fd625217dc641ed05dd238a8bb5c82828082 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 26 Nov 2007 20:42:19 +0100 Subject: x86: export the symbol empty_zero_page on the 32-bit x86 architecture The latest KVM driver wants to use the empty_zero_page symbol, and it's not exported in 32-bit x86 (although it is exported by x86_64, s390, and uml architectures). Signed-off-by: "Theodore Ts'o" Cc: tglx@linutronix.de Cc: linux-kernel@vger.kernel.org Cc: kvm-devel@lists.sourceforge.net Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/i386_ksyms_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index edd39ccf139e..02112fcc0de7 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -2,6 +2,7 @@ #include #include #include +#include EXPORT_SYMBOL(__down_failed); EXPORT_SYMBOL(__down_failed_interruptible); @@ -29,3 +30,4 @@ EXPORT_SYMBOL(__read_lock_failed); #endif EXPORT_SYMBOL(csum_partial); +EXPORT_SYMBOL(empty_zero_page); -- cgit v1.2.3 From f44d9efd3510776216938fef84adc99cc0e12412 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 26 Nov 2007 20:42:20 +0100 Subject: x86: fix APIC related bootup crash on Athlon XP CPUs warmbloodedcreature@gmail.com reported that an APIC-enabled Asus a7v8x-x with an Athlon XP reboots early in the bootup: http://bugzilla.kernel.org/show_bug.cgi?id=8723 After a long marathon of spontaneous-reboot debugging, it turns out to be caused by sync_Arb_IDs(). AMD CPUs never really needed this sequence anyway, so just return early if we meet an AMD CPU.
Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic_32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index 08b07c176962..96986b46bc85 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c @@ -789,7 +789,7 @@ void __init sync_Arb_IDs(void) * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not * needed on AMD. */ - if (modern_apic()) + if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) return; /* * Wait for idle. -- cgit v1.2.3