From eba9fe93a2959ec7f195c47c9db6ce7b5114ce1f Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 18 Mar 2008 15:24:32 -0500 Subject: [CPUFREQ] powernow-k8: improve error messages The most common error with powernow-k8 is an ACPI _PSS error caused either by failure to load the ACPI processor module or a bad parse of the _PSS object. Make the error message returned to the user in these situations more straightforward and easier to understand. -Mark Langsdorf Operating System Research Center AMD Signed-off-by: Mark Langsdorf Signed-off-by: Andreas Herrmann Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 46d4034d9f37..206791eb46e3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1127,12 +1127,23 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); +#ifndef CONFIG_ACPI_PROCESSOR + printk(KERN_ERR PFX "ACPI Processor support is required " + "for SMP systems but is absent. Please load the " + "ACPI Processor module before starting this " + "driver.\n"); +#else + printk(KERN_ERR PFX "Your BIOS does not provide ACPI " + "_PSS objects in a way that Linux understands. " + "Please report this to the Linux ACPI maintainers" + " and complain to your BIOS vendor.\n"); +#endif kfree(data); return -ENODEV; } if (pol->cpu != 0) { - printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); + printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than " + "CPU0. Complain to your BIOS vendor.\n"); kfree(data); return -ENODEV; } -- cgit v1.2.3 From 667ad4f70110357e8f024e81741c7bd1d7906e7d Mon Sep 17 00:00:00 2001 From: maximilian attems Date: Thu, 8 May 2008 22:10:01 +0200 Subject: [CPUFREQ] Crusoe: longrun cpufreq module reports false min freq The longrun cpufreq module reports a false minimum frequency 3MHz on 300-600MHz Crusoe processor. This may be due to a calculation bug in the module. Original patch from Kaz Sasayama submitted as http://bugs.debian.org/468149 patch ported to x86 Cc: Kaz Sasayama Signed-off-by: maximilian attems Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/longrun.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index af4a867a097c..777a7ff075de 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -245,7 +245,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, if ((ecx > 95) || (ecx == 0) || (eax < ebx)) return -EIO; - edx = (eax - ebx) / (100 - ecx); + edx = ((eax - ebx) * 100) / (100 - ecx); *low_freq = edx * 1000; /* back to kHz */ dprintk("low frequency is %u kHz\n", *low_freq); -- cgit v1.2.3 From b6db80ee1331e7beaeb91b4b3d946dd16c72e388 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 18 May 2008 19:27:48 +0200 Subject: x86: fix setup of cyc2ns in tsc_64.c When the TSC is calibrated against the PIT due to the nonavailability of PMTIMER/HPET or due to SMI interference then the setup of the per CPU cyc2ns variables is skipped. This is unlikely to happen but it would definitely render sched_clock() unusable. This was introduced with commit 53d517cdbaac704352b3d0c10fecb99e0b54572e x86: scale cyc_2_nsec according to CPU frequency Update the per CPU cyc2ns variables in all exit pathes of tsc_calibrate. Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- arch/x86/kernel/tsc_64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index fcc16e58609e..1784b8077a12 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -227,14 +227,14 @@ void __init tsc_calibrate(void) /* hpet or pmtimer available ? */ if (!hpet && !pm1 && !pm2) { printk(KERN_INFO "TSC calibrated against PIT\n"); - return; + goto out; } /* Check, whether the sampling was disturbed by an SMI */ if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { printk(KERN_WARNING "TSC calibration disturbed by SMI, " "using PIT calibration result\n"); - return; + goto out; } tsc2 = (tsc2 - tsc1) * 1000000L; @@ -255,6 +255,7 @@ void __init tsc_calibrate(void) tsc_khz = tsc2 / tsc1; +out: for_each_possible_cpu(cpu) set_cyc2ns_scale(tsc_khz, cpu); } -- cgit v1.2.3 From 9ccc906c97e34fd91dc6aaf5b69b52d824386910 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 13 May 2008 12:31:00 +0200 Subject: x86: distangle user disabled TSC from unstable tsc_enabled is set to 0 from the command line switch "notsc" and from the mark_tsc_unstable code. Seperate those functionalities and replace tsc_enable with tsc_disable. This makes also the native_sched_clock() decision when to use TSC understandable. Preparatory patch to solve the sched_clock() issue on 32 bit. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_32.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index e4790728b224..b087d691f165 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -14,7 +14,7 @@ #include "mach_timer.h" -static int tsc_enabled; +static int tsc_disabled; /* * On some systems the TSC frequency does not @@ -28,8 +28,8 @@ EXPORT_SYMBOL_GPL(tsc_khz); static int __init tsc_setup(char *str) { printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC completely.\n"); - mark_tsc_unstable("user disabled TSC"); + "cannot disable TSC completely.\n"); + tsc_disabled = 1; return 1; } #else @@ -120,7 +120,7 @@ unsigned long long native_sched_clock(void) * very important for it to be as fast as the platform * can achive it. ) */ - if (unlikely(!tsc_enabled && !tsc_unstable)) + if (unlikely(tsc_disabled)) /* No locking but a rare wrong value is not a big deal: */ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); @@ -322,7 +322,6 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - tsc_enabled = 0; printk("Marking TSC unstable due to: %s.\n", reason); /* Can be called before registration */ if (clocksource_tsc.mult) @@ -336,7 +335,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) { printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", - d->ident); + d->ident); tsc_unstable = 1; return 0; } @@ -403,8 +402,11 @@ void __init tsc_init(void) { int cpu; - if (!cpu_has_tsc) + if (!cpu_has_tsc || tsc_disabled) { + /* Disable the TSC in case of !cpu_has_tsc */ + tsc_disabled = 1; return; + } cpu_khz = calculate_cpu_khz(); tsc_khz = cpu_khz; @@ -441,8 +443,6 @@ void __init tsc_init(void) if (check_tsc_unstable()) { clocksource_tsc.rating = 0; clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } else - tsc_enabled = 1; - + } clocksource_register(&clocksource_tsc); } -- cgit v1.2.3 From 74dc51a3de06aa516e3b9fdc4017b2aeb38bf44b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 18 May 2008 22:17:59 +0200 Subject: x86: disable TSC for sched_clock() when calibration failed When the TSC calibration fails then TSC is still used in sched_clock(). Disable it completely in that case. Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- arch/x86/kernel/tsc_32.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index b087d691f165..068759db63dd 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -413,6 +413,11 @@ void __init tsc_init(void) if (!cpu_khz) { mark_tsc_unstable("could not calculate TSC khz"); + /* + * We need to disable the TSC completely in this case + * to prevent sched_clock() from using it. + */ + tsc_disabled = 1; return; } -- cgit v1.2.3 From 2584a82deed7196f48066f1b1a7fad4ec5bea961 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Tue, 20 May 2008 18:18:12 -0400 Subject: x86: don't read maxlvt before checking if APIC is mapped A check for unmapped apic was added before reading maxlvt but the early read of maxlvt wasn't removed. Signed-off-by: Chuck Ebbert Cc: Andi Kleen Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- arch/x86/kernel/apic_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5910020c3f24..0633cfd0dc29 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -534,7 +534,7 @@ int setup_profiling_timer(unsigned int multiplier) */ void clear_local_APIC(void) { - int maxlvt = lapic_get_maxlvt(); + int maxlvt; u32 v; /* APIC hasn't been mapped yet */ -- cgit v1.2.3 From 2ddfd20e7c55421435cbf95a5ed3dd6e423cf934 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 22 May 2008 10:37:48 +0200 Subject: namespacecheck: automated fixes Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvmclock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 4bc1be5d5472..08a30986d472 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -53,7 +53,7 @@ static cycle_t kvm_clock_read(void); * have elapsed since the hypervisor wrote the data. So we try to account for * that with system time */ -unsigned long kvm_get_wallclock(void) +static unsigned long kvm_get_wallclock(void) { u32 wc_sec, wc_nsec; u64 delta; @@ -86,7 +86,7 @@ unsigned long kvm_get_wallclock(void) return ts.tv_sec + 1; } -int kvm_set_wallclock(unsigned long now) +static int kvm_set_wallclock(unsigned long now) { return 0; } -- cgit v1.2.3 From db9f600b96c16bb3c7f094e294fbdd370226ad86 Mon Sep 17 00:00:00 2001 From: Miquel van Smoorenburg Date: Wed, 28 May 2008 10:31:25 +0200 Subject: x86: pci-dma.c: use __GFP_NO_OOM instead of __GFP_NORETRY On Wed, 2008-05-28 at 04:47 +0200, Andi Kleen wrote: > > So... why not just remove the setting of __GFP_NORETRY? Why is it > > wrong to oom-kill things in this case? > > When the 16MB zone overflows (which can be common in some workloads) > calling the OOM killer is pretty useless because it has barely any > real user data [only exception would be the "only 16MB" case Alan > mentioned]. Killing random processes in this case is bad. > > I think for 16MB __GFP_NORETRY is ok because there should be > nothing freeable in there so looping is useless. Only exception would be the > "only 16MB total" case again but I'm not sure 2.6 supports that at all > on x86. > > On the other hand d_a_c() does more allocations than just 16MB, especially > on 64bit and the other zones need different strategies. Okay, so how about this then ? Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index c5ef1af8e79d..069e843f0b93 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -397,9 +397,6 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dev->dma_mask == NULL) return NULL; - /* Don't invoke OOM killer */ - gfp |= __GFP_NORETRY; - #ifdef CONFIG_X86_64 /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of @@ -410,7 +407,9 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, #endif again: - page = dma_alloc_pages(dev, gfp, get_order(size)); + /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ + page = dma_alloc_pages(dev, + (gfp & GFP_DMA) ? gfp | __GFP_NORETRY : gfp, get_order(size)); if (page == NULL) return NULL; -- cgit v1.2.3 From f529626a86d61897862aa1bbbb4537773209238e Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 29 May 2008 00:30:21 -0700 Subject: suspend-vs-iommu: prevent suspend if we could not resume iommu/gart support misses suspend/resume code, which can do bad stuff, including memory corruption on resume. Prevent system suspend in case we would be unable to resume. Signed-off-by: Pavel Machek Tested-by: Patrick Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d1695f..aa8ec928caa8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) return aper_base; } +static int gart_resume(struct sys_device *dev) +{ + return 0; +} + +static int gart_suspend(struct sys_device *dev, pm_message_t state) +{ + return -EINVAL; +} + +static struct sysdev_class gart_sysdev_class = { + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, + +}; + +static struct sys_device device_gart = { + .id = 0, + .cls = &gart_sysdev_class, +}; + /* * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. @@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - int i; + int i, error; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) pci_write_config_dword(dev, 0x90, ctl); } + + error = sysdev_class_register(&gart_sysdev_class); + if (!error) + error = sysdev_register(&device_gart); + if (error) + panic("Could not register gart_sysdev -- would corrupt data on next suspend"); flush_gart(); printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", -- cgit v1.2.3 From fb3bbd6a663fe972611676381adc4c60ddfe61ac Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 22 May 2008 18:22:30 -0700 Subject: x86: fix APIC warning on 32bit v2 for http://bugzilla.kernel.org/show_bug.cgi?id=10613 BIOS bug, APIC version is 0 for CPU#0! fixing up to 0x10. (tell your hw vendor) v2: fix 64 bit compilation Signed-off-by: Yinghai Lu Cc: Andrew Morton Cc: "Rafael J. Wysocki" Cc: Gabriel C Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c49ebcc6c41e..33c5216fd3e1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) static void __cpuinit acpi_register_lapic(int id, u8 enabled) { + unsigned int ver = 0; + if (!enabled) { ++disabled_cpus; return; } - generic_processor_info(id, 0); +#ifdef CONFIG_X86_32 + if (boot_cpu_physical_apicid != -1U) + ver = apic_version[boot_cpu_physical_apicid]; +#endif + + generic_processor_info(id, ver); } static int __init @@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address) mp_lapic_addr = address; set_fixmap_nocache(FIX_APIC_BASE, address); - if (boot_cpu_physical_apicid == -1U) + if (boot_cpu_physical_apicid == -1U) { boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); +#ifdef CONFIG_X86_32 + apic_version[boot_cpu_physical_apicid] = + GET_APIC_VERSION(apic_read(APIC_LVR)); +#endif + } } static int __init early_acpi_parse_madt_lapic_addr_ovr(void) -- cgit v1.2.3 From deef325086c3897393b8f7d6bccd03405244fe18 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 May 2008 15:44:38 +0200 Subject: x86: disable preemption in native_smp_prepare_cpus Priit Laes reported the following warning: Call Trace: [] warn_on_slowpath+0x51/0x63 [] sys_ioctl+0x2d/0x5d [] _spin_lock+0xe/0x24 [] task_rq_lock+0x3d/0x73 [] set_cpu_sibling_map+0x336/0x350 [] read_apic_id+0x30/0x62 [] verify_local_APIC+0x90/0x138 [] native_smp_prepare_cpus+0x1f9/0x305 [] kernel_init+0x59/0x2d9 [] _spin_unlock_irq+0x11/0x2b [] child_rip+0xa/0x12 [] kernel_init+0x0/0x2d9 [] child_rip+0x0/0x12 fix this by generally disabling preemption in native_smp_prepare_cpus(). Reported-and-bisected-by: Priit Laes Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/smpboot.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 38988491c622..56078d61c793 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1190,6 +1190,7 @@ static void __init smp_cpu_index_default(void) */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { + preempt_disable(); nmi_watchdog_default(); smp_cpu_index_default(); current_cpu_data = boot_cpu_data; @@ -1206,7 +1207,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); - return; + goto out; } preempt_disable(); @@ -1246,6 +1247,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); setup_boot_clock(); +out: + preempt_enable(); } /* * Early setup to make printk work. -- cgit v1.2.3 From e8a496ac8cd00cabbdaa373db4818a9ad19a1c5a Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 23 May 2008 16:26:37 -0700 Subject: x86: fix broken math-emu with lazy allocation of fpu area Fix the math emulation that got broken with the recent lazy allocation of FPU area. init_fpu() need to be added for the math-emulation path aswell for the FPU area allocation. math emulation enabled kernel booted fine with this, in the presence of "no387 nofxsr" boot param. Signed-off-by: Suresh Siddha Cc: hpa@zytor.com Cc: mingo@elte.hu Signed-off-by: Thomas Gleixner --- arch/x86/kernel/i387.c | 44 +++++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e03cc952f233..eb9ddd8efb82 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void) void __init init_thread_xstate(void) { + if (!HAVE_HWFP) { + xstate_size = sizeof(struct i387_soft_struct); + return; + } + if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -94,7 +99,7 @@ void __cpuinit fpu_init(void) int init_fpu(struct task_struct *tsk) { if (tsk_used_math(tsk)) { - if (tsk == current) + if (HAVE_HWFP && tsk == current) unlazy_fpu(tsk); return 0; } @@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk) return -ENOMEM; } +#ifdef CONFIG_X86_32 + if (!HAVE_HWFP) { + memset(tsk->thread.xstate, 0, xstate_size); + finit(); + set_stopped_child_used_math(tsk); + return 0; + } +#endif + if (cpu_has_fxsr) { struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; @@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - if (!HAVE_HWFP) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - ret = init_fpu(target); if (ret) return ret; + if (!HAVE_HWFP) + return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); + if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fsave, 0, @@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - if (!HAVE_HWFP) - return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - ret = init_fpu(target); if (ret) return ret; set_stopped_child_used_math(target); + if (!HAVE_HWFP) + return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); + if (!cpu_has_fxsr) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fsave, 0, -1); @@ -474,18 +488,18 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) int restore_i387_ia32(struct _fpstate_ia32 __user *buf) { int err; + struct task_struct *tsk = current; - if (HAVE_HWFP) { - struct task_struct *tsk = current; - + if (HAVE_HWFP) clear_fpu(tsk); - if (!used_math()) { - err = init_fpu(tsk); - if (err) - return err; - } + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } + if (HAVE_HWFP) { if (cpu_has_fxsr) err = restore_i387_fxsave(buf); else -- cgit v1.2.3 From cd76374e9de4501acc74f833dc6cb5e7a5dca115 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 29 May 2008 00:30:21 -0700 Subject: suspend-vs-iommu: prevent suspend if we could not resume iommu/gart support misses suspend/resume code, which can do bad stuff, including memory corruption on resume. Prevent system suspend in case we would be unable to resume. Signed-off-by: Pavel Machek Tested-by: Patrick Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-gart_64.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d1695f..aa8ec928caa8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) return aper_base; } +static int gart_resume(struct sys_device *dev) +{ + return 0; +} + +static int gart_suspend(struct sys_device *dev, pm_message_t state) +{ + return -EINVAL; +} + +static struct sysdev_class gart_sysdev_class = { + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, + +}; + +static struct sys_device device_gart = { + .id = 0, + .cls = &gart_sysdev_class, +}; + /* * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. @@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - int i; + int i, error; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) pci_write_config_dword(dev, 0x90, ctl); } + + error = sysdev_class_register(&gart_sysdev_class); + if (!error) + error = sysdev_register(&device_gart); + if (error) + panic("Could not register gart_sysdev -- would corrupt data on next suspend"); flush_gart(); printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", -- cgit v1.2.3 From 870568b39064cab2dd971fe57969916036982862 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 2 Jun 2008 15:57:27 -0700 Subject: x86, fpu: fix CONFIG_PREEMPT=y corruption of application's FPU stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jürgen Mell reported an FPU state corruption bug under CONFIG_PREEMPT, and bisected it to commit v2.6.19-1363-gacc2076, "i386: add sleazy FPU optimization". Add tsk_used_math() checks to prevent calling math_state_restore() which can sleep in the case of !tsk_used_math(). This prevents making a blocking call in __switch_to(). Apparently "fpu_counter > 5" check is not enough, as in some signal handling and fork/exec scenarios, fpu_counter > 5 and !tsk_used_math() is possible. It's a side effect though. This is the failing scenario: process 'A' in save_i387_ia32() just after clear_used_math() Got an interrupt and pre-empted out. At the next context switch to process 'A' again, kernel tries to restore the math state proactively and sees a fpu_counter > 0 and !tsk_used_math() This results in init_fpu() during the __switch_to()'s math_state_restore() And resulting in fpu corruption which will be saved/restored (save_i387_fxsave and restore_i387_fxsave) during the remaining part of the signal handling after the context switch. Bisected-by: Jürgen Mell Signed-off-by: Suresh Siddha Tested-by: Jürgen Mell Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- arch/x86/kernel/process_32.c | 5 ++++- arch/x86/kernel/process_64.c | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60d..6d5483356e74 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -649,8 +649,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now + * + * tsk_used_math() checks prevent calling math_state_restore(), + * which can sleep in the case of !tsk_used_math() */ - if (next_p->fpu_counter > 5) + if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988b..ac54ff56df80 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -658,8 +658,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now + * + * tsk_used_math() checks prevent calling math_state_restore(), + * which can sleep in the case of !tsk_used_math() */ - if (next_p->fpu_counter>5) + if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); return prev_p; } -- cgit v1.2.3 From b7f09ae583c49d28b2796d2fa5893dcf822e3a10 Mon Sep 17 00:00:00 2001 From: Miquel van Smoorenburg Date: Thu, 5 Jun 2008 18:14:44 +0200 Subject: x86, pci-dma.c: don't always add __GFP_NORETRY to gfp Currently arch/x86/kernel/pci-dma.c always adds __GFP_NORETRY to the allocation flags, because it wants to be reasonably sure not to deadlock when calling alloc_pages(). But really that should only be done in two cases: - when allocating memory in the lower 16 MB DMA zone. If there's no free memory there, waiting or OOM killing is of no use - when optimistically trying an allocation in the DMA32 zone when dma_mask < DMA_32BIT_MASK hoping that the allocation happens to fall within the limits of the dma_mask Also blindly adding __GFP_NORETRY to the the gfp variable might not be a good idea since we then also use it when calling dma_ops->alloc_coherent(). Clearing it might also not be a good idea, dma_alloc_coherent()'s caller might have set it on purpose. The gfp variable should not be clobbered. [ mingo@elte.hu: converted to delta patch ontop of previous version. ] Signed-off-by: Miquel van Smoorenburg Signed-off-by: Ingo Molnar --- arch/x86/kernel/pci-dma.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 069e843f0b93..dc00a1331ace 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -378,6 +378,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, struct page *page; unsigned long dma_mask = 0; dma_addr_t bus; + int noretry = 0; /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -397,19 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dev->dma_mask == NULL) return NULL; + /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ + if (gfp & __GFP_DMA) + noretry = 1; + #ifdef CONFIG_X86_64 /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of finding fitting memory in the next higher zone first. If not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { gfp |= GFP_DMA32; + if (dma_mask < DMA_32BIT_MASK) + noretry = 1; + } #endif again: - /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ page = dma_alloc_pages(dev, - (gfp & GFP_DMA) ? gfp | __GFP_NORETRY : gfp, get_order(size)); + noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); if (page == NULL) return NULL; -- cgit v1.2.3 From 3703f39965a197ebd91743fc38d0f640606b8da3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 4 Jun 2008 18:13:37 +0200 Subject: geode: fix modular build -tip testing found this build bug: MODPOST 331 modules ERROR: "geode_mfgpt_toggle_event" [drivers/watchdog/geodewdt.ko] undefined! ERROR: "geode_mfgpt_alloc_timer" [drivers/watchdog/geodewdt.ko] undefined! make[1]: *** [__modpost] Error 1 make: *** [modules] Error 2 with this config: http://redhat.com/~mingo/misc/config-Wed_Jun__4_18_01_59_CEST_2008.bad export those symbols. Signed-off-by: Ingo Molnar --- arch/x86/kernel/mfgpt_32.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3cad17fe026b..07c0f828f488 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -155,6 +155,7 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) wrmsr(msr, value, dummy); return 0; } +EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) { @@ -222,6 +223,7 @@ int geode_mfgpt_alloc_timer(int timer, int domain) /* No timers available - too bad */ return -1; } +EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); #ifdef CONFIG_GEODE_MFGPT_TIMER -- cgit v1.2.3 From 86b2b70e156203149c3861455feec54bc4906e6d Mon Sep 17 00:00:00 2001 From: Joe Korty Date: Mon, 2 Jun 2008 17:21:06 -0400 Subject: x86: fix asm warning in head_32.S On Mon, May 19, 2008 at 04:10:02PM -0700, Linus Torvalds wrote: > It also causes these warnings on 32-bit PAE: > > AS arch/x86/kernel/head_32.o > arch/x86/kernel/head_32.S: Assembler messages: > arch/x86/kernel/head_32.S:225: Warning: left operand is a bignum; integer 0 assumed > arch/x86/kernel/head_32.S:609: Warning: left operand is a bignum; integer 0 assumed > > and I do not see why (the end result seems to be identical). Fix head_32.S gcc bignum warnings when CONFIG_PAE=y. arch/x86/kernel/head_32.S: Assembler messages: arch/x86/kernel/head_32.S:225: Warning: left operand is a bignum; integer 0 assumed arch/x86/kernel/head_32.S:609: Warning: left operand is a bignum; integer 0 assumed The assembler was stumbling over the 64-bit constant 0x100000000 in the KPMDS #define. Testing: a cmp(1) on head_32.o before and after shows the binary is unchanged. Signed-off-by: Joe Korty Cc: Theodore Tso Cc: Gabriel C Cc: Keith Packard Cc: "Pallipadi Venkatesh" Cc: Eric Anholt Cc: "Siddha Suresh B" Cc: bugme-daemon@bugzilla.kernel.org Cc: airlied@linux.ie Cc: "Barnes Jesse" Cc: Jeremy Fitzhardinge Cc: Andrew Morton Cc: "Rafael J. Wysocki" Cc: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b2cc73768a9d..f7357cc0162c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -189,7 +189,7 @@ default_entry: * this stage. */ -#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */ +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ xorl %ebx,%ebx /* %ebx is kept at zero */ -- cgit v1.2.3 From 0b6a39f7ebcb1c82587ce35b401c513eed41ac5c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 9 Jun 2008 13:29:43 +0200 Subject: Revert "x86: fix ioapic bug again" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 6e908947b4995bc0e551a8257c586d5c3e428201. Németh Márton reported: | there is a problem in 2.6.26-rc3 which was not there in case of | 2.6.25: the CPU wakes up ~90,000 times per sec instead of ~60 per sec. | | I also "git bisected" the problem, the result is: | | 6e908947b4995bc0e551a8257c586d5c3e428201 is first bad commit | commit 6e908947b4995bc0e551a8257c586d5c3e428201 | Author: Ingo Molnar | Date: Fri Mar 21 14:32:36 2008 +0100 | | x86: fix ioapic bug again the original problem is fixed by Maciej W. Rozycki in the tip/x86/apic branch (confirmed by Márton), but those changes are too intrusive for v2.6.26 so we'll go for the less intrusive (repeated) revert now. Reported-and-bisected-by: Németh Márton Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic_32.c | 12 ++---------- arch/x86/kernel/nmi_32.c | 9 ++------- 2 files changed, 4 insertions(+), 17 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a40d54fc1fdd..4dc8600d9d20 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -2130,14 +2130,10 @@ static inline void __init check_timer(void) { int apic1, pin1, apic2, pin2; int vector; - unsigned int ver; unsigned long flags; local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - /* * get/set the timer IRQ vector: */ @@ -2150,15 +2146,11 @@ static inline void __init check_timer(void) * mode for the 8259A whenever interrupts are routed * through I/O APICs. Also IRQ0 has to be enabled in * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. Finally timer interrupts - * need to be acknowledged manually in the 8259A for - * timer_interrupt() and for the i82489DX when using - * the NMI watchdog. + * disabled in the local APIC. */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = !cpu_has_tsc; - timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + timer_ack = 1; if (timer_over_8254 > 0) enable_8259A_irq(0); diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 11b14bbaa61e..84160f74eeb0 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -26,7 +26,6 @@ #include #include -#include #include "mach_traps.h" @@ -82,7 +81,7 @@ int __init check_nmi_watchdog(void) prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); if (!prev_nmi_count) - goto error; + return -1; printk(KERN_INFO "Testing NMI watchdog ... "); @@ -119,7 +118,7 @@ int __init check_nmi_watchdog(void) if (!atomic_read(&nmi_active)) { kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - goto error; + return -1; } printk("OK.\n"); @@ -130,10 +129,6 @@ int __init check_nmi_watchdog(void) kfree(prev_nmi_count); return 0; -error: - timer_ack = !cpu_has_tsc; - - return -1; } static int __init setup_nmi_watchdog(char *str) -- cgit v1.2.3 From e32e58a96de4ac35a03349db2ab69f263ded958f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 6 Jun 2008 10:14:08 +0200 Subject: x86: fix lockdep warning during suspend-to-ram Andrew Morton wrote: > I've been seeing the below for a long time during suspend-to-ram on the Vaio. > > > PM: Syncing filesystems ... done. > PM: Preparing system for mem sleep > Freezing user space processes ... <4>------------[ cut here ]------------ > WARNING: at kernel/lockdep.c:2658 check_flags+0x4c/0x127() > Modules linked in: i915 drm ipw2200 sonypi ipv6 autofs4 hidp l2cap bluetooth sunrpc nf_conntrack_netbios_ns ipt_REJECT nf_conntrack_ipv4 xt_state nf_conntrack xt_tcpudp iptable_filter ip_tables x_tables acpi_cpufreq nvram ohci1394 ieee1394 ehci_hcd uhci_hcd sg joydev snd_hda_intel snd_seq_dummy sr_mod snd_seq_oss cdrom snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss ieee80211 pcspkr ieee80211_crypt snd_pcm i2c_i801 snd_timer i2c_core ide_pci_generic piix snd soundcore snd_page_alloc button ext3 jbd ide_disk ide_core [last unloaded: ipw2200] > Pid: 3250, comm: zsh Not tainted 2.6.26-rc5 #1 > [] warn_on_slowpath+0x41/0x6d > [] ? native_sched_clock+0x82/0x96 > [] ? mark_held_locks+0x41/0x5c > [] ? _spin_unlock_irqrestore+0x36/0x58 > [] ? trace_hardirqs_on+0xe6/0x10d > [] ? __lock_acquire+0xae3/0xb2b > [] ? schedule+0x39b/0x3b4 > [] check_flags+0x4c/0x127 > [] lock_acquire+0x3a/0x86 > [] _spin_lock+0x26/0x53 > [] ? refrigerator+0x13/0xc3 > [] refrigerator+0x13/0xc3 > [] get_signal_to_deliver+0x3c/0x31e > [] do_notify_resume+0x91/0x6ee > [] ? lock_release_holdtime+0x50/0x56 > [] ? _spin_unlock_irqrestore+0x36/0x58 > [] ? read_chan+0x0/0x58c > [] ? trace_hardirqs_on+0xe6/0x10d > [] ? _spin_unlock_irqrestore+0x42/0x58 > [] ? tty_ldisc_deref+0x5c/0x63 > [] ? tty_read+0x66/0x98 > [] ? audit_syscall_exit+0x2aa/0x2c5 > [] ? do_syscall_trace+0x6b/0x16f > [] work_notifysig+0x13/0x1b > ======================= > ---[ end trace 25b49fe59a25afa5 ]--- > possible reason: unannotated irqs-off. > irq event stamp: 58919 > hardirqs last enabled at (58919): [] syscall_exit_work+0x11/0x26 Joy - I so love entry.S Best I can make of it: syscall_exit_work resume_userspace DISABLE_INTERRUPTS (no TRACE_IRQS_OFF) work_pending work_notifysig do_notify_resume() do_signal() get_signal_to_deliver() try_to_freeze() refrigerator() task_lock() -> check_flags() -> BANG The normal path is: syscall_exit_work resume_userspace DISABLE_INTERRUPTS restore_all TRACE_IRQS_IRET iret No idea why that would not warn.. Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc3271c..c778e4fa55a2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -248,6 +248,7 @@ ENTRY(resume_userspace) DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret + TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? -- cgit v1.2.3 From 4461145ef1be92851c230f858f6b6f457c99670f Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Thu, 12 Jun 2008 08:55:59 +0200 Subject: x86, lockdep: fix "WARNING: at kernel/lockdep.c:2658 check_flags+0x4c/0x128()" Alessandro Suardi reported: > Recently upgraded my FC6 desktop to Fedora 9; with the > latest nautilus RPM updates my VNC session went nuts > with nautilus pegging the CPU for everything that breathed. > > I now reverted to an earlier nautilus package, but during > the peak CPU period my kernel spat this: > > [314185.623294] ------------[ cut here ]------------ > [314185.623414] WARNING: at kernel/lockdep.c:2658 check_flags+0x4c/0x128() > [314185.623514] Modules linked in: iptable_filter ip_tables x_tables > sunrpc ipv6 fuse snd_via82xx snd_ac97_codec ac97_bus snd_mpu401_uart > snd_rawmidi via686a hwmon parport_pc sg parport uhci_hcd ehci_hcd > [314185.623924] Pid: 12314, comm: nautilus Not tainted 2.6.26-rc5-git2 #4 > [314185.624021] [] warn_on_slowpath+0x41/0x7b > [314185.624021] [] ? do_page_fault+0x2c1/0x5fd > [314185.624021] [] ? up_read+0x16/0x28 > [314185.624021] [] ? do_page_fault+0x2c1/0x5fd > [314185.624021] [] ? __lock_acquire+0xbb4/0xbc3 > [314185.624021] [] check_flags+0x4c/0x128 > [314185.624021] [] lock_acquire+0x31/0x7d > [314185.624021] [] __atomic_notifier_call_chain+0x30/0x80 > [314185.624021] [] ? __atomic_notifier_call_chain+0x0/0x80 > [314185.624021] [] atomic_notifier_call_chain+0xc/0xe > [314185.624021] [] notify_die+0x2d/0x2f > [314185.624021] [] do_int3+0x1f/0x4d > [314185.624021] [] int3+0x27/0x2c > [314185.624021] ======================= > [314185.624021] ---[ end trace 1923f65a2d7bb246 ]--- > [314185.624021] possible reason: unannotated irqs-off. > [314185.624021] irq event stamp: 488879 > [314185.624021] hardirqs last enabled at (488879): [] > restore_nocheck+0x12/0x15 > [314185.624021] hardirqs last disabled at (488878): [] > work_resched+0x19/0x30 > [314185.624021] softirqs last enabled at (488876): [] > __do_softirq+0xa6/0xac > [314185.624021] softirqs last disabled at (488865): [] > do_softirq+0x57/0xa6 > > I didn't seem to find it with some googling, so here it is. > > I was incidentally ltracing that process to try and find out > what was gulping down that much CPU (sorry, no idea > whether ltrace and the WARNING happened at the same > time or which came first) and: Yeah, this is extremely likely to be the source of the warning. The warning should be harmless, however. > Box is my trusty noname K7-800, 512MB RAM; if there's > anything else useful I might be able to provide, just ask. It would be interesting to see where the int3 comes from. Too bad, lockdep doesn't provide the register dump. The stacktrace also doesn't go further than the int3(), I wonder if this int3 came from userspace? The ltrace readme says "software breakpoints, like gdb", so I guess this is the case. Yep, seems like it. This looks relevant: | commit fb1dac909d94ff807cd833d340c6827c3a957159 | Author: Peter Zijlstra | Date: Wed Jan 16 09:51:59 2008 +0100 | | lockdep: more hardirq annotations for notify_die() I'm attaching a similarly-looking patch for this case (DO_VM86_ERROR), though I suspect it might be missing for the other cases (DO_ERROR/DO_ERROR_INFO) as well. Reported-by: Alessandro Suardi Signed-off-by: Vegard Nossum Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps_32.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index bde6f63e15d5..08d752de4eee 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -544,6 +544,7 @@ vm86_trap: #define DO_ERROR(trapnr, signr, str, name) \ void do_##name(struct pt_regs *regs, long error_code) \ { \ + trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ -- cgit v1.2.3 From 1da2e3d679a8ea2d9e82040359a706da0bd3bef6 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 12 Jun 2008 15:21:54 -0700 Subject: provide rtc_cmos platform device Recently (around 2.6.25) I've noticed that RTC no longer works for me. It turned out this is because I use pnpacpi=off kernel option to work around the parport_pc bugs. I always did so, but RTC used to work fine in the past, and now it have regressed. The patch fixes the problem by creating the platform device for the RTC when PNP is disabled. This may also help running the PNP-enabled kernel on an older PCs. Signed-off-by: Stas Sergeev Cc: David Brownell Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Bjorn Helgaas Cc: Adam Belay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/rtc.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 9615eee9b775..05191bbc68b8 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include #include @@ -197,3 +199,35 @@ unsigned long long native_read_tsc(void) } EXPORT_SYMBOL(native_read_tsc); + +static struct resource rtc_resources[] = { + [0] = { + .start = RTC_PORT(0), + .end = RTC_PORT(1), + .flags = IORESOURCE_IO, + }, + [1] = { + .start = RTC_IRQ, + .end = RTC_IRQ, + .flags = IORESOURCE_IRQ, + } +}; + +static struct platform_device rtc_device = { + .name = "rtc_cmos", + .id = -1, + .resource = rtc_resources, + .num_resources = ARRAY_SIZE(rtc_resources), +}; + +static __init int add_rtc_cmos(void) +{ +#ifdef CONFIG_PNP + if (!pnp_platform_devices) + platform_device_register(&rtc_device); +#else + platform_device_register(&rtc_device); +#endif /* CONFIG_PNP */ + return 0; +} +device_initcall(add_rtc_cmos); -- cgit v1.2.3