diff options
Diffstat (limited to 'arch/x86/kernel')
46 files changed, 562 insertions, 231 deletions
diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore index 4ea38a39aed4..08f4fd731469 100644 --- a/arch/x86/kernel/.gitignore +++ b/arch/x86/kernel/.gitignore @@ -1,2 +1,3 @@ vsyscall.lds vsyscall_32.lds +vmlinux.lds diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bbdacb398d48..77807d4769c9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -82,10 +82,9 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o +obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o -ifdef CONFIG_INPUT_PCSPKR -obj-y += pcspeaker.o -endif +obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o obj-$(CONFIG_SCx200) += scx200.o scx200-y += scx200_32.o diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 7335959b6aff..fd5ca97a2ad5 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -10,5 +10,5 @@ endif $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin $(obj)/realmode/wakeup.bin: FORCE - $(Q)$(MAKE) $(build)=$(obj)/realmode $@ + $(Q)$(MAKE) $(build)=$(obj)/realmode diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c49ebcc6c41e..33c5216fd3e1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table) static void __cpuinit acpi_register_lapic(int id, u8 enabled) { + unsigned int ver = 0; + if (!enabled) { ++disabled_cpus; return; } - generic_processor_info(id, 0); +#ifdef CONFIG_X86_32 + if (boot_cpu_physical_apicid != -1U) + ver = apic_version[boot_cpu_physical_apicid]; +#endif + + generic_processor_info(id, ver); } static int __init @@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address) mp_lapic_addr = address; set_fixmap_nocache(FIX_APIC_BASE, address); - if (boot_cpu_physical_apicid == -1U) + if (boot_cpu_physical_apicid == -1U) { boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); +#ifdef CONFIG_X86_32 + apic_version[boot_cpu_physical_apicid] = + GET_APIC_VERSION(apic_read(APIC_LVR)); +#endif + } } static int __init early_acpi_parse_madt_lapic_addr_ovr(void) diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile index 092900854acc..1c31cc0e9def 100644 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ b/arch/x86/kernel/acpi/realmode/Makefile @@ -6,7 +6,8 @@ # for more details. # -targets := wakeup.bin wakeup.elf +always := wakeup.bin +targets := wakeup.elf wakeup.lds wakeup-y += wakeup.o wakemain.o video-mode.o copy.o @@ -48,7 +49,7 @@ LDFLAGS_wakeup.elf := -T CPPFLAGS_wakeup.lds += -P -C -$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE +$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE $(call if_changed,ld) OBJCOPYFLAGS_wakeup.bin := -O binary diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S index f9b77fb37e5b..3355973b12ac 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.S @@ -5,6 +5,7 @@ #include <asm/msr-index.h> #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/processor-flags.h> .code16 .section ".header", "a" @@ -24,6 +25,11 @@ pmode_gdt: .quad 0 realmode_flags: .long 0 real_magic: .long 0 trampoline_segment: .word 0 +_pad1: .byte 0 +wakeup_jmp: .byte 0xea /* ljmpw */ +wakeup_jmp_off: .word 3f +wakeup_jmp_seg: .word 0 +wakeup_gdt: .quad 0, 0, 0 signature: .long 0x51ee1111 .text @@ -34,11 +40,34 @@ _start: cli cld + /* Apparently some dimwit BIOS programmers don't know how to + program a PM to RM transition, and we might end up here with + junk in the data segment descriptor registers. The only way + to repair that is to go into PM and fix it ourselves... */ + movw $16, %cx + lgdtl %cs:wakeup_gdt + movl %cr0, %eax + orb $X86_CR0_PE, %al + movl %eax, %cr0 + jmp 1f +1: ljmpw $8, $2f +2: + movw %cx, %ds + movw %cx, %es + movw %cx, %ss + movw %cx, %fs + movw %cx, %gs + + andb $~X86_CR0_PE, %al + movl %eax, %cr0 + jmp wakeup_jmp +3: /* Set up segments */ movw %cs, %ax movw %ax, %ds movw %ax, %es movw %ax, %ss + lidtl wakeup_idt movl $wakeup_stack_end, %esp @@ -98,7 +127,14 @@ bogus_real_magic: jmp 1b .data - .balign 4 + .balign 8 + + /* This is the standard real-mode IDT */ +wakeup_idt: + .word 0xffff /* limit */ + .long 0 /* address */ + .word 0 + .globl HEAP, heap_end HEAP: .long wakeup_heap diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h index ef8166fe8020..69d38d0b2b64 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.h +++ b/arch/x86/kernel/acpi/realmode/wakeup.h @@ -24,6 +24,11 @@ struct wakeup_header { u32 realmode_flags; u32 real_magic; u16 trampoline_segment; /* segment with trampoline code, 64-bit only */ + u8 _pad1; + u8 wakeup_jmp; + u16 wakeup_jmp_off; + u16 wakeup_jmp_seg; + u64 wakeup_gdt[3]; u32 signature; /* To check we have correct structure */ } __attribute__((__packed__)); diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S index 22fab6c4be15..7da00b799cda 100644 --- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S +++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S @@ -12,11 +12,6 @@ ENTRY(_start) SECTIONS { - . = HEADER_OFFSET; - .header : { - *(.header) - } - . = 0; .text : { *(.text*) @@ -50,6 +45,11 @@ SECTIONS __bss_end = .; } + . = HEADER_OFFSET; + .header : { + *(.header) + } + . = ALIGN(16); _end = .; diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index afc25ee9964b..36af01f029ed 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -50,6 +50,20 @@ int acpi_save_state_mem(void) header->video_mode = saved_video_mode; + header->wakeup_jmp_seg = acpi_wakeup_address >> 4; + /* GDT[0]: GDT self-pointer */ + header->wakeup_gdt[0] = + (u64)(sizeof(header->wakeup_gdt) - 1) + + ((u64)(acpi_wakeup_address + + ((char *)&header->wakeup_gdt - (char *)acpi_realmode)) + << 16); + /* GDT[1]: real-mode-like code segment */ + header->wakeup_gdt[1] = (0x009bULL << 40) + + ((u64)acpi_wakeup_address << 16) + 0xffff; + /* GDT[2]: real-mode-like data segment */ + header->wakeup_gdt[2] = (0x0093ULL << 40) + + ((u64)acpi_wakeup_address << 16) + 0xffff; + #ifndef CONFIG_64BIT store_gdt((struct desc_ptr *)&header->pmode_gdt); @@ -111,7 +125,7 @@ void __init acpi_reserve_bootmem(void) return; } - acpi_wakeup_address = acpi_realmode; + acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); } diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 5910020c3f24..0633cfd0dc29 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c @@ -534,7 +534,7 @@ int setup_profiling_timer(unsigned int multiplier) */ void clear_local_APIC(void) { - int maxlvt = lapic_get_maxlvt(); + int maxlvt; u32 v; /* APIC hasn't been mapped yet */ diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 238468ae1993..c2e1ce33c7cb 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -6,6 +6,7 @@ #include <linux/cpu.h> +#include <asm/pat.h> #include <asm/processor.h> struct cpuid_bit { @@ -48,3 +49,23 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) set_cpu_cap(c, cb->feature); } } + +#ifdef CONFIG_X86_PAT +void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) +{ + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + if (c->x86 >= 0xf && c->x86 <= 0x11) + return; + break; + case X86_VENDOR_INTEL: + if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + return; + break; + } + + pat_disable(cpu_has_pat ? + "PAT disabled. Not yet verified on this CPU type." : + "PAT not supported by CPU."); +} +#endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 35b4f6a9c8ef..d0463a946247 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -12,6 +12,7 @@ #include <asm/mmu_context.h> #include <asm/mtrr.h> #include <asm/mce.h> +#include <asm/pat.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/mpspec.h> #include <asm/apic.h> @@ -308,19 +309,6 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) } - clear_cpu_cap(c, X86_FEATURE_PAT); - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_PAT); - break; - case X86_VENDOR_INTEL: - if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) - set_cpu_cap(c, X86_FEATURE_PAT); - break; - } - } /* @@ -409,18 +397,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); } - clear_cpu_cap(c, X86_FEATURE_PAT); - - switch (c->x86_vendor) { - case X86_VENDOR_AMD: - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_PAT); - break; - case X86_VENDOR_INTEL: - if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) - set_cpu_cap(c, X86_FEATURE_PAT); - break; - } } static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) @@ -651,6 +627,7 @@ void __init early_cpu_init(void) cpu_devs[cvdev->vendor] = cvdev->cpu_dev; early_cpu_detect(); + validate_pat_support(&boot_cpu_data); } /* Make sure %fs is initialized properly in idle threads */ diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index af4a867a097c..777a7ff075de 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c @@ -245,7 +245,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq, if ((ecx > 95) || (ecx == 0) || (eax < ebx)) return -EIO; - edx = (eax - ebx) / (100 - ecx); + edx = ((eax - ebx) * 100) / (100 - ecx); *low_freq = edx * 1000; /* back to kHz */ dprintk("low frequency is %u kHz\n", *low_freq); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 46d4034d9f37..206791eb46e3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1127,12 +1127,23 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) * an UP version, and is deprecated by AMD. */ if (num_online_cpus() != 1) { - printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n"); +#ifndef CONFIG_ACPI_PROCESSOR + printk(KERN_ERR PFX "ACPI Processor support is required " + "for SMP systems but is absent. Please load the " + "ACPI Processor module before starting this " + "driver.\n"); +#else + printk(KERN_ERR PFX "Your BIOS does not provide ACPI " + "_PSS objects in a way that Linux understands. " + "Please report this to the Linux ACPI maintainers" + " and complain to your BIOS vendor.\n"); +#endif kfree(data); return -ENODEV; } if (pol->cpu != 0) { - printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n"); + printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than " + "CPU0. Complain to your BIOS vendor.\n"); kfree(data); return -ENODEV; } diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 5d23d85624d4..4b63c8e1f13b 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c @@ -49,13 +49,13 @@ void efi_call_phys_prelog(void) local_irq_save(efi_rt_eflags); /* - * If I don't have PSE, I should just duplicate two entries in page - * directory. If I have PSE, I just need to duplicate one entry in + * If I don't have PAE, I should just duplicate two entries in page + * directory. If I have PAE, I just need to duplicate one entry in * page directory. */ cr4 = read_cr4(); - if (cr4 & X86_CR4_PSE) { + if (cr4 & X86_CR4_PAE) { efi_bak_pg_dir_pointer[0].pgd = swapper_pg_dir[pgd_index(0)].pgd; swapper_pg_dir[0].pgd = @@ -93,7 +93,7 @@ void efi_call_phys_epilog(void) cr4 = read_cr4(); - if (cr4 & X86_CR4_PSE) { + if (cr4 & X86_CR4_PAE) { swapper_pg_dir[pgd_index(0)].pgd = efi_bak_pg_dir_pointer[0].pgd; } else { diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 2a609dc3271c..c778e4fa55a2 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -248,6 +248,7 @@ ENTRY(resume_userspace) DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret + TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done on # int/exception return? diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c index 9dad6ca6cd70..9b08e852fd1a 100644 --- a/arch/x86/kernel/geode_32.c +++ b/arch/x86/kernel/geode_32.c @@ -161,6 +161,28 @@ void geode_gpio_setup_event(unsigned int gpio, int pair, int pme) } EXPORT_SYMBOL_GPL(geode_gpio_setup_event); +int geode_has_vsa2(void) +{ + static int has_vsa2 = -1; + + if (has_vsa2 == -1) { + u16 val; + + /* + * The VSA has virtual registers that we can query for a + * signature. + */ + outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); + outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); + + val = inw(VSA_VRC_DATA); + has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG); + } + + return has_vsa2; +} +EXPORT_SYMBOL_GPL(geode_has_vsa2); + static int __init geode_southbridge_init(void) { if (!is_geode()) diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b2cc73768a9d..f7357cc0162c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -189,7 +189,7 @@ default_entry: * this stage. */ -#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */ +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ xorl %ebx,%ebx /* %ebx is kept at zero */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 10a1955bb1d1..b817974ef942 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -128,7 +128,7 @@ ident_complete: /* Fixup phys_base */ addq %rbp, phys_base(%rip) -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_TRAMPOLINE addq %rbp, trampoline_level4_pgt + 0(%rip) addq %rbp, trampoline_level4_pgt + (511*8)(%rip) #endif diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index db6839b53195..95e80e5033c3 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void) void __init init_thread_xstate(void) { + if (!HAVE_HWFP) { + xstate_size = sizeof(struct i387_soft_struct); + return; + } + if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); #ifdef CONFIG_X86_32 @@ -94,7 +99,7 @@ void __cpuinit fpu_init(void) int init_fpu(struct task_struct *tsk) { if (tsk_used_math(tsk)) { - if (tsk == current) + if (HAVE_HWFP && tsk == current) unlazy_fpu(tsk); return 0; } @@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk) return -ENOMEM; } +#ifdef CONFIG_X86_32 + if (!HAVE_HWFP) { + memset(tsk->thread.xstate, 0, xstate_size); + finit(); + set_stopped_child_used_math(tsk); + return 0; + } +#endif + if (cpu_has_fxsr) { struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; @@ -148,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, int ret; if (!cpu_has_fxsr) - return -ENODEV; + return -EIO; ret = init_fpu(target); if (ret) @@ -165,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, int ret; if (!cpu_has_fxsr) - return -ENODEV; + return -EIO; ret = init_fpu(target); if (ret) @@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - if (!HAVE_HWFP) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - ret = init_fpu(target); if (ret) return ret; + if (!HAVE_HWFP) + return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); + if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fsave, 0, @@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - if (!HAVE_HWFP) - return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - ret = init_fpu(target); if (ret) return ret; set_stopped_child_used_math(target); + if (!HAVE_HWFP) + return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); + if (!cpu_has_fxsr) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.xstate->fsave, 0, -1); @@ -450,7 +464,6 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; - clear_fpu(tsk); return __copy_from_user(&tsk->thread.xstate->fsave, buf, sizeof(struct i387_fsave_struct)); } @@ -461,7 +474,6 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) struct user_i387_ia32_struct env; int err; - clear_fpu(tsk); err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct)); /* mxcsr reserved bits must be masked to zero for security reasons */ @@ -476,6 +488,16 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) int restore_i387_ia32(struct _fpstate_ia32 __user *buf) { int err; + struct task_struct *tsk = current; + + if (HAVE_HWFP) + clear_fpu(tsk); + + if (!used_math()) { + err = init_fpu(tsk); + if (err) + return err; + } if (HAVE_HWFP) { if (cpu_has_fxsr) diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index 3d01e47777db..a4f93b4120c1 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c @@ -11,7 +11,6 @@ #include <asm/desc.h> static struct fs_struct init_fs = INIT_FS; -static struct files_struct init_files = INIT_FILES; static struct signal_struct init_signals = INIT_SIGNALS(init_signals); static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct mm_struct init_mm = INIT_MM(init_mm); diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index a40d54fc1fdd..4dc8600d9d20 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c @@ -2130,14 +2130,10 @@ static inline void __init check_timer(void) { int apic1, pin1, apic2, pin2; int vector; - unsigned int ver; unsigned long flags; local_irq_save(flags); - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - /* * get/set the timer IRQ vector: */ @@ -2150,15 +2146,11 @@ static inline void __init check_timer(void) * mode for the 8259A whenever interrupts are routed * through I/O APICs. Also IRQ0 has to be enabled in * the 8259A which implies the virtual wire has to be - * disabled in the local APIC. Finally timer interrupts - * need to be acknowledged manually in the 8259A for - * timer_interrupt() and for the i82489DX when using - * the NMI watchdog. + * disabled in the local APIC. */ apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); init_8259A(1); - timer_ack = !cpu_has_tsc; - timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); + timer_ack = 1; if (timer_over_8254 > 0) enable_8259A_irq(0); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index ddee04043aeb..87edf1ceb1df 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -18,6 +18,7 @@ #include <linux/clocksource.h> #include <linux/kvm_para.h> +#include <asm/pvclock.h> #include <asm/arch_hooks.h> #include <asm/msr.h> #include <asm/apic.h> @@ -36,83 +37,47 @@ static int parse_no_kvmclock(char *arg) early_param("no-kvmclock", parse_no_kvmclock); /* The hypervisor will put information about time periodically here */ -static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); -#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field +static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock); +static struct pvclock_wall_clock wall_clock; -static inline u64 kvm_get_delta(u64 last_tsc) -{ - int cpu = smp_processor_id(); - u64 delta = native_read_tsc() - last_tsc; - return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; -} - -static struct kvm_wall_clock wall_clock; -static cycle_t kvm_clock_read(void); /* * The wallclock is the time of day when we booted. Since then, some time may * have elapsed since the hypervisor wrote the data. So we try to account for * that with system time */ -unsigned long kvm_get_wallclock(void) +static unsigned long kvm_get_wallclock(void) { - u32 wc_sec, wc_nsec; - u64 delta; + struct pvclock_vcpu_time_info *vcpu_time; struct timespec ts; - int version, nsec; int low, high; low = (int)__pa(&wall_clock); high = ((u64)__pa(&wall_clock) >> 32); + native_write_msr(MSR_KVM_WALL_CLOCK, low, high); - delta = kvm_clock_read(); + vcpu_time = &get_cpu_var(hv_clock); + pvclock_read_wallclock(&wall_clock, vcpu_time, &ts); + put_cpu_var(hv_clock); - native_write_msr(MSR_KVM_WALL_CLOCK, low, high); - do { - version = wall_clock.wc_version; - rmb(); - wc_sec = wall_clock.wc_sec; - wc_nsec = wall_clock.wc_nsec; - rmb(); - } while ((wall_clock.wc_version != version) || (version & 1)); - - delta = kvm_clock_read() - delta; - delta += wc_nsec; - nsec = do_div(delta, NSEC_PER_SEC); - set_normalized_timespec(&ts, wc_sec + delta, nsec); - /* - * Of all mechanisms of time adjustment I've tested, this one - * was the champion! - */ - return ts.tv_sec + 1; + return ts.tv_sec; } -int kvm_set_wallclock(unsigned long now) +static int kvm_set_wallclock(unsigned long now) { - return 0; + return -1; } -/* - * This is our read_clock function. The host puts an tsc timestamp each time - * it updates a new time. Without the tsc adjustment, we can have a situation - * in which a vcpu starts to run earlier (smaller system_time), but probes - * time later (compared to another vcpu), leading to backwards time - */ static cycle_t kvm_clock_read(void) { - u64 last_tsc, now; - int cpu; - - preempt_disable(); - cpu = smp_processor_id(); + struct pvclock_vcpu_time_info *src; + cycle_t ret; - last_tsc = get_clock(cpu, tsc_timestamp); - now = get_clock(cpu, system_time); - - now += kvm_get_delta(last_tsc); - preempt_enable(); - - return now; + src = &get_cpu_var(hv_clock); + ret = pvclock_clocksource_read(src); + put_cpu_var(hv_clock); + return ret; } + static struct clocksource kvm_clock = { .name = "kvm-clock", .read = kvm_clock_read, @@ -123,26 +88,37 @@ static struct clocksource kvm_clock = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static int kvm_register_clock(void) +static int kvm_register_clock(char *txt) { int cpu = smp_processor_id(); int low, high; low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); - + printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", + cpu, high, low, txt); return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); } +#ifdef CONFIG_X86_LOCAL_APIC static void kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, * we shouldn't fail. */ - WARN_ON(kvm_register_clock()); + WARN_ON(kvm_register_clock("secondary cpu clock")); /* ok, done with our trickery, call native */ setup_secondary_APIC_clock(); } +#endif + +#ifdef CONFIG_SMP +void __init kvm_smp_prepare_boot_cpu(void) +{ + WARN_ON(kvm_register_clock("primary cpu clock")); + native_smp_prepare_boot_cpu(); +} +#endif /* * After the clock is registered, the host will keep writing to the @@ -172,12 +148,17 @@ void __init kvmclock_init(void) return; if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { - if (kvm_register_clock()) + if (kvm_register_clock("boot clock")) return; pv_time_ops.get_wallclock = kvm_get_wallclock; pv_time_ops.set_wallclock = kvm_set_wallclock; pv_time_ops.sched_clock = kvm_clock_read; +#ifdef CONFIG_X86_LOCAL_APIC pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; +#endif +#ifdef CONFIG_SMP + smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; +#endif machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC machine_ops.crash_shutdown = kvm_crash_shutdown; diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3cad17fe026b..07c0f828f488 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c @@ -155,6 +155,7 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) wrmsr(msr, value, dummy); return 0; } +EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) { @@ -222,6 +223,7 @@ int geode_mfgpt_alloc_timer(int timer, int domain) /* No timers available - too bad */ return -1; } +EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); #ifdef CONFIG_GEODE_MFGPT_TIMER diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3e2c54dc8b29..404683b94e79 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -794,6 +794,11 @@ void __init find_smp_config(void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ +/* + * Keep this outside and initialized to 0, for !CONFIG_ACPI builds: + */ +int es7000_plat; + #ifdef CONFIG_ACPI #ifdef CONFIG_X86_IO_APIC @@ -909,8 +914,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) MP_intsrc_info(&intsrc); } -int es7000_plat; - void __init mp_config_acpi_legacy_irqs(void) { struct mpc_config_intsrc intsrc; diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 11b14bbaa61e..84160f74eeb0 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -26,7 +26,6 @@ #include <asm/smp.h> #include <asm/nmi.h> -#include <asm/timer.h> #include "mach_traps.h" @@ -82,7 +81,7 @@ int __init check_nmi_watchdog(void) prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); if (!prev_nmi_count) - goto error; + return -1; printk(KERN_INFO "Testing NMI watchdog ... "); @@ -119,7 +118,7 @@ int __init check_nmi_watchdog(void) if (!atomic_read(&nmi_active)) { kfree(prev_nmi_count); atomic_set(&nmi_active, -1); - goto error; + return -1; } printk("OK.\n"); @@ -130,10 +129,6 @@ int __init check_nmi_watchdog(void) kfree(prev_nmi_count); return 0; -error: - timer_ack = !cpu_has_tsc; - - return -1; } static int __init setup_nmi_watchdog(char *str) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0c37f16b6950..dc00a1331ace 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -378,6 +378,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, struct page *page; unsigned long dma_mask = 0; dma_addr_t bus; + int noretry = 0; /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); @@ -385,30 +386,37 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) return memory; - if (!dev) + if (!dev) { dev = &fallback_dev; + gfp |= GFP_DMA; + } dma_mask = dev->coherent_dma_mask; if (dma_mask == 0) - dma_mask = DMA_32BIT_MASK; + dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK; /* Device not DMA able */ if (dev->dma_mask == NULL) return NULL; - /* Don't invoke OOM killer */ - gfp |= __GFP_NORETRY; + /* Don't invoke OOM killer or retry in lower 16MB DMA zone */ + if (gfp & __GFP_DMA) + noretry = 1; #ifdef CONFIG_X86_64 /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of finding fitting memory in the next higher zone first. If not retry with true GFP_DMA. -AK */ - if (dma_mask <= DMA_32BIT_MASK) + if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) { gfp |= GFP_DMA32; + if (dma_mask < DMA_32BIT_MASK) + noretry = 1; + } #endif again: - page = dma_alloc_pages(dev, gfp, get_order(size)); + page = dma_alloc_pages(dev, + noretry ? gfp | __GFP_NORETRY : gfp, get_order(size)); if (page == NULL) return NULL; diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c07455d1695f..aa8ec928caa8 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -26,6 +26,7 @@ #include <linux/kdebug.h> #include <linux/scatterlist.h> #include <linux/iommu-helper.h> +#include <linux/sysdev.h> #include <asm/atomic.h> #include <asm/io.h> #include <asm/mtrr.h> @@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size) return aper_base; } +static int gart_resume(struct sys_device *dev) +{ + return 0; +} + +static int gart_suspend(struct sys_device *dev, pm_message_t state) +{ + return -EINVAL; +} + +static struct sysdev_class gart_sysdev_class = { + .name = "gart", + .suspend = gart_suspend, + .resume = gart_resume, + +}; + +static struct sys_device device_gart = { + .id = 0, + .cls = &gart_sysdev_class, +}; + /* * Private Northbridge GATT initialization in case we cannot use the * AGP driver for some reason. @@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) unsigned aper_base, new_aper_base; struct pci_dev *dev; void *gatt; - int i; + int i, error; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) pci_write_config_dword(dev, 0x90, ctl); } + + error = sysdev_class_register(&gart_sysdev_class); + if (!error) + error = sysdev_register(&device_gart); + if (error) + panic("Could not register gart_sysdev -- would corrupt data on next suspend"); flush_gart(); printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 67e9b4a1e89d..ba370dc8685b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -99,15 +99,6 @@ static void mwait_idle(void) local_irq_enable(); } - -static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) -{ - if (force_mwait) - return 1; - /* Any C1 states supported? */ - return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; -} - /* * On SMP it's slightly faster (but much more power-consuming!) * to poll the ->work.need_resched flag instead of waiting for the @@ -119,6 +110,33 @@ static void poll_idle(void) cpu_relax(); } +/* + * mwait selection logic: + * + * It depends on the CPU. For AMD CPUs that support MWAIT this is + * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings + * then depend on a clock divisor and current Pstate of the core. If + * all cores of a processor are in halt state (C1) the processor can + * enter the C1E (C1 enhanced) state. If mwait is used this will never + * happen. + * + * idle=mwait overrides this decision and forces the usage of mwait. + */ +static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) +{ + if (force_mwait) + return 1; + + if (c->x86_vendor == X86_VENDOR_AMD) { + switch(c->x86) { + case 0x10: + case 0x11: + return 0; + } + } + return 1; +} + void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { static int selected; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f8476dfbb60d..e2db9ac5c61c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -333,6 +333,7 @@ void flush_thread(void) /* * Forget coprocessor state.. */ + tsk->fpu_counter = 0; clear_fpu(tsk); clear_used_math(); } @@ -649,8 +650,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now + * + * tsk_used_math() checks prevent calling math_state_restore(), + * which can sleep in the case of !tsk_used_math() */ - if (next_p->fpu_counter > 5) + if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2319f39988b..c6eb5c91e5f6 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -294,6 +294,7 @@ void flush_thread(void) /* * Forget coprocessor state.. */ + tsk->fpu_counter = 0; clear_fpu(tsk); clear_used_math(); } @@ -658,8 +659,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* If the task has used fpu the last 5 timeslices, just do a full * restore of the math state immediately to avoid the trap; the * chances of needing FPU soon are obviously high now + * + * tsk_used_math() checks prevent calling math_state_restore(), + * which can sleep in the case of !tsk_used_math() */ - if (next_p->fpu_counter>5) + if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index fb03ef380f0e..a7835f282936 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1303,6 +1303,9 @@ static const struct user_regset_view user_x86_64_view = { #define genregs32_get genregs_get #define genregs32_set genregs_set +#define user_i387_ia32_struct user_i387_struct +#define user32_fxsr_struct user_fxsr_struct + #endif /* CONFIG_X86_64 */ #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -1315,13 +1318,13 @@ static const struct user_regset x86_32_regsets[] = { }, [REGSET_FP] = { .core_note_type = NT_PRFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(u32), + .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = fpregs_active, .get = fpregs_get, .set = fpregs_set }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, - .n = sizeof(struct user_i387_struct) / sizeof(u32), + .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set }, diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c new file mode 100644 index 000000000000..05fbe9a0325a --- /dev/null +++ b/arch/x86/kernel/pvclock.c @@ -0,0 +1,141 @@ +/* paravirtual clock -- common code used by kvm/xen + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include <linux/kernel.h> +#include <linux/percpu.h> +#include <asm/pvclock.h> + +/* + * These are perodically updated + * xen: magic shared_info page + * kvm: gpa registered via msr + * and then copied here. + */ +struct pvclock_shadow_time { + u64 tsc_timestamp; /* TSC at last update of time vals. */ + u64 system_timestamp; /* Time, in nanosecs, since boot. */ + u32 tsc_to_nsec_mul; + int tsc_shift; + u32 version; +}; + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. + */ +static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift) +{ + u64 product; +#ifdef __i386__ + u32 tmp1, tmp2; +#endif + + if (shift < 0) + delta >>= -shift; + else + delta <<= shift; + +#ifdef __i386__ + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "xor %5,%5 ; " + "add %4,%%eax ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) ); +#elif __x86_64__ + __asm__ ( + "mul %%rdx ; shrd $32,%%rdx,%%rax" + : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) ); +#else +#error implement me! +#endif + + return product; +} + +static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow) +{ + u64 delta = native_read_tsc() - shadow->tsc_timestamp; + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); +} + +/* + * Reads a consistent set of time-base values from hypervisor, + * into a shadow data area. + */ +static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst, + struct pvclock_vcpu_time_info *src) +{ + do { + dst->version = src->version; + rmb(); /* fetch version before data */ + dst->tsc_timestamp = src->tsc_timestamp; + dst->system_timestamp = src->system_time; + dst->tsc_to_nsec_mul = src->tsc_to_system_mul; + dst->tsc_shift = src->tsc_shift; + rmb(); /* test version after fetching data */ + } while ((src->version & 1) || (dst->version != src->version)); + + return dst->version; +} + +cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) +{ + struct pvclock_shadow_time shadow; + unsigned version; + cycle_t ret, offset; + + do { + version = pvclock_get_time_values(&shadow, src); + barrier(); + offset = pvclock_get_nsec_offset(&shadow); + ret = shadow.system_timestamp + offset; + barrier(); + } while (version != src->version); + + return ret; +} + +void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, + struct pvclock_vcpu_time_info *vcpu_time, + struct timespec *ts) +{ + u32 version; + u64 delta; + struct timespec now; + + /* get wallclock at system boot */ + do { + version = wall_clock->version; + rmb(); /* fetch version before time */ + now.tv_sec = wall_clock->sec; + now.tv_nsec = wall_clock->nsec; + rmb(); /* fetch time before checking version */ + } while ((wall_clock->version & 1) || (version != wall_clock->version)); + + delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ + delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; + + now.tv_nsec = do_div(delta, NSEC_PER_SEC); + now.tv_sec = delta; + + set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); +} diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 07c6d42ab5ff..f6be7d5f82f8 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -149,7 +149,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"), - DMI_MATCH(DMI_BOARD_NAME, "0WF810"), }, }, { /* Handle problems with rebooting on Dell Optiplex 745's DFF*/ diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 9615eee9b775..05191bbc68b8 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -4,6 +4,8 @@ #include <linux/acpi.h> #include <linux/bcd.h> #include <linux/mc146818rtc.h> +#include <linux/platform_device.h> +#include <linux/pnp.h> #include <asm/time.h> #include <asm/vsyscall.h> @@ -197,3 +199,35 @@ unsigned long long native_read_tsc(void) } EXPORT_SYMBOL(native_read_tsc); + +static struct resource rtc_resources[] = { + [0] = { + .start = RTC_PORT(0), + .end = RTC_PORT(1), + .flags = IORESOURCE_IO, + }, + [1] = { + .start = RTC_IRQ, + .end = RTC_IRQ, + .flags = IORESOURCE_IRQ, + } +}; + +static struct platform_device rtc_device = { + .name = "rtc_cmos", + .id = -1, + .resource = rtc_resources, + .num_resources = ARRAY_SIZE(rtc_resources), +}; + +static __init int add_rtc_cmos(void) +{ +#ifdef CONFIG_PNP + if (!pnp_platform_devices) + platform_device_register(&rtc_device); +#else + platform_device_register(&rtc_device); +#endif /* CONFIG_PNP */ + return 0; +} +device_initcall(add_rtc_cmos); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c0c68c18a788..6f80b852a196 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -12,6 +12,7 @@ #include <asm/mpspec.h> #include <asm/apicdef.h> +#ifdef CONFIG_X86_LOCAL_APIC unsigned int num_processors; unsigned disabled_cpus __cpuinitdata; /* Processor that is doing the boot up */ @@ -23,8 +24,9 @@ EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; +#endif -#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP) +#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) /* * Copy data used in early init routines from the initial arrays to the * per cpu data areas. These arrays then become expendable and the diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 2283422af794..5a2f8e063887 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -127,7 +127,12 @@ static struct resource standard_io_resources[] = { { }, { .name = "keyboard", .start = 0x0060, - .end = 0x006f, + .end = 0x0060, + .flags = IORESOURCE_BUSY | IORESOURCE_IO +}, { + .name = "keyboard", + .start = 0x0064, + .end = 0x0064, .flags = IORESOURCE_BUSY | IORESOURCE_IO }, { .name = "dma page reg", @@ -527,10 +532,16 @@ static void __init reserve_crashkernel(void) (unsigned long)(crash_size >> 20), (unsigned long)(crash_base >> 20), (unsigned long)(total_mem >> 20)); + + if (reserve_bootmem(crash_base, crash_size, + BOOTMEM_EXCLUSIVE) < 0) { + printk(KERN_INFO "crashkernel reservation " + "failed - memory is in use\n"); + return; + } + crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; - reserve_bootmem(crash_base, crash_size, - BOOTMEM_DEFAULT); } else printk(KERN_INFO "crashkernel reservation failed - " "you have to specify a base address\n"); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 22c14e21c97c..6dff1286ad8a 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -70,6 +70,7 @@ #include <asm/ds.h> #include <asm/topology.h> #include <asm/trampoline.h> +#include <asm/pat.h> #include <mach_apic.h> #ifdef CONFIG_PARAVIRT @@ -128,7 +129,9 @@ static struct resource standard_io_resources[] = { .flags = IORESOURCE_BUSY | IORESOURCE_IO }, { .name = "timer1", .start = 0x50, .end = 0x53, .flags = IORESOURCE_BUSY | IORESOURCE_IO }, - { .name = "keyboard", .start = 0x60, .end = 0x6f, + { .name = "keyboard", .start = 0x60, .end = 0x60, + .flags = IORESOURCE_BUSY | IORESOURCE_IO }, + { .name = "keyboard", .start = 0x64, .end = 0x64, .flags = IORESOURCE_BUSY | IORESOURCE_IO }, { .name = "dma page reg", .start = 0x80, .end = 0x8f, .flags = IORESOURCE_BUSY | IORESOURCE_IO }, @@ -948,7 +951,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) { if (c->x86 == 0x6 && c->x86_model >= 0xf) - set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); } static void __cpuinit init_centaur(struct cpuinfo_x86 *c) @@ -1063,25 +1066,19 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); - - clear_cpu_cap(c, X86_FEATURE_PAT); - switch (c->x86_vendor) { case X86_VENDOR_AMD: early_init_amd(c); - if (c->x86 >= 0xf && c->x86 <= 0x11) - set_cpu_cap(c, X86_FEATURE_PAT); break; case X86_VENDOR_INTEL: early_init_intel(c); - if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) - set_cpu_cap(c, X86_FEATURE_PAT); break; case X86_VENDOR_CENTAUR: early_init_centaur(c); break; } + validate_pat_support(c); } /* diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 8f75893a6467..0cb7aadc87cd 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -231,7 +231,8 @@ native_smp_call_function_mask(cpumask_t mask, wmb(); /* Send a message to other CPUs */ - if (cpus_equal(mask, allbutself)) + if (cpus_equal(mask, allbutself) && + cpus_equal(cpu_online_map, cpu_callout_map)) send_IPI_allbutself(CALL_FUNCTION_VECTOR); else send_IPI_mask(mask, CALL_FUNCTION_VECTOR); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 84241a256dc8..3e1cecedde42 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -86,6 +86,7 @@ void *x86_bios_cpu_apicid_early_ptr; #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; +static int low_mappings; #endif /* State of each CPU */ @@ -299,7 +300,7 @@ static void __cpuinit smp_callin(void) /* * Activate a secondary processor. */ -void __cpuinit start_secondary(void *unused) +static void __cpuinit start_secondary(void *unused) { /* * Don't put *anything* before cpu_init(), SMP booting is too @@ -326,6 +327,12 @@ void __cpuinit start_secondary(void *unused) enable_8259A_irq(0); } +#ifdef CONFIG_X86_32 + while (low_mappings) + cpu_relax(); + __flush_tlb_all(); +#endif + /* This must be done before setting cpu_online_map */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); @@ -989,7 +996,6 @@ do_rest: #endif cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */ cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */ - cpu_clear(cpu, cpu_possible_map); cpu_clear(cpu, cpu_present_map); per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID; } @@ -1040,14 +1046,20 @@ int __cpuinit native_cpu_up(unsigned int cpu) #ifdef CONFIG_X86_32 /* init low mem mapping */ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); + min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); flush_tlb_all(); -#endif + low_mappings = 1; + + err = do_boot_cpu(apicid, cpu); + zap_low_mappings(); + low_mappings = 0; +#else err = do_boot_cpu(apicid, cpu); - if (err < 0) { +#endif + if (err) { Dprintk("do_boot_cpu failed %d\n", err); - return err; + return -EIO; } /* @@ -1177,6 +1189,7 @@ static void __init smp_cpu_index_default(void) */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { + preempt_disable(); nmi_watchdog_default(); smp_cpu_index_default(); current_cpu_data = boot_cpu_data; @@ -1193,7 +1206,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); disable_smp(); - return; + goto out; } preempt_disable(); @@ -1233,6 +1246,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); setup_boot_clock(); +out: + preempt_enable(); } /* * Early setup to make printk work. @@ -1259,9 +1274,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus) setup_ioapic_dest(); #endif check_nmi_watchdog(); -#ifdef CONFIG_X86_32 - zap_low_mappings(); -#endif } #ifdef CONFIG_HOTPLUG_CPU @@ -1306,7 +1318,7 @@ static void remove_siblinginfo(int cpu) cpu_clear(cpu, cpu_sibling_setup_map); } -int additional_cpus __initdata = -1; +static int additional_cpus __initdata = -1; static __init int setup_additional_cpus(char *s) { diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index a86d26f036e1..d2ab52cc1d6b 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -22,23 +22,6 @@ #include <asm/uaccess.h> #include <asm/unistd.h> -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage int sys_pipe(unsigned long __user * fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index bd802a5e1aa3..3b360ef33817 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -17,23 +17,6 @@ #include <asm/uaccess.h> #include <asm/ia32.h> -/* - * sys_pipe() is the normal C calling standard for creating - * a pipe. It's not the way Unix traditionally does this, though. - */ -asmlinkage long sys_pipe(int __user *fildes) -{ - int fd[2]; - int error; - - error = do_pipe(fd); - if (!error) { - if (copy_to_user(fildes, fd, 2*sizeof(int))) - error = -EFAULT; - } - return error; -} - asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off) { diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index bde6f63e15d5..08d752de4eee 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -544,6 +544,7 @@ vm86_trap: #define DO_ERROR(trapnr, signr, str, name) \ void do_##name(struct pt_regs *regs, long error_code) \ { \ + trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index e4790728b224..65b70637ad97 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -14,7 +14,10 @@ #include "mach_timer.h" -static int tsc_enabled; +/* native_sched_clock() is called before tsc_init(), so + we must start with the TSC soft disabled to prevent + erroneous rdtsc usage on !cpu_has_tsc processors */ +static int tsc_disabled = -1; /* * On some systems the TSC frequency does not @@ -28,8 +31,8 @@ EXPORT_SYMBOL_GPL(tsc_khz); static int __init tsc_setup(char *str) { printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " - "cannot disable TSC completely.\n"); - mark_tsc_unstable("user disabled TSC"); + "cannot disable TSC completely.\n"); + tsc_disabled = 1; return 1; } #else @@ -120,7 +123,7 @@ unsigned long long native_sched_clock(void) * very important for it to be as fast as the platform * can achive it. ) */ - if (unlikely(!tsc_enabled && !tsc_unstable)) + if (unlikely(tsc_disabled)) /* No locking but a rare wrong value is not a big deal: */ return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); @@ -322,7 +325,6 @@ void mark_tsc_unstable(char *reason) { if (!tsc_unstable) { tsc_unstable = 1; - tsc_enabled = 0; printk("Marking TSC unstable due to: %s.\n", reason); /* Can be called before registration */ if (clocksource_tsc.mult) @@ -336,7 +338,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) { printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", - d->ident); + d->ident); tsc_unstable = 1; return 0; } @@ -403,7 +405,7 @@ void __init tsc_init(void) { int cpu; - if (!cpu_has_tsc) + if (!cpu_has_tsc || tsc_disabled > 0) return; cpu_khz = calculate_cpu_khz(); @@ -414,6 +416,9 @@ void __init tsc_init(void) return; } + /* now allow native_sched_clock() to use rdtsc */ + tsc_disabled = 0; + printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); @@ -441,8 +446,6 @@ void __init tsc_init(void) if (check_tsc_unstable()) { clocksource_tsc.rating = 0; clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; - } else - tsc_enabled = 1; - + } clocksource_register(&clocksource_tsc); } diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index fcc16e58609e..1784b8077a12 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -227,14 +227,14 @@ void __init tsc_calibrate(void) /* hpet or pmtimer available ? */ if (!hpet && !pm1 && !pm2) { printk(KERN_INFO "TSC calibrated against PIT\n"); - return; + goto out; } /* Check, whether the sampling was disturbed by an SMI */ if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { printk(KERN_WARNING "TSC calibration disturbed by SMI, " "using PIT calibration result\n"); - return; + goto out; } tsc2 = (tsc2 - tsc1) * 1000000L; @@ -255,6 +255,7 @@ void __init tsc_calibrate(void) tsc_khz = tsc2 / tsc1; +out: for_each_possible_cpu(cpu) set_cyc2ns_scale(tsc_khz, cpu); } diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 58882f9f2637..f6c05d0410fb 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -2,6 +2,7 @@ All C exports should go in the respective C files. */ #include <linux/module.h> +#include <net/checksum.h> #include <linux/smp.h> #include <asm/processor.h> @@ -29,6 +30,8 @@ EXPORT_SYMBOL(__copy_from_user_inatomic); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(clear_page); +EXPORT_SYMBOL(csum_partial); + /* * Export string functions. We normally rely on gcc builtin for most of these, * but gcc sometimes decides not to inline them. |