summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/.gitignore1
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c16
-rw-r--r--arch/x86/kernel/acpi/realmode/Makefile5
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S38
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.h5
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S10
-rw-r--r--arch/x86/kernel/acpi/sleep.c16
-rw-r--r--arch/x86/kernel/apic_64.c2
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c21
-rw-r--r--arch/x86/kernel/cpu/common.c27
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longrun.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c15
-rw-r--r--arch/x86/kernel/efi_32.c8
-rw-r--r--arch/x86/kernel/entry_32.S1
-rw-r--r--arch/x86/kernel/geode_32.c22
-rw-r--r--arch/x86/kernel/head_32.S2
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/i387.c44
-rw-r--r--arch/x86/kernel/init_task.c1
-rw-r--r--arch/x86/kernel/io_apic_32.c12
-rw-r--r--arch/x86/kernel/kvmclock.c97
-rw-r--r--arch/x86/kernel/mfgpt_32.c2
-rw-r--r--arch/x86/kernel/mpparse.c7
-rw-r--r--arch/x86/kernel/nmi_32.c9
-rw-r--r--arch/x86/kernel/pci-dma.c20
-rw-r--r--arch/x86/kernel/pci-gart_64.c31
-rw-r--r--arch/x86/kernel/process.c36
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c6
-rw-r--r--arch/x86/kernel/ptrace.c7
-rw-r--r--arch/x86/kernel/pvclock.c141
-rw-r--r--arch/x86/kernel/reboot.c1
-rw-r--r--arch/x86/kernel/rtc.c34
-rw-r--r--arch/x86/kernel/setup.c4
-rw-r--r--arch/x86/kernel/setup_32.c17
-rw-r--r--arch/x86/kernel/setup_64.c15
-rw-r--r--arch/x86/kernel/smp.c3
-rw-r--r--arch/x86/kernel/smpboot.c34
-rw-r--r--arch/x86/kernel/sys_i386_32.c17
-rw-r--r--arch/x86/kernel/sys_x86_64.c17
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/tsc_32.c23
-rw-r--r--arch/x86/kernel/tsc_64.c5
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c3
46 files changed, 562 insertions, 231 deletions
diff --git a/arch/x86/kernel/.gitignore b/arch/x86/kernel/.gitignore
index 4ea38a39aed4..08f4fd731469 100644
--- a/arch/x86/kernel/.gitignore
+++ b/arch/x86/kernel/.gitignore
@@ -1,2 +1,3 @@
vsyscall.lds
vsyscall_32.lds
+vmlinux.lds
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bbdacb398d48..77807d4769c9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,10 +82,9 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST) += kvm.o
obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
-ifdef CONFIG_INPUT_PCSPKR
-obj-y += pcspeaker.o
-endif
+obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
obj-$(CONFIG_SCx200) += scx200.o
scx200-y += scx200_32.o
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 7335959b6aff..fd5ca97a2ad5 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -10,5 +10,5 @@ endif
$(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin
$(obj)/realmode/wakeup.bin: FORCE
- $(Q)$(MAKE) $(build)=$(obj)/realmode $@
+ $(Q)$(MAKE) $(build)=$(obj)/realmode
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c49ebcc6c41e..33c5216fd3e1 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -242,12 +242,19 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
static void __cpuinit acpi_register_lapic(int id, u8 enabled)
{
+ unsigned int ver = 0;
+
if (!enabled) {
++disabled_cpus;
return;
}
- generic_processor_info(id, 0);
+#ifdef CONFIG_X86_32
+ if (boot_cpu_physical_apicid != -1U)
+ ver = apic_version[boot_cpu_physical_apicid];
+#endif
+
+ generic_processor_info(id, ver);
}
static int __init
@@ -767,8 +774,13 @@ static void __init acpi_register_lapic_address(unsigned long address)
mp_lapic_addr = address;
set_fixmap_nocache(FIX_APIC_BASE, address);
- if (boot_cpu_physical_apicid == -1U)
+ if (boot_cpu_physical_apicid == -1U) {
boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+#ifdef CONFIG_X86_32
+ apic_version[boot_cpu_physical_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+#endif
+ }
}
static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
index 092900854acc..1c31cc0e9def 100644
--- a/arch/x86/kernel/acpi/realmode/Makefile
+++ b/arch/x86/kernel/acpi/realmode/Makefile
@@ -6,7 +6,8 @@
# for more details.
#
-targets := wakeup.bin wakeup.elf
+always := wakeup.bin
+targets := wakeup.elf wakeup.lds
wakeup-y += wakeup.o wakemain.o video-mode.o copy.o
@@ -48,7 +49,7 @@ LDFLAGS_wakeup.elf := -T
CPPFLAGS_wakeup.lds += -P -C
-$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE
+$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
$(call if_changed,ld)
OBJCOPYFLAGS_wakeup.bin := -O binary
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index f9b77fb37e5b..3355973b12ac 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -5,6 +5,7 @@
#include <asm/msr-index.h>
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/processor-flags.h>
.code16
.section ".header", "a"
@@ -24,6 +25,11 @@ pmode_gdt: .quad 0
realmode_flags: .long 0
real_magic: .long 0
trampoline_segment: .word 0
+_pad1: .byte 0
+wakeup_jmp: .byte 0xea /* ljmpw */
+wakeup_jmp_off: .word 3f
+wakeup_jmp_seg: .word 0
+wakeup_gdt: .quad 0, 0, 0
signature: .long 0x51ee1111
.text
@@ -34,11 +40,34 @@ _start:
cli
cld
+ /* Apparently some dimwit BIOS programmers don't know how to
+ program a PM to RM transition, and we might end up here with
+ junk in the data segment descriptor registers. The only way
+ to repair that is to go into PM and fix it ourselves... */
+ movw $16, %cx
+ lgdtl %cs:wakeup_gdt
+ movl %cr0, %eax
+ orb $X86_CR0_PE, %al
+ movl %eax, %cr0
+ jmp 1f
+1: ljmpw $8, $2f
+2:
+ movw %cx, %ds
+ movw %cx, %es
+ movw %cx, %ss
+ movw %cx, %fs
+ movw %cx, %gs
+
+ andb $~X86_CR0_PE, %al
+ movl %eax, %cr0
+ jmp wakeup_jmp
+3:
/* Set up segments */
movw %cs, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
+ lidtl wakeup_idt
movl $wakeup_stack_end, %esp
@@ -98,7 +127,14 @@ bogus_real_magic:
jmp 1b
.data
- .balign 4
+ .balign 8
+
+ /* This is the standard real-mode IDT */
+wakeup_idt:
+ .word 0xffff /* limit */
+ .long 0 /* address */
+ .word 0
+
.globl HEAP, heap_end
HEAP:
.long wakeup_heap
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.h b/arch/x86/kernel/acpi/realmode/wakeup.h
index ef8166fe8020..69d38d0b2b64 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.h
+++ b/arch/x86/kernel/acpi/realmode/wakeup.h
@@ -24,6 +24,11 @@ struct wakeup_header {
u32 realmode_flags;
u32 real_magic;
u16 trampoline_segment; /* segment with trampoline code, 64-bit only */
+ u8 _pad1;
+ u8 wakeup_jmp;
+ u16 wakeup_jmp_off;
+ u16 wakeup_jmp_seg;
+ u64 wakeup_gdt[3];
u32 signature; /* To check we have correct structure */
} __attribute__((__packed__));
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 22fab6c4be15..7da00b799cda 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -12,11 +12,6 @@ ENTRY(_start)
SECTIONS
{
- . = HEADER_OFFSET;
- .header : {
- *(.header)
- }
-
. = 0;
.text : {
*(.text*)
@@ -50,6 +45,11 @@ SECTIONS
__bss_end = .;
}
+ . = HEADER_OFFSET;
+ .header : {
+ *(.header)
+ }
+
. = ALIGN(16);
_end = .;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index afc25ee9964b..36af01f029ed 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -50,6 +50,20 @@ int acpi_save_state_mem(void)
header->video_mode = saved_video_mode;
+ header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
+ /* GDT[0]: GDT self-pointer */
+ header->wakeup_gdt[0] =
+ (u64)(sizeof(header->wakeup_gdt) - 1) +
+ ((u64)(acpi_wakeup_address +
+ ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
+ << 16);
+ /* GDT[1]: real-mode-like code segment */
+ header->wakeup_gdt[1] = (0x009bULL << 40) +
+ ((u64)acpi_wakeup_address << 16) + 0xffff;
+ /* GDT[2]: real-mode-like data segment */
+ header->wakeup_gdt[2] = (0x0093ULL << 40) +
+ ((u64)acpi_wakeup_address << 16) + 0xffff;
+
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -111,7 +125,7 @@ void __init acpi_reserve_bootmem(void)
return;
}
- acpi_wakeup_address = acpi_realmode;
+ acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
}
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 5910020c3f24..0633cfd0dc29 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -534,7 +534,7 @@ int setup_profiling_timer(unsigned int multiplier)
*/
void clear_local_APIC(void)
{
- int maxlvt = lapic_get_maxlvt();
+ int maxlvt;
u32 v;
/* APIC hasn't been mapped yet */
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 238468ae1993..c2e1ce33c7cb 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -6,6 +6,7 @@
#include <linux/cpu.h>
+#include <asm/pat.h>
#include <asm/processor.h>
struct cpuid_bit {
@@ -48,3 +49,23 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
set_cpu_cap(c, cb->feature);
}
}
+
+#ifdef CONFIG_X86_PAT
+void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
+{
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ if (c->x86 >= 0xf && c->x86 <= 0x11)
+ return;
+ break;
+ case X86_VENDOR_INTEL:
+ if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
+ return;
+ break;
+ }
+
+ pat_disable(cpu_has_pat ?
+ "PAT disabled. Not yet verified on this CPU type." :
+ "PAT not supported by CPU.");
+}
+#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 35b4f6a9c8ef..d0463a946247 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -12,6 +12,7 @@
#include <asm/mmu_context.h>
#include <asm/mtrr.h>
#include <asm/mce.h>
+#include <asm/pat.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/mpspec.h>
#include <asm/apic.h>
@@ -308,19 +309,6 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
}
- clear_cpu_cap(c, X86_FEATURE_PAT);
-
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- if (c->x86 >= 0xf && c->x86 <= 0x11)
- set_cpu_cap(c, X86_FEATURE_PAT);
- break;
- case X86_VENDOR_INTEL:
- if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
- set_cpu_cap(c, X86_FEATURE_PAT);
- break;
- }
-
}
/*
@@ -409,18 +397,6 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
init_scattered_cpuid_features(c);
}
- clear_cpu_cap(c, X86_FEATURE_PAT);
-
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- if (c->x86 >= 0xf && c->x86 <= 0x11)
- set_cpu_cap(c, X86_FEATURE_PAT);
- break;
- case X86_VENDOR_INTEL:
- if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
- set_cpu_cap(c, X86_FEATURE_PAT);
- break;
- }
}
static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
@@ -651,6 +627,7 @@ void __init early_cpu_init(void)
cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
early_cpu_detect();
+ validate_pat_support(&boot_cpu_data);
}
/* Make sure %fs is initialized properly in idle threads */
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c
index af4a867a097c..777a7ff075de 100644
--- a/arch/x86/kernel/cpu/cpufreq/longrun.c
+++ b/arch/x86/kernel/cpu/cpufreq/longrun.c
@@ -245,7 +245,7 @@ static unsigned int __init longrun_determine_freqs(unsigned int *low_freq,
if ((ecx > 95) || (ecx == 0) || (eax < ebx))
return -EIO;
- edx = (eax - ebx) / (100 - ecx);
+ edx = ((eax - ebx) * 100) / (100 - ecx);
*low_freq = edx * 1000; /* back to kHz */
dprintk("low frequency is %u kHz\n", *low_freq);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 46d4034d9f37..206791eb46e3 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1127,12 +1127,23 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
* an UP version, and is deprecated by AMD.
*/
if (num_online_cpus() != 1) {
- printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n");
+#ifndef CONFIG_ACPI_PROCESSOR
+ printk(KERN_ERR PFX "ACPI Processor support is required "
+ "for SMP systems but is absent. Please load the "
+ "ACPI Processor module before starting this "
+ "driver.\n");
+#else
+ printk(KERN_ERR PFX "Your BIOS does not provide ACPI "
+ "_PSS objects in a way that Linux understands. "
+ "Please report this to the Linux ACPI maintainers"
+ " and complain to your BIOS vendor.\n");
+#endif
kfree(data);
return -ENODEV;
}
if (pol->cpu != 0) {
- printk(KERN_ERR PFX "No _PSS objects for CPU other than CPU0\n");
+ printk(KERN_ERR PFX "No ACPI _PSS objects for CPU other than "
+ "CPU0. Complain to your BIOS vendor.\n");
kfree(data);
return -ENODEV;
}
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c
index 5d23d85624d4..4b63c8e1f13b 100644
--- a/arch/x86/kernel/efi_32.c
+++ b/arch/x86/kernel/efi_32.c
@@ -49,13 +49,13 @@ void efi_call_phys_prelog(void)
local_irq_save(efi_rt_eflags);
/*
- * If I don't have PSE, I should just duplicate two entries in page
- * directory. If I have PSE, I just need to duplicate one entry in
+ * If I don't have PAE, I should just duplicate two entries in page
+ * directory. If I have PAE, I just need to duplicate one entry in
* page directory.
*/
cr4 = read_cr4();
- if (cr4 & X86_CR4_PSE) {
+ if (cr4 & X86_CR4_PAE) {
efi_bak_pg_dir_pointer[0].pgd =
swapper_pg_dir[pgd_index(0)].pgd;
swapper_pg_dir[0].pgd =
@@ -93,7 +93,7 @@ void efi_call_phys_epilog(void)
cr4 = read_cr4();
- if (cr4 & X86_CR4_PSE) {
+ if (cr4 & X86_CR4_PAE) {
swapper_pg_dir[pgd_index(0)].pgd =
efi_bak_pg_dir_pointer[0].pgd;
} else {
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 2a609dc3271c..c778e4fa55a2 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -248,6 +248,7 @@ ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
# setting need_resched or sigpending
# between sampling and the iret
+ TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
# int/exception return?
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index 9dad6ca6cd70..9b08e852fd1a 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -161,6 +161,28 @@ void geode_gpio_setup_event(unsigned int gpio, int pair, int pme)
}
EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
+int geode_has_vsa2(void)
+{
+ static int has_vsa2 = -1;
+
+ if (has_vsa2 == -1) {
+ u16 val;
+
+ /*
+ * The VSA has virtual registers that we can query for a
+ * signature.
+ */
+ outw(VSA_VR_UNLOCK, VSA_VRC_INDEX);
+ outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX);
+
+ val = inw(VSA_VRC_DATA);
+ has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG);
+ }
+
+ return has_vsa2;
+}
+EXPORT_SYMBOL_GPL(geode_has_vsa2);
+
static int __init geode_southbridge_init(void)
{
if (!is_geode())
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b2cc73768a9d..f7357cc0162c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -189,7 +189,7 @@ default_entry:
* this stage.
*/
-#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */
+#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */
xorl %ebx,%ebx /* %ebx is kept at zero */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 10a1955bb1d1..b817974ef942 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -128,7 +128,7 @@ ident_complete:
/* Fixup phys_base */
addq %rbp, phys_base(%rip)
-#ifdef CONFIG_SMP
+#ifdef CONFIG_X86_TRAMPOLINE
addq %rbp, trampoline_level4_pgt + 0(%rip)
addq %rbp, trampoline_level4_pgt + (511*8)(%rip)
#endif
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index db6839b53195..95e80e5033c3 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -56,6 +56,11 @@ void __cpuinit mxcsr_feature_mask_init(void)
void __init init_thread_xstate(void)
{
+ if (!HAVE_HWFP) {
+ xstate_size = sizeof(struct i387_soft_struct);
+ return;
+ }
+
if (cpu_has_fxsr)
xstate_size = sizeof(struct i387_fxsave_struct);
#ifdef CONFIG_X86_32
@@ -94,7 +99,7 @@ void __cpuinit fpu_init(void)
int init_fpu(struct task_struct *tsk)
{
if (tsk_used_math(tsk)) {
- if (tsk == current)
+ if (HAVE_HWFP && tsk == current)
unlazy_fpu(tsk);
return 0;
}
@@ -109,6 +114,15 @@ int init_fpu(struct task_struct *tsk)
return -ENOMEM;
}
+#ifdef CONFIG_X86_32
+ if (!HAVE_HWFP) {
+ memset(tsk->thread.xstate, 0, xstate_size);
+ finit();
+ set_stopped_child_used_math(tsk);
+ return 0;
+ }
+#endif
+
if (cpu_has_fxsr) {
struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
@@ -148,7 +162,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
int ret;
if (!cpu_has_fxsr)
- return -ENODEV;
+ return -EIO;
ret = init_fpu(target);
if (ret)
@@ -165,7 +179,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
int ret;
if (!cpu_has_fxsr)
- return -ENODEV;
+ return -EIO;
ret = init_fpu(target);
if (ret)
@@ -330,13 +344,13 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
struct user_i387_ia32_struct env;
int ret;
- if (!HAVE_HWFP)
- return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
-
ret = init_fpu(target);
if (ret)
return ret;
+ if (!HAVE_HWFP)
+ return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
+
if (!cpu_has_fxsr) {
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.xstate->fsave, 0,
@@ -360,15 +374,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
struct user_i387_ia32_struct env;
int ret;
- if (!HAVE_HWFP)
- return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
-
ret = init_fpu(target);
if (ret)
return ret;
set_stopped_child_used_math(target);
+ if (!HAVE_HWFP)
+ return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
+
if (!cpu_has_fxsr) {
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.xstate->fsave, 0, -1);
@@ -450,7 +464,6 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
{
struct task_struct *tsk = current;
- clear_fpu(tsk);
return __copy_from_user(&tsk->thread.xstate->fsave, buf,
sizeof(struct i387_fsave_struct));
}
@@ -461,7 +474,6 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
struct user_i387_ia32_struct env;
int err;
- clear_fpu(tsk);
err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
sizeof(struct i387_fxsave_struct));
/* mxcsr reserved bits must be masked to zero for security reasons */
@@ -476,6 +488,16 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
{
int err;
+ struct task_struct *tsk = current;
+
+ if (HAVE_HWFP)
+ clear_fpu(tsk);
+
+ if (!used_math()) {
+ err = init_fpu(tsk);
+ if (err)
+ return err;
+ }
if (HAVE_HWFP) {
if (cpu_has_fxsr)
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 3d01e47777db..a4f93b4120c1 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -11,7 +11,6 @@
#include <asm/desc.h>
static struct fs_struct init_fs = INIT_FS;
-static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index a40d54fc1fdd..4dc8600d9d20 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2130,14 +2130,10 @@ static inline void __init check_timer(void)
{
int apic1, pin1, apic2, pin2;
int vector;
- unsigned int ver;
unsigned long flags;
local_irq_save(flags);
- ver = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(ver);
-
/*
* get/set the timer IRQ vector:
*/
@@ -2150,15 +2146,11 @@ static inline void __init check_timer(void)
* mode for the 8259A whenever interrupts are routed
* through I/O APICs. Also IRQ0 has to be enabled in
* the 8259A which implies the virtual wire has to be
- * disabled in the local APIC. Finally timer interrupts
- * need to be acknowledged manually in the 8259A for
- * timer_interrupt() and for the i82489DX when using
- * the NMI watchdog.
+ * disabled in the local APIC.
*/
apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
init_8259A(1);
- timer_ack = !cpu_has_tsc;
- timer_ack |= (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+ timer_ack = 1;
if (timer_over_8254 > 0)
enable_8259A_irq(0);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ddee04043aeb..87edf1ceb1df 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
#include <linux/clocksource.h>
#include <linux/kvm_para.h>
+#include <asm/pvclock.h>
#include <asm/arch_hooks.h>
#include <asm/msr.h>
#include <asm/apic.h>
@@ -36,83 +37,47 @@ static int parse_no_kvmclock(char *arg)
early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
-#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_wall_clock wall_clock;
-static inline u64 kvm_get_delta(u64 last_tsc)
-{
- int cpu = smp_processor_id();
- u64 delta = native_read_tsc() - last_tsc;
- return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
-}
-
-static struct kvm_wall_clock wall_clock;
-static cycle_t kvm_clock_read(void);
/*
* The wallclock is the time of day when we booted. Since then, some time may
* have elapsed since the hypervisor wrote the data. So we try to account for
* that with system time
*/
-unsigned long kvm_get_wallclock(void)
+static unsigned long kvm_get_wallclock(void)
{
- u32 wc_sec, wc_nsec;
- u64 delta;
+ struct pvclock_vcpu_time_info *vcpu_time;
struct timespec ts;
- int version, nsec;
int low, high;
low = (int)__pa(&wall_clock);
high = ((u64)__pa(&wall_clock) >> 32);
+ native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
- delta = kvm_clock_read();
+ vcpu_time = &get_cpu_var(hv_clock);
+ pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
+ put_cpu_var(hv_clock);
- native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
- do {
- version = wall_clock.wc_version;
- rmb();
- wc_sec = wall_clock.wc_sec;
- wc_nsec = wall_clock.wc_nsec;
- rmb();
- } while ((wall_clock.wc_version != version) || (version & 1));
-
- delta = kvm_clock_read() - delta;
- delta += wc_nsec;
- nsec = do_div(delta, NSEC_PER_SEC);
- set_normalized_timespec(&ts, wc_sec + delta, nsec);
- /*
- * Of all mechanisms of time adjustment I've tested, this one
- * was the champion!
- */
- return ts.tv_sec + 1;
+ return ts.tv_sec;
}
-int kvm_set_wallclock(unsigned long now)
+static int kvm_set_wallclock(unsigned long now)
{
- return 0;
+ return -1;
}
-/*
- * This is our read_clock function. The host puts an tsc timestamp each time
- * it updates a new time. Without the tsc adjustment, we can have a situation
- * in which a vcpu starts to run earlier (smaller system_time), but probes
- * time later (compared to another vcpu), leading to backwards time
- */
static cycle_t kvm_clock_read(void)
{
- u64 last_tsc, now;
- int cpu;
-
- preempt_disable();
- cpu = smp_processor_id();
+ struct pvclock_vcpu_time_info *src;
+ cycle_t ret;
- last_tsc = get_clock(cpu, tsc_timestamp);
- now = get_clock(cpu, system_time);
-
- now += kvm_get_delta(last_tsc);
- preempt_enable();
-
- return now;
+ src = &get_cpu_var(hv_clock);
+ ret = pvclock_clocksource_read(src);
+ put_cpu_var(hv_clock);
+ return ret;
}
+
static struct clocksource kvm_clock = {
.name = "kvm-clock",
.read = kvm_clock_read,
@@ -123,26 +88,37 @@ static struct clocksource kvm_clock = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static int kvm_register_clock(void)
+static int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
int low, high;
low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
-
+ printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
+ cpu, high, low, txt);
return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
}
+#ifdef CONFIG_X86_LOCAL_APIC
static void kvm_setup_secondary_clock(void)
{
/*
* Now that the first cpu already had this clocksource initialized,
* we shouldn't fail.
*/
- WARN_ON(kvm_register_clock());
+ WARN_ON(kvm_register_clock("secondary cpu clock"));
/* ok, done with our trickery, call native */
setup_secondary_APIC_clock();
}
+#endif
+
+#ifdef CONFIG_SMP
+void __init kvm_smp_prepare_boot_cpu(void)
+{
+ WARN_ON(kvm_register_clock("primary cpu clock"));
+ native_smp_prepare_boot_cpu();
+}
+#endif
/*
* After the clock is registered, the host will keep writing to the
@@ -172,12 +148,17 @@ void __init kvmclock_init(void)
return;
if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
- if (kvm_register_clock())
+ if (kvm_register_clock("boot clock"))
return;
pv_time_ops.get_wallclock = kvm_get_wallclock;
pv_time_ops.set_wallclock = kvm_set_wallclock;
pv_time_ops.sched_clock = kvm_clock_read;
+#ifdef CONFIG_X86_LOCAL_APIC
pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+#endif
+#ifdef CONFIG_SMP
+ smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
+#endif
machine_ops.shutdown = kvm_shutdown;
#ifdef CONFIG_KEXEC
machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 3cad17fe026b..07c0f828f488 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -155,6 +155,7 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
wrmsr(msr, value, dummy);
return 0;
}
+EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event);
int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
{
@@ -222,6 +223,7 @@ int geode_mfgpt_alloc_timer(int timer, int domain)
/* No timers available - too bad */
return -1;
}
+EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer);
#ifdef CONFIG_GEODE_MFGPT_TIMER
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 3e2c54dc8b29..404683b94e79 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -794,6 +794,11 @@ void __init find_smp_config(void)
ACPI-based MP Configuration
-------------------------------------------------------------------------- */
+/*
+ * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
+ */
+int es7000_plat;
+
#ifdef CONFIG_ACPI
#ifdef CONFIG_X86_IO_APIC
@@ -909,8 +914,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
MP_intsrc_info(&intsrc);
}
-int es7000_plat;
-
void __init mp_config_acpi_legacy_irqs(void)
{
struct mpc_config_intsrc intsrc;
diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c
index 11b14bbaa61e..84160f74eeb0 100644
--- a/arch/x86/kernel/nmi_32.c
+++ b/arch/x86/kernel/nmi_32.c
@@ -26,7 +26,6 @@
#include <asm/smp.h>
#include <asm/nmi.h>
-#include <asm/timer.h>
#include "mach_traps.h"
@@ -82,7 +81,7 @@ int __init check_nmi_watchdog(void)
prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
if (!prev_nmi_count)
- goto error;
+ return -1;
printk(KERN_INFO "Testing NMI watchdog ... ");
@@ -119,7 +118,7 @@ int __init check_nmi_watchdog(void)
if (!atomic_read(&nmi_active)) {
kfree(prev_nmi_count);
atomic_set(&nmi_active, -1);
- goto error;
+ return -1;
}
printk("OK.\n");
@@ -130,10 +129,6 @@ int __init check_nmi_watchdog(void)
kfree(prev_nmi_count);
return 0;
-error:
- timer_ack = !cpu_has_tsc;
-
- return -1;
}
static int __init setup_nmi_watchdog(char *str)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 0c37f16b6950..dc00a1331ace 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -378,6 +378,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
struct page *page;
unsigned long dma_mask = 0;
dma_addr_t bus;
+ int noretry = 0;
/* ignore region specifiers */
gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
@@ -385,30 +386,37 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
return memory;
- if (!dev)
+ if (!dev) {
dev = &fallback_dev;
+ gfp |= GFP_DMA;
+ }
dma_mask = dev->coherent_dma_mask;
if (dma_mask == 0)
- dma_mask = DMA_32BIT_MASK;
+ dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
/* Device not DMA able */
if (dev->dma_mask == NULL)
return NULL;
- /* Don't invoke OOM killer */
- gfp |= __GFP_NORETRY;
+ /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
+ if (gfp & __GFP_DMA)
+ noretry = 1;
#ifdef CONFIG_X86_64
/* Why <=? Even when the mask is smaller than 4GB it is often
larger than 16MB and in this case we have a chance of
finding fitting memory in the next higher zone first. If
not retry with true GFP_DMA. -AK */
- if (dma_mask <= DMA_32BIT_MASK)
+ if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
gfp |= GFP_DMA32;
+ if (dma_mask < DMA_32BIT_MASK)
+ noretry = 1;
+ }
#endif
again:
- page = dma_alloc_pages(dev, gfp, get_order(size));
+ page = dma_alloc_pages(dev,
+ noretry ? gfp | __GFP_NORETRY : gfp, get_order(size));
if (page == NULL)
return NULL;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c07455d1695f..aa8ec928caa8 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -26,6 +26,7 @@
#include <linux/kdebug.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
+#include <linux/sysdev.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
@@ -548,6 +549,28 @@ static __init unsigned read_aperture(struct pci_dev *dev, u32 *size)
return aper_base;
}
+static int gart_resume(struct sys_device *dev)
+{
+ return 0;
+}
+
+static int gart_suspend(struct sys_device *dev, pm_message_t state)
+{
+ return -EINVAL;
+}
+
+static struct sysdev_class gart_sysdev_class = {
+ .name = "gart",
+ .suspend = gart_suspend,
+ .resume = gart_resume,
+
+};
+
+static struct sys_device device_gart = {
+ .id = 0,
+ .cls = &gart_sysdev_class,
+};
+
/*
* Private Northbridge GATT initialization in case we cannot use the
* AGP driver for some reason.
@@ -558,7 +581,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
unsigned aper_base, new_aper_base;
struct pci_dev *dev;
void *gatt;
- int i;
+ int i, error;
printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
aper_size = aper_base = info->aper_size = 0;
@@ -606,6 +629,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
pci_write_config_dword(dev, 0x90, ctl);
}
+
+ error = sysdev_class_register(&gart_sysdev_class);
+ if (!error)
+ error = sysdev_register(&device_gart);
+ if (error)
+ panic("Could not register gart_sysdev -- would corrupt data on next suspend");
flush_gart();
printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 67e9b4a1e89d..ba370dc8685b 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -99,15 +99,6 @@ static void mwait_idle(void)
local_irq_enable();
}
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
- if (force_mwait)
- return 1;
- /* Any C1 states supported? */
- return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
/*
* On SMP it's slightly faster (but much more power-consuming!)
* to poll the ->work.need_resched flag instead of waiting for the
@@ -119,6 +110,33 @@ static void poll_idle(void)
cpu_relax();
}
+/*
+ * mwait selection logic:
+ *
+ * It depends on the CPU. For AMD CPUs that support MWAIT this is
+ * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
+ * then depend on a clock divisor and current Pstate of the core. If
+ * all cores of a processor are in halt state (C1) the processor can
+ * enter the C1E (C1 enhanced) state. If mwait is used this will never
+ * happen.
+ *
+ * idle=mwait overrides this decision and forces the usage of mwait.
+ */
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+ if (force_mwait)
+ return 1;
+
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ switch(c->x86) {
+ case 0x10:
+ case 0x11:
+ return 0;
+ }
+ }
+ return 1;
+}
+
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
static int selected;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f8476dfbb60d..e2db9ac5c61c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -333,6 +333,7 @@ void flush_thread(void)
/*
* Forget coprocessor state..
*/
+ tsk->fpu_counter = 0;
clear_fpu(tsk);
clear_used_math();
}
@@ -649,8 +650,11 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct
/* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
+ *
+ * tsk_used_math() checks prevent calling math_state_restore(),
+ * which can sleep in the case of !tsk_used_math()
*/
- if (next_p->fpu_counter > 5)
+ if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
/*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index e2319f39988b..c6eb5c91e5f6 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -294,6 +294,7 @@ void flush_thread(void)
/*
* Forget coprocessor state..
*/
+ tsk->fpu_counter = 0;
clear_fpu(tsk);
clear_used_math();
}
@@ -658,8 +659,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
* chances of needing FPU soon are obviously high now
+ *
+ * tsk_used_math() checks prevent calling math_state_restore(),
+ * which can sleep in the case of !tsk_used_math()
*/
- if (next_p->fpu_counter>5)
+ if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
math_state_restore();
return prev_p;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index fb03ef380f0e..a7835f282936 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1303,6 +1303,9 @@ static const struct user_regset_view user_x86_64_view = {
#define genregs32_get genregs_get
#define genregs32_set genregs_set
+#define user_i387_ia32_struct user_i387_struct
+#define user32_fxsr_struct user_fxsr_struct
+
#endif /* CONFIG_X86_64 */
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
@@ -1315,13 +1318,13 @@ static const struct user_regset x86_32_regsets[] = {
},
[REGSET_FP] = {
.core_note_type = NT_PRFPREG,
- .n = sizeof(struct user_i387_struct) / sizeof(u32),
+ .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
.active = fpregs_active, .get = fpregs_get, .set = fpregs_set
},
[REGSET_XFP] = {
.core_note_type = NT_PRXFPREG,
- .n = sizeof(struct user_i387_struct) / sizeof(u32),
+ .n = sizeof(struct user32_fxsr_struct) / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
},
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 000000000000..05fbe9a0325a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
+/* paravirtual clock -- common code used by kvm/xen
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <asm/pvclock.h>
+
+/*
+ * These are perodically updated
+ * xen: magic shared_info page
+ * kvm: gpa registered via msr
+ * and then copied here.
+ */
+struct pvclock_shadow_time {
+ u64 tsc_timestamp; /* TSC at last update of time vals. */
+ u64 system_timestamp; /* Time, in nanosecs, since boot. */
+ u32 tsc_to_nsec_mul;
+ int tsc_shift;
+ u32 version;
+};
+
+/*
+ * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
+ * yielding a 64-bit result.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+ u64 product;
+#ifdef __i386__
+ u32 tmp1, tmp2;
+#endif
+
+ if (shift < 0)
+ delta >>= -shift;
+ else
+ delta <<= shift;
+
+#ifdef __i386__
+ __asm__ (
+ "mul %5 ; "
+ "mov %4,%%eax ; "
+ "mov %%edx,%4 ; "
+ "mul %5 ; "
+ "xor %5,%5 ; "
+ "add %4,%%eax ; "
+ "adc %5,%%edx ; "
+ : "=A" (product), "=r" (tmp1), "=r" (tmp2)
+ : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif __x86_64__
+ __asm__ (
+ "mul %%rdx ; shrd $32,%%rdx,%%rax"
+ : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+ return product;
+}
+
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+ u64 delta = native_read_tsc() - shadow->tsc_timestamp;
+ return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from hypervisor,
+ * into a shadow data area.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+ struct pvclock_vcpu_time_info *src)
+{
+ do {
+ dst->version = src->version;
+ rmb(); /* fetch version before data */
+ dst->tsc_timestamp = src->tsc_timestamp;
+ dst->system_timestamp = src->system_time;
+ dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
+ dst->tsc_shift = src->tsc_shift;
+ rmb(); /* test version after fetching data */
+ } while ((src->version & 1) || (dst->version != src->version));
+
+ return dst->version;
+}
+
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+ struct pvclock_shadow_time shadow;
+ unsigned version;
+ cycle_t ret, offset;
+
+ do {
+ version = pvclock_get_time_values(&shadow, src);
+ barrier();
+ offset = pvclock_get_nsec_offset(&shadow);
+ ret = shadow.system_timestamp + offset;
+ barrier();
+ } while (version != src->version);
+
+ return ret;
+}
+
+void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
+ struct pvclock_vcpu_time_info *vcpu_time,
+ struct timespec *ts)
+{
+ u32 version;
+ u64 delta;
+ struct timespec now;
+
+ /* get wallclock at system boot */
+ do {
+ version = wall_clock->version;
+ rmb(); /* fetch version before time */
+ now.tv_sec = wall_clock->sec;
+ now.tv_nsec = wall_clock->nsec;
+ rmb(); /* fetch time before checking version */
+ } while ((wall_clock->version & 1) || (version != wall_clock->version));
+
+ delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
+ delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
+
+ now.tv_nsec = do_div(delta, NSEC_PER_SEC);
+ now.tv_sec = delta;
+
+ set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
+}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 07c6d42ab5ff..f6be7d5f82f8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -149,7 +149,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
- DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
},
},
{ /* Handle problems with rebooting on Dell Optiplex 745's DFF*/
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 9615eee9b775..05191bbc68b8 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -4,6 +4,8 @@
#include <linux/acpi.h>
#include <linux/bcd.h>
#include <linux/mc146818rtc.h>
+#include <linux/platform_device.h>
+#include <linux/pnp.h>
#include <asm/time.h>
#include <asm/vsyscall.h>
@@ -197,3 +199,35 @@ unsigned long long native_read_tsc(void)
}
EXPORT_SYMBOL(native_read_tsc);
+
+static struct resource rtc_resources[] = {
+ [0] = {
+ .start = RTC_PORT(0),
+ .end = RTC_PORT(1),
+ .flags = IORESOURCE_IO,
+ },
+ [1] = {
+ .start = RTC_IRQ,
+ .end = RTC_IRQ,
+ .flags = IORESOURCE_IRQ,
+ }
+};
+
+static struct platform_device rtc_device = {
+ .name = "rtc_cmos",
+ .id = -1,
+ .resource = rtc_resources,
+ .num_resources = ARRAY_SIZE(rtc_resources),
+};
+
+static __init int add_rtc_cmos(void)
+{
+#ifdef CONFIG_PNP
+ if (!pnp_platform_devices)
+ platform_device_register(&rtc_device);
+#else
+ platform_device_register(&rtc_device);
+#endif /* CONFIG_PNP */
+ return 0;
+}
+device_initcall(add_rtc_cmos);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c0c68c18a788..6f80b852a196 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -12,6 +12,7 @@
#include <asm/mpspec.h>
#include <asm/apicdef.h>
+#ifdef CONFIG_X86_LOCAL_APIC
unsigned int num_processors;
unsigned disabled_cpus __cpuinitdata;
/* Processor that is doing the boot up */
@@ -23,8 +24,9 @@ EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
+#endif
-#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_SMP)
+#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
/*
* Copy data used in early init routines from the initial arrays to the
* per cpu data areas. These arrays then become expendable and the
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 2283422af794..5a2f8e063887 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -127,7 +127,12 @@ static struct resource standard_io_resources[] = { {
}, {
.name = "keyboard",
.start = 0x0060,
- .end = 0x006f,
+ .end = 0x0060,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
+}, {
+ .name = "keyboard",
+ .start = 0x0064,
+ .end = 0x0064,
.flags = IORESOURCE_BUSY | IORESOURCE_IO
}, {
.name = "dma page reg",
@@ -527,10 +532,16 @@ static void __init reserve_crashkernel(void)
(unsigned long)(crash_size >> 20),
(unsigned long)(crash_base >> 20),
(unsigned long)(total_mem >> 20));
+
+ if (reserve_bootmem(crash_base, crash_size,
+ BOOTMEM_EXCLUSIVE) < 0) {
+ printk(KERN_INFO "crashkernel reservation "
+ "failed - memory is in use\n");
+ return;
+ }
+
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
- reserve_bootmem(crash_base, crash_size,
- BOOTMEM_DEFAULT);
} else
printk(KERN_INFO "crashkernel reservation failed - "
"you have to specify a base address\n");
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index 22c14e21c97c..6dff1286ad8a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -70,6 +70,7 @@
#include <asm/ds.h>
#include <asm/topology.h>
#include <asm/trampoline.h>
+#include <asm/pat.h>
#include <mach_apic.h>
#ifdef CONFIG_PARAVIRT
@@ -128,7 +129,9 @@ static struct resource standard_io_resources[] = {
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
{ .name = "timer1", .start = 0x50, .end = 0x53,
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
- { .name = "keyboard", .start = 0x60, .end = 0x6f,
+ { .name = "keyboard", .start = 0x60, .end = 0x60,
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
+ { .name = "keyboard", .start = 0x64, .end = 0x64,
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
{ .name = "dma page reg", .start = 0x80, .end = 0x8f,
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
@@ -948,7 +951,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
{
if (c->x86 == 0x6 && c->x86_model >= 0xf)
- set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
+ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
}
static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
@@ -1063,25 +1066,19 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
-
- clear_cpu_cap(c, X86_FEATURE_PAT);
-
switch (c->x86_vendor) {
case X86_VENDOR_AMD:
early_init_amd(c);
- if (c->x86 >= 0xf && c->x86 <= 0x11)
- set_cpu_cap(c, X86_FEATURE_PAT);
break;
case X86_VENDOR_INTEL:
early_init_intel(c);
- if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15))
- set_cpu_cap(c, X86_FEATURE_PAT);
break;
case X86_VENDOR_CENTAUR:
early_init_centaur(c);
break;
}
+ validate_pat_support(c);
}
/*
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 8f75893a6467..0cb7aadc87cd 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -231,7 +231,8 @@ native_smp_call_function_mask(cpumask_t mask,
wmb();
/* Send a message to other CPUs */
- if (cpus_equal(mask, allbutself))
+ if (cpus_equal(mask, allbutself) &&
+ cpus_equal(cpu_online_map, cpu_callout_map))
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
else
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 84241a256dc8..3e1cecedde42 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -86,6 +86,7 @@ void *x86_bios_cpu_apicid_early_ptr;
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
+static int low_mappings;
#endif
/* State of each CPU */
@@ -299,7 +300,7 @@ static void __cpuinit smp_callin(void)
/*
* Activate a secondary processor.
*/
-void __cpuinit start_secondary(void *unused)
+static void __cpuinit start_secondary(void *unused)
{
/*
* Don't put *anything* before cpu_init(), SMP booting is too
@@ -326,6 +327,12 @@ void __cpuinit start_secondary(void *unused)
enable_8259A_irq(0);
}
+#ifdef CONFIG_X86_32
+ while (low_mappings)
+ cpu_relax();
+ __flush_tlb_all();
+#endif
+
/* This must be done before setting cpu_online_map */
set_cpu_sibling_map(raw_smp_processor_id());
wmb();
@@ -989,7 +996,6 @@ do_rest:
#endif
cpu_clear(cpu, cpu_callout_map); /* was set by do_boot_cpu() */
cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
- cpu_clear(cpu, cpu_possible_map);
cpu_clear(cpu, cpu_present_map);
per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
}
@@ -1040,14 +1046,20 @@ int __cpuinit native_cpu_up(unsigned int cpu)
#ifdef CONFIG_X86_32
/* init low mem mapping */
clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
+ min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
flush_tlb_all();
-#endif
+ low_mappings = 1;
+
+ err = do_boot_cpu(apicid, cpu);
+ zap_low_mappings();
+ low_mappings = 0;
+#else
err = do_boot_cpu(apicid, cpu);
- if (err < 0) {
+#endif
+ if (err) {
Dprintk("do_boot_cpu failed %d\n", err);
- return err;
+ return -EIO;
}
/*
@@ -1177,6 +1189,7 @@ static void __init smp_cpu_index_default(void)
*/
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
+ preempt_disable();
nmi_watchdog_default();
smp_cpu_index_default();
current_cpu_data = boot_cpu_data;
@@ -1193,7 +1206,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n");
disable_smp();
- return;
+ goto out;
}
preempt_disable();
@@ -1233,6 +1246,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
printk(KERN_INFO "CPU%d: ", 0);
print_cpu_info(&cpu_data(0));
setup_boot_clock();
+out:
+ preempt_enable();
}
/*
* Early setup to make printk work.
@@ -1259,9 +1274,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
setup_ioapic_dest();
#endif
check_nmi_watchdog();
-#ifdef CONFIG_X86_32
- zap_low_mappings();
-#endif
}
#ifdef CONFIG_HOTPLUG_CPU
@@ -1306,7 +1318,7 @@ static void remove_siblinginfo(int cpu)
cpu_clear(cpu, cpu_sibling_setup_map);
}
-int additional_cpus __initdata = -1;
+static int additional_cpus __initdata = -1;
static __init int setup_additional_cpus(char *s)
{
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index a86d26f036e1..d2ab52cc1d6b 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -22,23 +22,6 @@
#include <asm/uaccess.h>
#include <asm/unistd.h>
-/*
- * sys_pipe() is the normal C calling standard for creating
- * a pipe. It's not the way Unix traditionally does this, though.
- */
-asmlinkage int sys_pipe(unsigned long __user * fildes)
-{
- int fd[2];
- int error;
-
- error = do_pipe(fd);
- if (!error) {
- if (copy_to_user(fildes, fd, 2*sizeof(int)))
- error = -EFAULT;
- }
- return error;
-}
-
asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long pgoff)
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index bd802a5e1aa3..3b360ef33817 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -17,23 +17,6 @@
#include <asm/uaccess.h>
#include <asm/ia32.h>
-/*
- * sys_pipe() is the normal C calling standard for creating
- * a pipe. It's not the way Unix traditionally does this, though.
- */
-asmlinkage long sys_pipe(int __user *fildes)
-{
- int fd[2];
- int error;
-
- error = do_pipe(fd);
- if (!error) {
- if (copy_to_user(fildes, fd, 2*sizeof(int)))
- error = -EFAULT;
- }
- return error;
-}
-
asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
unsigned long fd, unsigned long off)
{
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index bde6f63e15d5..08d752de4eee 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -544,6 +544,7 @@ vm86_trap:
#define DO_ERROR(trapnr, signr, str, name) \
void do_##name(struct pt_regs *regs, long error_code) \
{ \
+ trace_hardirqs_fixup(); \
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
== NOTIFY_STOP) \
return; \
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
index e4790728b224..65b70637ad97 100644
--- a/arch/x86/kernel/tsc_32.c
+++ b/arch/x86/kernel/tsc_32.c
@@ -14,7 +14,10 @@
#include "mach_timer.h"
-static int tsc_enabled;
+/* native_sched_clock() is called before tsc_init(), so
+ we must start with the TSC soft disabled to prevent
+ erroneous rdtsc usage on !cpu_has_tsc processors */
+static int tsc_disabled = -1;
/*
* On some systems the TSC frequency does not
@@ -28,8 +31,8 @@ EXPORT_SYMBOL_GPL(tsc_khz);
static int __init tsc_setup(char *str)
{
printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
- "cannot disable TSC completely.\n");
- mark_tsc_unstable("user disabled TSC");
+ "cannot disable TSC completely.\n");
+ tsc_disabled = 1;
return 1;
}
#else
@@ -120,7 +123,7 @@ unsigned long long native_sched_clock(void)
* very important for it to be as fast as the platform
* can achive it. )
*/
- if (unlikely(!tsc_enabled && !tsc_unstable))
+ if (unlikely(tsc_disabled))
/* No locking but a rare wrong value is not a big deal: */
return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
@@ -322,7 +325,6 @@ void mark_tsc_unstable(char *reason)
{
if (!tsc_unstable) {
tsc_unstable = 1;
- tsc_enabled = 0;
printk("Marking TSC unstable due to: %s.\n", reason);
/* Can be called before registration */
if (clocksource_tsc.mult)
@@ -336,7 +338,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
{
printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
- d->ident);
+ d->ident);
tsc_unstable = 1;
return 0;
}
@@ -403,7 +405,7 @@ void __init tsc_init(void)
{
int cpu;
- if (!cpu_has_tsc)
+ if (!cpu_has_tsc || tsc_disabled > 0)
return;
cpu_khz = calculate_cpu_khz();
@@ -414,6 +416,9 @@ void __init tsc_init(void)
return;
}
+ /* now allow native_sched_clock() to use rdtsc */
+ tsc_disabled = 0;
+
printk("Detected %lu.%03lu MHz processor.\n",
(unsigned long)cpu_khz / 1000,
(unsigned long)cpu_khz % 1000);
@@ -441,8 +446,6 @@ void __init tsc_init(void)
if (check_tsc_unstable()) {
clocksource_tsc.rating = 0;
clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
- } else
- tsc_enabled = 1;
-
+ }
clocksource_register(&clocksource_tsc);
}
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
index fcc16e58609e..1784b8077a12 100644
--- a/arch/x86/kernel/tsc_64.c
+++ b/arch/x86/kernel/tsc_64.c
@@ -227,14 +227,14 @@ void __init tsc_calibrate(void)
/* hpet or pmtimer available ? */
if (!hpet && !pm1 && !pm2) {
printk(KERN_INFO "TSC calibrated against PIT\n");
- return;
+ goto out;
}
/* Check, whether the sampling was disturbed by an SMI */
if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
printk(KERN_WARNING "TSC calibration disturbed by SMI, "
"using PIT calibration result\n");
- return;
+ goto out;
}
tsc2 = (tsc2 - tsc1) * 1000000L;
@@ -255,6 +255,7 @@ void __init tsc_calibrate(void)
tsc_khz = tsc2 / tsc1;
+out:
for_each_possible_cpu(cpu)
set_cyc2ns_scale(tsc_khz, cpu);
}
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 58882f9f2637..f6c05d0410fb 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,6 +2,7 @@
All C exports should go in the respective C files. */
#include <linux/module.h>
+#include <net/checksum.h>
#include <linux/smp.h>
#include <asm/processor.h>
@@ -29,6 +30,8 @@ EXPORT_SYMBOL(__copy_from_user_inatomic);
EXPORT_SYMBOL(copy_page);
EXPORT_SYMBOL(clear_page);
+EXPORT_SYMBOL(csum_partial);
+
/*
* Export string functions. We normally rely on gcc builtin for most of these,
* but gcc sometimes decides not to inline them.