Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                        |   2
-rw-r--r--  arch/x86/kernel/acpi/boot.c                     |   4
-rw-r--r--  arch/x86/kernel/acpi/realmode/.gitignore        |   3
-rw-r--r--  arch/x86/kernel/alternative.c                   |  39
-rw-r--r--  arch/x86/kernel/apic_32.c                       |   3
-rw-r--r--  arch/x86/kernel/apic_64.c                       |   3
-rw-r--r--  arch/x86/kernel/apm_32.c                        |  19
-rw-r--r--  arch/x86/kernel/cpu/Makefile                    |   1
-rw-r--r--  arch/x86/kernel/cpu/amd.c                       |   6
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c      |  10
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c             |  18
-rw-r--r--  arch/x86/kernel/cpu/nexgen.c                    |  59
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c          |  14
-rw-r--r--  arch/x86/kernel/crash.c                         |   3
-rw-r--r--  arch/x86/kernel/e820_64.c                       |  35
-rw-r--r--  arch/x86/kernel/entry_32.S                      |  12
-rw-r--r--  arch/x86/kernel/genapic_64.c                    |   2
-rw-r--r--  arch/x86/kernel/head64.c                        |  25
-rw-r--r--  arch/x86/kernel/hpet.c                          |   2
-rw-r--r--  arch/x86/kernel/i8253.c                         |   6
-rw-r--r--  arch/x86/kernel/io_apic_32.c                    |   3
-rw-r--r--  arch/x86/kernel/io_apic_64.c                    |   2
-rw-r--r--  arch/x86/kernel/irq_32.c                        |   2
-rw-r--r--  arch/x86/kernel/kdebugfs.c                      | 163
-rw-r--r--  arch/x86/kernel/kvm.c                           | 248
-rw-r--r--  arch/x86/kernel/kvmclock.c                      | 187
-rw-r--r--  arch/x86/kernel/mfgpt_32.c                      |  11
-rw-r--r--  arch/x86/kernel/mpparse.c                       |  39
-rw-r--r--  arch/x86/kernel/paravirt.c                      |  12
-rw-r--r--  arch/x86/kernel/pci-calgary_64.c                |   1
-rw-r--r--  arch/x86/kernel/process.c                       | 117
-rw-r--r--  arch/x86/kernel/process_32.c                    | 118
-rw-r--r--  arch/x86/kernel/process_64.c                    | 123
-rw-r--r--  arch/x86/kernel/ptrace.c                        |  95
-rw-r--r--  arch/x86/kernel/reboot.c                        |  17
-rw-r--r--  arch/x86/kernel/setup_32.c                      |  10
-rw-r--r--  arch/x86/kernel/setup_64.c                      |  36
-rw-r--r--  arch/x86/kernel/signal_32.c                     |  35
-rw-r--r--  arch/x86/kernel/signal_64.c                     |  30
-rw-r--r--  arch/x86/kernel/smpboot.c                       |   8
-rw-r--r--  arch/x86/kernel/summit_32.c                     |   5
-rw-r--r--  arch/x86/kernel/tlb_64.c                        |   4
-rw-r--r--  arch/x86/kernel/trampoline_32.S                 |   2
-rw-r--r--  arch/x86/kernel/traps_32.c                      |   2
-rw-r--r--  arch/x86/kernel/vmi_32.c                        |  22
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S                |   6
46 files changed, 988 insertions(+), 576 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 90e092d0af0c..fa19c3819540 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o
obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o
obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_GUEST) += kvm.o
+obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
ifdef CONFIG_INPUT_PCSPKR
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 057ccf1d5ad4..977ed5cdeaa3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -697,10 +697,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
#define HPET_RESOURCE_NAME_SIZE 9
hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
- if (!hpet_res)
- return 0;
-
- memset(hpet_res, 0, sizeof(*hpet_res));
hpet_res->name = (void *)&hpet_res[1];
hpet_res->flags = IORESOURCE_MEM;
snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore
new file mode 100644
index 000000000000..58f1f48a58f8
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/.gitignore
@@ -0,0 +1,3 @@
+wakeup.bin
+wakeup.elf
+wakeup.lds
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index df4099dc1c68..65c7857a90dd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -511,31 +511,30 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
unsigned long flags;
char *vaddr;
int nr_pages = 2;
+ struct page *pages[2];
+ int i;
- BUG_ON(len > sizeof(long));
- BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
- - ((long)addr & ~(sizeof(long) - 1)));
- if (kernel_text_address((unsigned long)addr)) {
- struct page *pages[2] = { virt_to_page(addr),
- virt_to_page(addr + PAGE_SIZE) };
- if (!pages[1])
- nr_pages = 1;
- vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
- BUG_ON(!vaddr);
- local_irq_save(flags);
- memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
- local_irq_restore(flags);
- vunmap(vaddr);
+ if (!core_kernel_text((unsigned long)addr)) {
+ pages[0] = vmalloc_to_page(addr);
+ pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
} else {
- /*
- * modules are in vmalloc'ed memory, always writable.
- */
- local_irq_save(flags);
- memcpy(addr, opcode, len);
- local_irq_restore(flags);
+ pages[0] = virt_to_page(addr);
+ WARN_ON(!PageReserved(pages[0]));
+ pages[1] = virt_to_page(addr + PAGE_SIZE);
}
+ BUG_ON(!pages[0]);
+ if (!pages[1])
+ nr_pages = 1;
+ vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+ BUG_ON(!vaddr);
+ local_irq_save(flags);
+ memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
+ local_irq_restore(flags);
+ vunmap(vaddr);
sync_core();
/* Could also do a CLFLUSH here to speed up CPU recovery; but
that causes hangs on some VIA CPUs. */
+ for (i = 0; i < len; i++)
+ BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
return addr;
}
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 8317401170b8..4b99b1bdeb6c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -451,7 +451,8 @@ void __init setup_boot_APIC_clock(void)
}
/* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32);
+ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+ lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
lapic_clockevent.min_delta_ns =
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index bf83157337e4..5910020c3f24 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -360,7 +360,8 @@ static void __init calibrate_APIC_clock(void)
result / 1000 / 1000, result / 1000 % 1000);
/* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
+ lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
+ lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
lapic_clockevent.min_delta_ns =
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f0030a0999c7..bf9290e29013 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -904,6 +904,7 @@ recalc:
original_pm_idle();
else
default_idle();
+ local_irq_disable();
jiffies_since_last_check = jiffies - last_jiffies;
if (jiffies_since_last_check > idle_period)
goto recalc;
@@ -911,6 +912,8 @@ recalc:
if (apm_idle_done)
apm_do_busy();
+
+ local_irq_enable();
}
/**
@@ -1189,19 +1192,6 @@ static int suspend(int vetoable)
int err;
struct apm_user *as;
- if (pm_send_all(PM_SUSPEND, (void *)3)) {
- /* Vetoed */
- if (vetoable) {
- if (apm_info.connection_version > 0x100)
- set_system_power_state(APM_STATE_REJECT);
- err = -EBUSY;
- ignore_sys_suspend = 0;
- printk(KERN_WARNING "apm: suspend was vetoed.\n");
- goto out;
- }
- printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
- }
-
device_suspend(PMSG_SUSPEND);
local_irq_disable();
device_power_down(PMSG_SUSPEND);
@@ -1224,9 +1214,7 @@ static int suspend(int vetoable)
device_power_up();
local_irq_enable();
device_resume();
- pm_send_all(PM_RESUME, (void *)0);
queue_event(APM_NORMAL_RESUME, NULL);
- out:
spin_lock(&user_list_lock);
for (as = user_list; as != NULL; as = as->next) {
as->suspend_wait = 0;
@@ -1337,7 +1325,6 @@ static void check_events(void)
if ((event != APM_NORMAL_RESUME)
|| (ignore_normal_resume == 0)) {
device_resume();
- pm_send_all(PM_RESUME, (void *)0);
queue_event(event, NULL);
}
ignore_normal_resume = 0;
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee7c45235e54..a0c6f8190887 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_X86_32) += cyrix.o
obj-$(CONFIG_X86_32) += centaur.o
obj-$(CONFIG_X86_32) += transmeta.o
obj-$(CONFIG_X86_32) += intel.o
-obj-$(CONFIG_X86_32) += nexgen.o
obj-$(CONFIG_X86_32) += umc.o
obj-$(CONFIG_X86_MCE) += mcheck/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0173065dc3b7..245866828294 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -343,10 +343,4 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
.c_size_cache = amd_size_cache,
};
-int __init amd_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
- return 0;
-}
-
cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index e2d870de837c..8db8f73503b3 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -339,6 +339,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
unsigned int freq;
+ unsigned int cached_freq;
dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
@@ -347,7 +348,16 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
return 0;
}
+ cached_freq = data->freq_table[data->acpi_data->state].frequency;
freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
+ if (freq != cached_freq) {
+ /*
+ * The dreaded BIOS frequency change behind our back.
+ * Force set the frequency on next target call.
+ */
+ data->resume = 1;
+ }
+
dprintk("cur freq = %u\n", freq);
return freq;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 9a699ed03598..e07e8c068ae0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -49,7 +49,7 @@ static int banks;
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
static unsigned long notify_user;
static int rip_msr;
-static int mce_bootlog = 1;
+static int mce_bootlog = -1;
static atomic_t mce_events;
static char trigger[128];
@@ -471,13 +471,15 @@ static void mce_init(void *dummy)
static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
{
/* This should be disabled by the BIOS, but isn't always */
- if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
- /* disable GART TBL walk error reporting, which trips off
- incorrectly with the IOMMU & 3ware & Cerberus. */
- clear_bit(10, &bank[4]);
- /* Lots of broken BIOS around that don't clear them
- by default and leave crap in there. Don't log. */
- mce_bootlog = 0;
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ if(c->x86 == 15)
+ /* disable GART TBL walk error reporting, which trips off
+ incorrectly with the IOMMU & 3ware & Cerberus. */
+ clear_bit(10, &bank[4]);
+ if(c->x86 <= 17 && mce_bootlog < 0)
+ /* Lots of broken BIOS around that don't clear them
+ by default and leave crap in there. Don't log. */
+ mce_bootlog = 0;
}
}
diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c
deleted file mode 100644
index 5d5e1c134123..000000000000
--- a/arch/x86/kernel/cpu/nexgen.c
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-/*
- * Detect a NexGen CPU running without BIOS hypercode new enough
- * to have CPUID. (Thanks to Herbert Oppmann)
- */
-
-static int __cpuinit deep_magic_nexgen_probe(void)
-{
- int ret;
-
- __asm__ __volatile__ (
- " movw $0x5555, %%ax\n"
- " xorw %%dx,%%dx\n"
- " movw $2, %%cx\n"
- " divw %%cx\n"
- " movl $0, %%eax\n"
- " jnz 1f\n"
- " movl $1, %%eax\n"
- "1:\n"
- : "=a" (ret) : : "cx", "dx");
- return ret;
-}
-
-static void __cpuinit init_nexgen(struct cpuinfo_x86 *c)
-{
- c->x86_cache_size = 256; /* A few had 1 MB... */
-}
-
-static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c)
-{
- /* Detect NexGen with old hypercode */
- if (deep_magic_nexgen_probe())
- strcpy(c->x86_vendor_id, "NexGenDriven");
-}
-
-static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
- .c_vendor = "Nexgen",
- .c_ident = { "NexGenDriven" },
- .c_models = {
- { .vendor = X86_VENDOR_NEXGEN,
- .family = 5,
- .model_names = { [1] = "Nx586" }
- },
- },
- .c_init = init_nexgen,
- .c_identify = nexgen_identify,
-};
-
-int __init nexgen_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
- return 0;
-}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index b943e10ad814..f9ae93adffe5 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -614,16 +614,6 @@ static struct wd_ops intel_arch_wd_ops __read_mostly = {
.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};
-static struct wd_ops coreduo_wd_ops = {
- .reserve = single_msr_reserve,
- .unreserve = single_msr_unreserve,
- .setup = setup_intel_arch_watchdog,
- .rearm = p6_rearm,
- .stop = single_msr_stop_watchdog,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
-};
-
static void probe_nmi_watchdog(void)
{
switch (boot_cpu_data.x86_vendor) {
@@ -637,8 +627,8 @@ static void probe_nmi_watchdog(void)
/* Work around Core Duo (Yonah) errata AE49 where perfctr1
doesn't have a working enable bit. */
if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
- wd_ops = &coreduo_wd_ops;
- break;
+ intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
+ intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
}
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
wd_ops = &intel_arch_wd_ops;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2251d0ae9570..268553817909 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -25,6 +25,7 @@
#include <asm/hpet.h>
#include <linux/kdebug.h>
#include <asm/smp.h>
+#include <asm/reboot.h>
#include <mach_ipi.h>
@@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void)
}
#endif
-void machine_crash_shutdown(struct pt_regs *regs)
+void native_machine_crash_shutdown(struct pt_regs *regs)
{
/* This function is only called after the system
* has panicked or is otherwise in a critical state.
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index cbd42e51cb08..645ee5e32a27 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -84,14 +84,41 @@ void __init reserve_early(unsigned long start, unsigned long end, char *name)
strncpy(r->name, name, sizeof(r->name) - 1);
}
-void __init early_res_to_bootmem(void)
+void __init free_early(unsigned long start, unsigned long end)
+{
+ struct early_res *r;
+ int i, j;
+
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+ r = &early_res[i];
+ if (start == r->start && end == r->end)
+ break;
+ }
+ if (i >= MAX_EARLY_RES || !early_res[i].end)
+ panic("free_early on not reserved area: %lx-%lx!", start, end);
+
+ for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
+ ;
+
+ memcpy(&early_res[i], &early_res[i + 1],
+ (j - 1 - i) * sizeof(struct early_res));
+
+ early_res[j - 1].end = 0;
+}
+
+void __init early_res_to_bootmem(unsigned long start, unsigned long end)
{
int i;
+ unsigned long final_start, final_end;
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
struct early_res *r = &early_res[i];
- printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
- r->start, r->end - 1, r->name);
- reserve_bootmem_generic(r->start, r->end - r->start);
+ final_start = max(start, r->start);
+ final_end = min(end, r->end);
+ if (final_start >= final_end)
+ continue;
+ printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
+ final_start, final_end - 1, r->name);
+ reserve_bootmem_generic(final_start, final_end - final_start);
}
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0f8934fc303..2a609dc3271c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
irq_return:
INTERRUPT_RETURN
.section .fixup,"ax"
-iret_exc:
+ENTRY(iret_exc)
pushl $0 # no error code
pushl $do_iret_error
jmp error_code
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
ENDPROC(kernel_thread_helper)
#ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+ entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+ RING0_INT_FRAME
+ addl $5*4, %esp /* remove xen-provided frame */
+ jmp sysenter_past_esp
+
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
pushl $0
@@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback)
cmpl $xen_iret_end_crit,%eax
jae 1f
- call xen_iret_crit_fixup
+ jmp xen_iret_crit_fixup
+ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_evtchn_do_upcall
jmp ret_from_intr
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 9546ef408b92..021624c83583 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
else
#endif
- if (cpus_weight(cpu_possible_map) <= 8)
+ if (num_possible_cpus() <= 8)
genapic = &apic_flat;
else
genapic = &apic_physflat;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 993c76773256..e25c57b8aa84 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <linux/percpu.h>
#include <linux/start_kernel.h>
+#include <linux/io.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -22,6 +23,7 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820.h>
+#include <asm/bios_ebda.h>
static void __init zap_identity_mappings(void)
{
@@ -49,7 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
}
}
-#define BIOS_EBDA_SEGMENT 0x40E
#define BIOS_LOWMEM_KILOBYTES 0x413
/*
@@ -80,8 +81,7 @@ static void __init reserve_ebda_region(void)
lowmem <<= 10;
/* start of EBDA area */
- ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
- ebda_addr <<= 4;
+ ebda_addr = get_bios_ebda();
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
@@ -101,6 +101,24 @@ static void __init reserve_ebda_region(void)
reserve_early(lowmem, 0x100000, "BIOS reserved");
}
+static void __init reserve_setup_data(void)
+{
+ struct setup_data *data;
+ unsigned long pa_data;
+ char buf[32];
+
+ if (boot_params.hdr.version < 0x0209)
+ return;
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_ioremap(pa_data, sizeof(*data));
+ sprintf(buf, "setup data %x", data->type);
+ reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+ pa_data = data->next;
+ early_iounmap(data, sizeof(*data));
+ }
+}
+
void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
@@ -157,6 +175,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
#endif
reserve_ebda_region();
+ reserve_setup_data();
/*
* At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 36652ea1a265..9007f9ea64ee 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -218,7 +218,7 @@ static void hpet_legacy_clockevent_register(void)
hpet_freq = 1000000000000000ULL;
do_div(hpet_freq, hpet_period);
hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
- NSEC_PER_SEC, 32);
+ NSEC_PER_SEC, hpet_clockevent.shift);
/* Calculate the min / max delta */
hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
&hpet_clockevent);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 8540abe86ade..c1b5e3ece1f2 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -115,7 +115,8 @@ void __init setup_pit_timer(void)
* IO_APIC has been initialized.
*/
pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
- pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32);
+ pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
+ pit_clockevent.shift);
pit_clockevent.max_delta_ns =
clockevent_delta2ns(0x7FFF, &pit_clockevent);
pit_clockevent.min_delta_ns =
@@ -224,7 +225,8 @@ static int __init init_pit_clocksource(void)
pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
return 0;
- clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+ clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
+ clocksource_pit.shift);
return clocksource_register(&clocksource_pit);
}
arch_initcall(init_pit_clocksource);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 2e2f42074e18..a40d54fc1fdd 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2068,7 +2068,7 @@ static void __init setup_nmi(void)
* cycles as some i82489DX-based boards have glue logic that keeps the
* 8259A interrupt line asserted until INTA. --macro
*/
-static inline void unlock_ExtINT_logic(void)
+static inline void __init unlock_ExtINT_logic(void)
{
int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
@@ -2444,6 +2444,7 @@ void destroy_irq(unsigned int irq)
dynamic_irq_cleanup(irq);
spin_lock_irqsave(&vector_lock, flags);
+ clear_bit(irq_vector[irq], used_vectors);
irq_vector[irq] = 0;
spin_unlock_irqrestore(&vector_lock, flags);
}
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 9ba11d07920f..ef1a8dfcc529 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1599,7 +1599,7 @@ static void __init setup_nmi(void)
* cycles as some i82489DX-based boards have glue logic that keeps the
* 8259A interrupt line asserted until INTA. --macro
*/
-static inline void unlock_ExtINT_logic(void)
+static inline void __init unlock_ExtINT_logic(void)
{
int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 6ea67b76a214..00bda7bcda63 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
: "=a" (arg1), "=d" (arg2), "=b" (bx)
: "0" (irq), "1" (desc), "2" (isp),
"D" (desc->handle_irq)
- : "memory", "cc"
+ : "memory", "cc", "ecx"
);
} else
#endif
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 73354302fda7..c03205991718 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -6,23 +6,171 @@
*
* This file is released under the GPLv2.
*/
-
#include <linux/debugfs.h>
+#include <linux/uaccess.h>
#include <linux/stat.h>
#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/mm.h>
#include <asm/setup.h>
#ifdef CONFIG_DEBUG_BOOT_PARAMS
+struct setup_data_node {
+ u64 paddr;
+ u32 type;
+ u32 len;
+};
+
+static ssize_t
+setup_data_read(struct file *file, char __user *user_buf, size_t count,
+ loff_t *ppos)
+{
+ struct setup_data_node *node = file->private_data;
+ unsigned long remain;
+ loff_t pos = *ppos;
+ struct page *pg;
+ void *p;
+ u64 pa;
+
+ if (pos < 0)
+ return -EINVAL;
+ if (pos >= node->len)
+ return 0;
+
+ if (count > node->len - pos)
+ count = node->len - pos;
+ pa = node->paddr + sizeof(struct setup_data) + pos;
+ pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
+ if (PageHighMem(pg)) {
+ p = ioremap_cache(pa, count);
+ if (!p)
+ return -ENXIO;
+ } else {
+ p = __va(pa);
+ }
+
+ remain = copy_to_user(user_buf, p, count);
+
+ if (PageHighMem(pg))
+ iounmap(p);
+
+ if (remain)
+ return -EFAULT;
+
+ *ppos = pos + count;
+
+ return count;
+}
+
+static int setup_data_open(struct inode *inode, struct file *file)
+{
+ file->private_data = inode->i_private;
+ return 0;
+}
+
+static const struct file_operations fops_setup_data = {
+ .read = setup_data_read,
+ .open = setup_data_open,
+};
+
+static int __init
+create_setup_data_node(struct dentry *parent, int no,
+ struct setup_data_node *node)
+{
+ struct dentry *d, *type, *data;
+ char buf[16];
+ int error;
+
+ sprintf(buf, "%d", no);
+ d = debugfs_create_dir(buf, parent);
+ if (!d) {
+ error = -ENOMEM;
+ goto err_return;
+ }
+ type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
+ if (!type) {
+ error = -ENOMEM;
+ goto err_dir;
+ }
+ data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
+ if (!data) {
+ error = -ENOMEM;
+ goto err_type;
+ }
+ return 0;
+
+err_type:
+ debugfs_remove(type);
+err_dir:
+ debugfs_remove(d);
+err_return:
+ return error;
+}
+
+static int __init create_setup_data_nodes(struct dentry *parent)
+{
+ struct setup_data_node *node;
+ struct setup_data *data;
+ int error, no = 0;
+ struct dentry *d;
+ struct page *pg;
+ u64 pa_data;
+
+ d = debugfs_create_dir("setup_data", parent);
+ if (!d) {
+ error = -ENOMEM;
+ goto err_return;
+ }
+
+ pa_data = boot_params.hdr.setup_data;
+
+ while (pa_data) {
+ node = kmalloc(sizeof(*node), GFP_KERNEL);
+ if (!node) {
+ error = -ENOMEM;
+ goto err_dir;
+ }
+ pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
+ if (PageHighMem(pg)) {
+ data = ioremap_cache(pa_data, sizeof(*data));
+ if (!data) {
+ error = -ENXIO;
+ goto err_dir;
+ }
+ } else {
+ data = __va(pa_data);
+ }
+
+ node->paddr = pa_data;
+ node->type = data->type;
+ node->len = data->len;
+ error = create_setup_data_node(d, no, node);
+ pa_data = data->next;
+
+ if (PageHighMem(pg))
+ iounmap(data);
+ if (error)
+ goto err_dir;
+ no++;
+ }
+ return 0;
+
+err_dir:
+ debugfs_remove(d);
+err_return:
+ return error;
+}
+
static struct debugfs_blob_wrapper boot_params_blob = {
- .data = &boot_params,
- .size = sizeof(boot_params),
+ .data = &boot_params,
+ .size = sizeof(boot_params),
};
static int __init boot_params_kdebugfs_init(void)
{
- int error;
struct dentry *dbp, *version, *data;
+ int error;
dbp = debugfs_create_dir("boot_params", NULL);
if (!dbp) {
@@ -41,7 +189,13 @@ static int __init boot_params_kdebugfs_init(void)
error = -ENOMEM;
goto err_version;
}
+ error = create_setup_data_nodes(dbp);
+ if (error)
+ goto err_data;
return 0;
+
+err_data:
+ debugfs_remove(data);
err_version:
debugfs_remove(version);
err_dir:
@@ -61,5 +215,4 @@ static int __init arch_kdebugfs_init(void)
return error;
}
-
arch_initcall(arch_kdebugfs_init);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
new file mode 100644
index 000000000000..8b7a3cf37d2b
--- /dev/null
+++ b/arch/x86/kernel/kvm.c
@@ -0,0 +1,248 @@
+/*
+ * KVM paravirt_ops implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright IBM Corporation, 2007
+ * Authors: Anthony Liguori <aliguori@us.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/hardirq.h>
+
+#define MMU_QUEUE_SIZE 1024
+
+struct kvm_para_state {
+ u8 mmu_queue[MMU_QUEUE_SIZE];
+ int mmu_queue_len;
+ enum paravirt_lazy_mode mode;
+};
+
+static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+
+static struct kvm_para_state *kvm_para_state(void)
+{
+ return &per_cpu(para_state, raw_smp_processor_id());
+}
+
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+
+static void kvm_mmu_op(void *buffer, unsigned len)
+{
+ int r;
+ unsigned long a1, a2;
+
+ do {
+ a1 = __pa(buffer);
+ a2 = 0; /* on i386 __pa() always returns <4G */
+ r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
+ buffer += r;
+ len -= r;
+ } while (len);
+}
+
+static void mmu_queue_flush(struct kvm_para_state *state)
+{
+ if (state->mmu_queue_len) {
+ kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
+ state->mmu_queue_len = 0;
+ }
+}
+
+static void kvm_deferred_mmu_op(void *buffer, int len)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ if (state->mode != PARAVIRT_LAZY_MMU) {
+ kvm_mmu_op(buffer, len);
+ return;
+ }
+ if (state->mmu_queue_len + len > sizeof state->mmu_queue)
+ mmu_queue_flush(state);
+ memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
+ state->mmu_queue_len += len;
+}
+
+static void kvm_mmu_write(void *dest, u64 val)
+{
+ __u64 pte_phys;
+ struct kvm_mmu_op_write_pte wpte;
+
+#ifdef CONFIG_HIGHPTE
+ struct page *page;
+ unsigned long dst = (unsigned long) dest;
+
+ page = kmap_atomic_to_page(dest);
+ pte_phys = page_to_pfn(page);
+ pte_phys <<= PAGE_SHIFT;
+ pte_phys += (dst & ~(PAGE_MASK));
+#else
+ pte_phys = (unsigned long)__pa(dest);
+#endif
+ wpte.header.op = KVM_MMU_OP_WRITE_PTE;
+ wpte.pte_val = val;
+ wpte.pte_phys = pte_phys;
+
+ kvm_deferred_mmu_op(&wpte, sizeof wpte);
+}
+
+/*
+ * We only need to hook operations that are MMU writes. We hook these so that
+ * we can use lazy MMU mode to batch these operations. We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+ kvm_mmu_write(pmdp, pmd_val(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, pte_t pte)
+{
+ kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep)
+{
+ kvm_mmu_write(ptep, 0);
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+ kvm_mmu_write(pmdp, 0);
+}
+#endif
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+ kvm_mmu_write(pudp, pud_val(pud));
+}
+
+#if PAGETABLE_LEVELS == 4
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+ kvm_mmu_write(pgdp, pgd_val(pgd));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+ struct kvm_mmu_op_flush_tlb ftlb = {
+ .header.op = KVM_MMU_OP_FLUSH_TLB,
+ };
+
+ kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+ struct kvm_mmu_op_release_pt rpt = {
+ .header.op = KVM_MMU_OP_RELEASE_PT,
+ .pt_phys = (u64)pfn << PAGE_SHIFT,
+ };
+
+ kvm_mmu_op(&rpt, sizeof rpt);
+}
+
+static void kvm_enter_lazy_mmu(void)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ paravirt_enter_lazy_mmu();
+ state->mode = paravirt_get_lazy_mode();
+}
+
+static void kvm_leave_lazy_mmu(void)
+{
+ struct kvm_para_state *state = kvm_para_state();
+
+ mmu_queue_flush(state);
+ paravirt_leave_lazy(paravirt_get_lazy_mode());
+ state->mode = paravirt_get_lazy_mode();
+}
+
+static void paravirt_ops_setup(void)
+{
+ pv_info.name = "KVM";
+ pv_info.paravirt_enabled = 1;
+
+ if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
+ pv_cpu_ops.io_delay = kvm_io_delay;
+
+ if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
+ pv_mmu_ops.set_pte = kvm_set_pte;
+ pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+ pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+ pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+ pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+ pv_mmu_ops.pte_clear = kvm_pte_clear;
+ pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+ pv_mmu_ops.set_pud = kvm_set_pud;
+#if PAGETABLE_LEVELS == 4
+ pv_mmu_ops.set_pgd = kvm_set_pgd;
+#endif
+#endif
+ pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+ pv_mmu_ops.release_pte = kvm_release_pt;
+ pv_mmu_ops.release_pmd = kvm_release_pt;
+ pv_mmu_ops.release_pud = kvm_release_pt;
+
+ pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
+ pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
+ }
+}
+
+void __init kvm_guest_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ paravirt_ops_setup();
+}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 000000000000..ddee04043aeb
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,187 @@
+/* KVM paravirtual clock driver. A clocksource implementation
+ Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#include <linux/clocksource.h>
+#include <linux/kvm_para.h>
+#include <asm/arch_hooks.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <linux/percpu.h>
+#include <asm/reboot.h>
+
+#define KVM_SCALE 22
+
+static int kvmclock = 1;
+
+static int parse_no_kvmclock(char *arg)
+{
+ kvmclock = 0;
+ return 0;
+}
+early_param("no-kvmclock", parse_no_kvmclock);
+
+/* The hypervisor will put information about time periodically here */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
+#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+
+static inline u64 kvm_get_delta(u64 last_tsc)
+{
+ int cpu = smp_processor_id();
+ u64 delta = native_read_tsc() - last_tsc;
+ return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
+}
+
+static struct kvm_wall_clock wall_clock;
+static cycle_t kvm_clock_read(void);
+/*
+ * The wallclock is the time of day when we booted. Since then, some time may
+ * have elapsed since the hypervisor wrote the data. So we try to account for
+ * that with system time
+ */
+unsigned long kvm_get_wallclock(void)
+{
+ u32 wc_sec, wc_nsec;
+ u64 delta;
+ struct timespec ts;
+ int version, nsec;
+ int low, high;
+
+ low = (int)__pa(&wall_clock);
+ high = ((u64)__pa(&wall_clock) >> 32);
+
+ delta = kvm_clock_read();
+
+ native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
+ do {
+ version = wall_clock.wc_version;
+ rmb();
+ wc_sec = wall_clock.wc_sec;
+ wc_nsec = wall_clock.wc_nsec;
+ rmb();
+ } while ((wall_clock.wc_version != version) || (version & 1));
+
+ delta = kvm_clock_read() - delta;
+ delta += wc_nsec;
+ nsec = do_div(delta, NSEC_PER_SEC);
+ set_normalized_timespec(&ts, wc_sec + delta, nsec);
+ /*
+ * Of all mechanisms of time adjustment I've tested, this one
+ * was the champion!
+ */
+ return ts.tv_sec + 1;
+}
+
+int kvm_set_wallclock(unsigned long now)
+{
+ return 0;
+}
+
+/*
+ * This is our read_clock function. The host puts an tsc timestamp each time
+ * it updates a new time. Without the tsc adjustment, we can have a situation
+ * in which a vcpu starts to run earlier (smaller system_time), but probes
+ * time later (compared to another vcpu), leading to backwards time
+ */
+static cycle_t kvm_clock_read(void)
+{
+ u64 last_tsc, now;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ last_tsc = get_clock(cpu, tsc_timestamp);
+ now = get_clock(cpu, system_time);
+
+ now += kvm_get_delta(last_tsc);
+ preempt_enable();
+
+ return now;
+}
+static struct clocksource kvm_clock = {
+ .name = "kvm-clock",
+ .read = kvm_clock_read,
+ .rating = 400,
+ .mask = CLOCKSOURCE_MASK(64),
+ .mult = 1 << KVM_SCALE,
+ .shift = KVM_SCALE,
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int kvm_register_clock(void)
+{
+ int cpu = smp_processor_id();
+ int low, high;
+ low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
+ high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+
+ return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+}
+
+static void kvm_setup_secondary_clock(void)
+{
+ /*
+ * Now that the first cpu already had this clocksource initialized,
+ * we shouldn't fail.
+ */
+ WARN_ON(kvm_register_clock());
+ /* ok, done with our trickery, call native */
+ setup_secondary_APIC_clock();
+}
+
+/*
+ * After the clock is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written. So before any
+ * kind of shutdown from our side, we unregister the clock by writting anything
+ * that does not have the 'enable' bit set in the msr
+ */
+#ifdef CONFIG_KEXEC
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+ native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_machine_crash_shutdown(regs);
+}
+#endif
+
+static void kvm_shutdown(void)
+{
+ native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+ native_machine_shutdown();
+}
+
+void __init kvmclock_init(void)
+{
+ if (!kvm_para_available())
+ return;
+
+ if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+ if (kvm_register_clock())
+ return;
+ pv_time_ops.get_wallclock = kvm_get_wallclock;
+ pv_time_ops.set_wallclock = kvm_set_wallclock;
+ pv_time_ops.sched_clock = kvm_clock_read;
+ pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+ machine_ops.shutdown = kvm_shutdown;
+#ifdef CONFIG_KEXEC
+ machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+ clocksource_register(&kvm_clock);
+ }
+}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index b402c0f3f192..3cad17fe026b 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -63,7 +63,7 @@ static int __init mfgpt_fix(char *s)
/* The following udocumented bit resets the MFGPT timers */
val = 0xFF; dummy = 0;
- wrmsr(0x5140002B, val, dummy);
+ wrmsr(MSR_MFGPT_SETUP, val, dummy);
return 1;
}
__setup("mfgptfix", mfgpt_fix);
@@ -127,17 +127,17 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
* 6; that is, resets for 7 and 8 will be ignored. Is this
* a problem? -dilinger
*/
- msr = MFGPT_NR_MSR;
+ msr = MSR_MFGPT_NR;
mask = 1 << (timer + 24);
break;
case MFGPT_EVENT_NMI:
- msr = MFGPT_NR_MSR;
+ msr = MSR_MFGPT_NR;
mask = 1 << (timer + shift);
break;
case MFGPT_EVENT_IRQ:
- msr = MFGPT_IRQ_MSR;
+ msr = MSR_MFGPT_IRQ;
mask = 1 << (timer + shift);
break;
@@ -364,7 +364,8 @@ int __init mfgpt_timer_setup(void)
geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val);
/* Set up the clock event */
- mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32);
+ mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC,
+ mfgpt_clockevent.shift);
mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF,
&mfgpt_clockevent);
mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70744e344fa1..3e2c54dc8b29 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -686,13 +686,11 @@ void __init get_smp_config(void)
static int __init smp_scan_config(unsigned long base, unsigned long length,
unsigned reserve)
{
- extern void __bad_mpf_size(void);
unsigned int *bp = phys_to_virt(base);
struct intel_mp_floating *mpf;
Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
- if (sizeof(*mpf) != 16)
- __bad_mpf_size();
+ BUILD_BUG_ON(sizeof(*mpf) != 16);
while (length > 0) {
mpf = (struct intel_mp_floating *)bp;
@@ -801,7 +799,6 @@ void __init find_smp_config(void)
#ifdef CONFIG_X86_IO_APIC
#define MP_ISA_BUS 0
-#define MP_MAX_IOAPIC_PIN 127
extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
@@ -820,7 +817,7 @@ static int mp_find_ioapic(int gsi)
return -1;
}
-static u8 uniq_ioapic_id(u8 id)
+static u8 __init uniq_ioapic_id(u8 id)
{
#ifdef CONFIG_X86_32
if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
@@ -909,14 +906,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
intsrc.mpc_dstirq = pin; /* INTIN# */
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
- intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
+ MP_intsrc_info(&intsrc);
}
int es7000_plat;
@@ -985,23 +975,14 @@ void __init mp_config_acpi_legacy_irqs(void)
intsrc.mpc_srcbusirq = i; /* Identity mapped */
intsrc.mpc_dstirq = i;
- Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
- "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
- (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
- intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
- intsrc.mpc_dstirq);
-
- mp_irqs[mp_irq_entries] = intsrc;
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!\n");
+ MP_intsrc_info(&intsrc);
}
}
int mp_register_gsi(u32 gsi, int triggering, int polarity)
{
- int ioapic = -1;
- int ioapic_pin = 0;
- int idx, bit = 0;
+ int ioapic;
+ int ioapic_pin;
#ifdef CONFIG_X86_32
#define MAX_GSI_NUM 4096
#define IRQ_COMPRESSION_START 64
@@ -1041,15 +1022,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
* with redundant pin->gsi mappings (but unique PCI devices);
* we only program the IOAPIC on the first.
*/
- bit = ioapic_pin % 32;
- idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
- if (idx > 3) {
+ if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
printk(KERN_ERR "Invalid reference to IOAPIC pin "
"%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
ioapic_pin);
return gsi;
}
- if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+ if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
#ifdef CONFIG_X86_32
@@ -1059,7 +1038,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
#endif
}
- mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit);
+ set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
#ifdef CONFIG_X86_32
/*
* For GSI >= 64, use IRQ compression
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3733412d1357..74f0c5ea2a03 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = {
.flush_tlb_single = native_flush_tlb_single,
.flush_tlb_others = native_flush_tlb_others,
- .alloc_pt = paravirt_nop,
- .alloc_pd = paravirt_nop,
- .alloc_pd_clone = paravirt_nop,
- .release_pt = paravirt_nop,
- .release_pd = paravirt_nop,
+ .alloc_pte = paravirt_nop,
+ .alloc_pmd = paravirt_nop,
+ .alloc_pmd_clone = paravirt_nop,
+ .alloc_pud = paravirt_nop,
+ .release_pte = paravirt_nop,
+ .release_pmd = paravirt_nop,
+ .release_pud = paravirt_nop,
.set_pte = native_set_pte,
.set_pte_at = native_set_pte_at,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 2edee22e9c30..e28ec497e142 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -43,6 +43,7 @@
#include <asm/system.h>
#include <asm/dma.h>
#include <asm/rio.h>
+#include <asm/bios_ebda.h>
#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
int use_calgary __read_mostly = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3004d716539d..67e9b4a1e89d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -4,6 +4,8 @@
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/pm.h>
struct kmem_cache *task_xstate_cachep;
@@ -42,3 +44,118 @@ void arch_task_cache_init(void)
__alignof__(union thread_xstate),
SLAB_PANIC, NULL);
}
+
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+ smp_mb();
+ /* kick all the CPUs so that they exit out of pm_idle */
+ smp_call_function(do_nothing, NULL, 0, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+ if (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __mwait(ax, cx);
+ }
+}
+
+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
+static void mwait_idle(void)
+{
+ if (!need_resched()) {
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
+ } else
+ local_irq_enable();
+}
+
+
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+ if (force_mwait)
+ return 1;
+ /* Any C1 states supported? */
+ return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->work.need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle(void)
+{
+ local_irq_enable();
+ cpu_relax();
+}
+
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+{
+ static int selected;
+
+ if (selected)
+ return;
+#ifdef CONFIG_X86_SMP
+ if (pm_idle == poll_idle && smp_num_siblings > 1) {
+ printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+ " performance may degrade.\n");
+ }
+#endif
+ if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
+ /*
+ * Skip, if setup has overridden idle.
+ * One CPU supports mwait => All CPUs supports mwait
+ */
+ if (!pm_idle) {
+ printk(KERN_INFO "using mwait in idle threads.\n");
+ pm_idle = mwait_idle;
+ }
+ }
+ selected = 1;
+}
+
+static int __init idle_setup(char *str)
+{
+ if (!strcmp(str, "poll")) {
+ printk("using polling idle threads.\n");
+ pm_idle = poll_idle;
+ } else if (!strcmp(str, "mwait"))
+ force_mwait = 1;
+ else
+ return -1;
+
+ boot_option_idle_override = 1;
+ return 0;
+}
+early_param("idle", idle_setup);
+
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 77de848bd1fb..f8476dfbb60d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -111,12 +111,10 @@ void default_idle(void)
*/
smp_mb();
- local_irq_disable();
- if (!need_resched()) {
+ if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
- local_irq_disable();
- }
- local_irq_enable();
+ else
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
} else {
local_irq_enable();
@@ -128,17 +126,6 @@ void default_idle(void)
EXPORT_SYMBOL(default_idle);
#endif
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
- local_irq_enable();
- cpu_relax();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
#include <asm/nmi.h>
/* We don't actually take CPU down, just spin without interrupts. */
@@ -196,6 +183,7 @@ void cpu_idle(void)
if (cpu_is_offline(cpu))
play_dead();
+ local_irq_disable();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
idle();
}
@@ -206,104 +194,6 @@ void cpu_idle(void)
}
}
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
- smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __sti_mwait(ax, cx);
- else
- local_irq_enable();
- } else
- local_irq_enable();
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
- local_irq_enable();
- mwait_idle_with_hints(0, 0);
-}
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
- if (force_mwait)
- return 1;
- /* Any C1 states supported? */
- return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
- static int selected;
-
- if (selected)
- return;
-#ifdef CONFIG_X86_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
- printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
- " performance may degrade.\n");
- }
-#endif
- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
- /*
- * Skip, if setup has overridden idle.
- * One CPU supports mwait => All CPUs supports mwait
- */
- if (!pm_idle) {
- printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
- }
- }
- selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
- if (!strcmp(str, "poll")) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
- } else if (!strcmp(str, "mwait"))
- force_mwait = 1;
- else
- return -1;
-
- boot_option_idle_override = 1;
- return 0;
-}
-early_param("idle", idle_setup);
-
void __show_registers(struct pt_regs *regs, int all)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 131c2ee7ac56..e2319f39988b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -106,26 +106,13 @@ void default_idle(void)
* test NEED_RESCHED:
*/
smp_mb();
- local_irq_disable();
- if (!need_resched()) {
+ if (!need_resched())
safe_halt(); /* enables interrupts racelessly */
- local_irq_disable();
- }
- local_irq_enable();
+ else
+ local_irq_enable();
current_thread_info()->status |= TS_POLLING;
}
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
- local_irq_enable();
- cpu_relax();
-}
-
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);
@@ -192,110 +179,6 @@ void cpu_idle(void)
}
}
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
- smp_mb();
- /* kick all the CPUs so that they exit out of pm_idle */
- smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __mwait(ax, cx);
- }
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
- if (!need_resched()) {
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __sti_mwait(0, 0);
- else
- local_irq_enable();
- } else {
- local_irq_enable();
- }
-}
-
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
- if (force_mwait)
- return 1;
- /* Any C1 states supported? */
- return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
- static int selected;
-
- if (selected)
- return;
-#ifdef CONFIG_X86_SMP
- if (pm_idle == poll_idle && smp_num_siblings > 1) {
- printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
- " performance may degrade.\n");
- }
-#endif
- if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
- /*
- * Skip, if setup has overridden idle.
- * One CPU supports mwait => All CPUs supports mwait
- */
- if (!pm_idle) {
- printk(KERN_INFO "using mwait in idle threads.\n");
- pm_idle = mwait_idle;
- }
- }
- selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
- if (!strcmp(str, "poll")) {
- printk("using polling idle threads.\n");
- pm_idle = poll_idle;
- } else if (!strcmp(str, "mwait"))
- force_mwait = 1;
- else
- return -1;
-
- boot_option_idle_override = 1;
- return 0;
-}
-early_param("idle", idle_setup);
-
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 559c1b027417..fb03ef380f0e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1207,97 +1207,16 @@ static int genregs32_set(struct task_struct *target,
return ret;
}
-static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+ compat_ulong_t caddr, compat_ulong_t cdata)
{
- siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
- compat_siginfo_t __user *si32 = compat_ptr(data);
- siginfo_t ssi;
- int ret;
-
- if (request == PTRACE_SETSIGINFO) {
- memset(&ssi, 0, sizeof(siginfo_t));
- ret = copy_siginfo_from_user32(&ssi, si32);
- if (ret)
- return ret;
- if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
- return -EFAULT;
- }
- ret = sys_ptrace(request, pid, addr, (unsigned long)si);
- if (ret)
- return ret;
- if (request == PTRACE_GETSIGINFO) {
- if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
- return -EFAULT;
- ret = copy_siginfo_to_user32(si32, &ssi);
- }
- return ret;
-}
-
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
-{
- struct task_struct *child;
- struct pt_regs *childregs;
+ unsigned long addr = caddr;
+ unsigned long data = cdata;
void __user *datap = compat_ptr(data);
int ret;
__u32 val;
switch (request) {
- case PTRACE_TRACEME:
- case PTRACE_ATTACH:
- case PTRACE_KILL:
- case PTRACE_CONT:
- case PTRACE_SINGLESTEP:
- case PTRACE_SINGLEBLOCK:
- case PTRACE_DETACH:
- case PTRACE_SYSCALL:
- case PTRACE_OLDSETOPTIONS:
- case PTRACE_SETOPTIONS:
- case PTRACE_SET_THREAD_AREA:
- case PTRACE_GET_THREAD_AREA:
-#ifdef X86_BTS
- case PTRACE_BTS_CONFIG:
- case PTRACE_BTS_STATUS:
- case PTRACE_BTS_SIZE:
- case PTRACE_BTS_GET:
- case PTRACE_BTS_CLEAR:
- case PTRACE_BTS_DRAIN:
-#endif
- return sys_ptrace(request, pid, addr, data);
-
- default:
- return -EINVAL;
-
- case PTRACE_PEEKTEXT:
- case PTRACE_PEEKDATA:
- case PTRACE_POKEDATA:
- case PTRACE_POKETEXT:
- case PTRACE_POKEUSR:
- case PTRACE_PEEKUSR:
- case PTRACE_GETREGS:
- case PTRACE_SETREGS:
- case PTRACE_SETFPREGS:
- case PTRACE_GETFPREGS:
- case PTRACE_SETFPXREGS:
- case PTRACE_GETFPXREGS:
- case PTRACE_GETEVENTMSG:
- break;
-
- case PTRACE_SETSIGINFO:
- case PTRACE_GETSIGINFO:
- return ptrace32_siginfo(request, pid, addr, data);
- }
-
- child = ptrace_get_task_struct(pid);
- if (IS_ERR(child))
- return PTR_ERR(child);
-
- ret = ptrace_check_attach(child, request == PTRACE_KILL);
- if (ret < 0)
- goto out;
-
- childregs = task_pt_regs(child);
-
- switch (request) {
case PTRACE_PEEKUSR:
ret = getreg32(child, addr, &val);
if (ret == 0)
@@ -1343,12 +1262,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
sizeof(struct user32_fxsr_struct),
datap);
+ case PTRACE_GET_THREAD_AREA:
+ case PTRACE_SET_THREAD_AREA:
+ return arch_ptrace(child, request, addr, data);
+
default:
return compat_ptrace_request(child, request, addr, data);
}
- out:
- put_task_struct(child);
return ret;
}
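
Editor's note: the rewritten entry point above follows the usual compat dispatch shape — handle the 32-bit-specific register requests locally, hand the thread-area requests back to the native arch_ptrace(), and let compat_ptrace_request() cover everything else. A minimal user-space sketch of that shape (all names and request codes below are illustrative stand-ins, not the kernel's):

#include <stdio.h>

/* Illustrative request codes and fallbacks; not the kernel's values. */
enum req { REQ_PEEKUSR, REQ_GET_THREAD_AREA, REQ_OTHER };

static long native_request(enum req r)  { return 100 + r; } /* arch_ptrace() stand-in */
static long generic_request(enum req r) { return 200 + r; } /* compat_ptrace_request() stand-in */

static long compat_dispatch(enum req r)
{
	switch (r) {
	case REQ_PEEKUSR:
		return 1;                  /* 32-bit specific handling stays here */
	case REQ_GET_THREAD_AREA:
		return native_request(r);  /* shared with the native path */
	default:
		return generic_request(r); /* common requests fall through */
	}
}

int main(void)
{
	printf("%ld %ld %ld\n", compat_dispatch(REQ_PEEKUSR),
	       compat_dispatch(REQ_GET_THREAD_AREA),
	       compat_dispatch(REQ_OTHER));
	return 0;
}
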
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 19c9386ac118..a4a838306b2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -8,6 +8,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/hpet.h>
+#include <asm/pgtable.h>
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
@@ -15,7 +16,6 @@
# include <linux/dmi.h>
# include <linux/ctype.h>
# include <linux/mc146818rtc.h>
-# include <asm/pgtable.h>
#else
# include <asm/iommu.h>
#endif
@@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length)
/* Remap the kernel at virtual address zero, as well as offset zero
from the kernel segment. This assumes the kernel segment starts at
virtual address PAGE_OFFSET. */
- memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+ memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
/*
@@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void)
}
}
-static void native_machine_shutdown(void)
+void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
@@ -470,7 +470,10 @@ struct machine_ops machine_ops = {
.shutdown = native_machine_shutdown,
.emergency_restart = native_machine_emergency_restart,
.restart = native_machine_restart,
- .halt = native_machine_halt
+ .halt = native_machine_halt,
+#ifdef CONFIG_KEXEC
+ .crash_shutdown = native_machine_crash_shutdown,
+#endif
};
void machine_power_off(void)
@@ -498,3 +501,9 @@ void machine_halt(void)
machine_ops.halt();
}
+#ifdef CONFIG_KEXEC
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+ machine_ops.crash_shutdown(regs);
+}
+#endif
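
Editor's note: the reboot.c change routes machine_crash_shutdown() through the machine_ops table rather than a fixed call, so alternative implementations can override the crash path. The indirection itself is a plain function-pointer table; a compressed sketch with simplified stand-in types and names:

#include <stdio.h>

struct pt_regs_sketch { int dummy; };

/* Simplified ops table: only the hook this hunk adds. */
struct machine_ops_sketch {
	void (*crash_shutdown)(struct pt_regs_sketch *regs);
};

static void native_crash_shutdown_sketch(struct pt_regs_sketch *regs)
{
	(void)regs;
	printf("native crash shutdown\n");
}

static struct machine_ops_sketch ops = {
	.crash_shutdown = native_crash_shutdown_sketch,
};

/* Generic entry point: forward to whatever the table provides. */
static void machine_crash_shutdown_sketch(struct pt_regs_sketch *regs)
{
	ops.crash_shutdown(regs);
}

int main(void)
{
	struct pt_regs_sketch regs = { 0 };
	machine_crash_shutdown_sketch(&regs);
	return 0;
}
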
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 455d3c80960b..2283422af794 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -47,6 +47,7 @@
#include <linux/pfn.h>
#include <linux/pci.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <video/edid.h>
@@ -389,7 +390,6 @@ unsigned long __init find_max_low_pfn(void)
return max_low_pfn;
}
-#define BIOS_EBDA_SEGMENT 0x40E
#define BIOS_LOWMEM_KILOBYTES 0x413
/*
@@ -420,8 +420,7 @@ static void __init reserve_ebda_region(void)
lowmem <<= 10;
/* start of EBDA area */
- ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
- ebda_addr <<= 4;
+ ebda_addr = get_bios_ebda();
/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
@@ -822,6 +821,10 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = setup_memory();
+#ifdef CONFIG_KVM_CLOCK
+ kvmclock_init();
+#endif
+
#ifdef CONFIG_VMI
/*
* Must be after max_low_pfn is determined, and before kernel
@@ -829,6 +832,7 @@ void __init setup_arch(char **cmdline_p)
*/
vmi_init();
#endif
+ kvm_guest_init();
/*
* NOTE: before this point _nobody_ is allowed to allocate
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c2ec3dcb6b99..a94fb959a87a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
#include <linux/ctype.h>
#include <linux/uaccess.h>
#include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
@@ -116,7 +117,7 @@ extern int root_mountflags;
char __initdata command_line[COMMAND_LINE_SIZE];
-struct resource standard_io_resources[] = {
+static struct resource standard_io_resources[] = {
{ .name = "dma1", .start = 0x00, .end = 0x1f,
.flags = IORESOURCE_BUSY | IORESOURCE_IO },
{ .name = "pic1", .start = 0x20, .end = 0x21,
@@ -190,6 +191,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
+ early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
}
#endif
@@ -264,6 +266,28 @@ void __attribute__((weak)) __init memory_setup(void)
machine_specific_memory_setup();
}
+static void __init parse_setup_data(void)
+{
+ struct setup_data *data;
+ unsigned long pa_data;
+
+ if (boot_params.hdr.version < 0x0209)
+ return;
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_ioremap(pa_data, PAGE_SIZE);
+ switch (data->type) {
+ default:
+ break;
+ }
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
+ free_early(pa_data, pa_data+sizeof(*data)+data->len);
+#endif
+ pa_data = data->next;
+ early_iounmap(data, PAGE_SIZE);
+ }
+}
+
/*
* setup_arch - architecture-specific boot-time initializations
*
@@ -316,6 +340,8 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
+ parse_setup_data();
+
parse_early_param();
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
@@ -359,6 +385,10 @@ void __init setup_arch(char **cmdline_p)
io_delay_init();
+#ifdef CONFIG_KVM_CLOCK
+ kvmclock_init();
+#endif
+
#ifdef CONFIG_SMP
/* setup to use the early static init tables during kernel startup */
x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
@@ -397,8 +427,6 @@ void __init setup_arch(char **cmdline_p)
contig_initmem_init(0, end_pfn);
#endif
- early_res_to_bootmem();
-
dma32_reserve_bootmem();
#ifdef CONFIG_ACPI_SLEEP
@@ -465,6 +493,8 @@ void __init setup_arch(char **cmdline_p)
init_apic_mappings();
ioapic_init_mappings();
+ kvm_guest_init();
+
/*
* We trust e820 completely. No explicit ROM probing in memory.
*/
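
Editor's note: parse_setup_data() above walks a singly linked chain of variable-length records, each giving the physical address of the next record, a type, and a payload length; every record is temporarily mapped with early_ioremap() and then unmapped again. A self-contained sketch of the same walk, using ordinary pointers in place of the early mapping helpers (the field order mirrors the boot protocol's struct setup_data):

#include <stdint.h>
#include <stdio.h>

struct setup_data_sketch {
	uint64_t next;   /* physical address of the next record, 0 ends the chain */
	uint32_t type;
	uint32_t len;
	/* payload of 'len' bytes follows in the real layout */
};

static void walk_setup_data(const struct setup_data_sketch *first)
{
	const struct setup_data_sketch *sd = first;

	while (sd) {
		printf("setup_data type=%u len=%u\n", sd->type, sd->len);
		/* the kernel maps and unmaps each record here instead */
		sd = (const struct setup_data_sketch *)(uintptr_t)sd->next;
	}
}

int main(void)
{
	struct setup_data_sketch second = { .next = 0, .type = 2, .len = 16 };
	struct setup_data_sketch first  = { .next = (uint64_t)(uintptr_t)&second,
					    .type = 1, .len = 8 };

	walk_setup_data(&first);
	return 0;
}
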
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index f1b117930837..8e05e7f7bd40 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -413,16 +413,6 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
regs->ss = __USER_DS;
regs->cs = __USER_CS;
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
return 0;
give_sigsegv:
@@ -501,16 +491,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->ss = __USER_DS;
regs->cs = __USER_CS;
- /*
- * Clear TF when entering the signal handler, but
- * notify any tracer that was single-stepping it.
- * The tracer may want to single-step inside the
- * handler too.
- */
- regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
return 0;
give_sigsegv:
@@ -566,6 +546,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
if (ret)
return ret;
+ /*
+ * Clear the direction flag as per the ABI for function entry.
+ */
+ regs->flags &= ~X86_EFLAGS_DF;
+
+ /*
+ * Clear TF when entering the signal handler, but
+ * notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ regs->flags &= ~X86_EFLAGS_TF;
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
+
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 827179c5b32a..ccb2a4560c2d 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -285,14 +285,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
even if the handler happens to be interrupting 32-bit code. */
regs->cs = __USER_CS;
- /* This, by contrast, has nothing to do with segment registers -
- see include/asm-x86_64/uaccess.h for details. */
- set_fs(USER_DS);
-
- regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
- if (test_thread_flag(TIF_SINGLESTEP))
- ptrace_notify(SIGTRAP);
-
return 0;
give_sigsegv:
@@ -380,6 +372,28 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret == 0) {
+ /*
+ * This has nothing to do with segment registers,
+ * despite the name. This magic affects uaccess.h
+ * macros' behavior. Reset it to the normal setting.
+ */
+ set_fs(USER_DS);
+
+ /*
+ * Clear the direction flag as per the ABI for function entry.
+ */
+ regs->flags &= ~X86_EFLAGS_DF;
+
+ /*
+ * Clear TF when entering the signal handler, but
+ * notify any tracer that was single-stepping it.
+ * The tracer may want to single-step inside the
+ * handler too.
+ */
+ regs->flags &= ~X86_EFLAGS_TF;
+ if (test_thread_flag(TIF_SINGLESTEP))
+ ptrace_notify(SIGTRAP);
+
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
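
Editor's note: both signal paths now sanitize the flags register in handle_signal() instead of in the frame-setup helpers: DF is cleared because the ABI requires the direction flag to be clear on function entry, and TF is cleared so the handler itself is not single-stepped, with any tracer notified instead. A minimal sketch of that masking (the flag values are the architectural bit positions; the helper name is made up):

#include <stdio.h>
#include <stdint.h>

#define X86_EFLAGS_TF 0x00000100UL  /* trap flag: single-step after each insn */
#define X86_EFLAGS_DF 0x00000400UL  /* direction flag: string ops decrement */

/* Illustrative stand-in for the flag handling handle_signal() now does. */
static unsigned long flags_for_signal_entry(unsigned long flags)
{
	flags &= ~X86_EFLAGS_DF;  /* ABI: DF must be clear at function entry */
	flags &= ~X86_EFLAGS_TF;  /* don't trap into the handler; tell the tracer */
	return flags;
}

int main(void)
{
	unsigned long before = 0x00010502UL;  /* arbitrary example value */
	printf("0x%lx -> 0x%lx\n", before, flags_for_signal_entry(before));
	return 0;
}
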
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ade371f9663a..04c662ba18f1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
#ifdef CONFIG_X86_32
/* init low mem mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
- min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+ clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
flush_tlb_all();
#endif
@@ -1058,7 +1058,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
check_tsc_sync_source(cpu);
local_irq_restore(flags);
- while (!cpu_isset(cpu, cpu_online_map)) {
+ while (!cpu_online(cpu)) {
cpu_relax();
touch_nmi_watchdog();
}
@@ -1168,7 +1168,7 @@ static void __init smp_cpu_index_default(void)
int i;
struct cpuinfo_x86 *c;
- for_each_cpu_mask(i, cpu_possible_map) {
+ for_each_possible_cpu(i) {
c = &cpu_data(i);
/* mark all to hotplug */
c->cpu_index = NR_CPUS;
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 6878a9c2df5d..ae751094eba9 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -29,6 +29,7 @@
#include <linux/mm.h>
#include <linux/init.h>
#include <asm/io.h>
+#include <asm/bios_ebda.h>
#include <asm/mach-summit/mach_mpparse.h>
static struct rio_table_hdr *rio_table_hdr __initdata;
@@ -140,8 +141,8 @@ void __init setup_summit(void)
int i, next_wpeg, next_bus = 0;
/* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
- ptr = *(unsigned short *)phys_to_virt(0x40Eul);
- ptr = (unsigned long)phys_to_virt(ptr << 4);
+ ptr = get_bios_ebda();
+ ptr = (unsigned long)phys_to_virt(ptr);
rio_table_hdr = NULL;
offset = 0x180;
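
Editor's note: the setup_32.c and summit_32.c hunks both replace an open-coded EBDA lookup with get_bios_ebda(); the removed lines show what the helper encapsulates: the BIOS data area word at physical 0x40E holds the EBDA base as a real-mode segment, which becomes a physical address when shifted left by four. A tiny sketch of that conversion (the function name here is illustrative):

#include <stdio.h>
#include <stdint.h>

/* The word at physical 0x40E is a real-mode segment; segment << 4 is the
 * physical base of the Extended BIOS Data Area. */
static uint32_t ebda_base_from_segment(uint16_t segment)
{
	return (uint32_t)segment << 4;
}

int main(void)
{
	uint16_t segment = 0x9fc0;  /* a value a BIOS might plausibly store */
	printf("EBDA base: 0x%05x\n", ebda_base_from_segment(segment));
	return 0;
}
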
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index df224a8774cb..a1f07d793202 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -195,9 +195,9 @@ static int __cpuinit init_smp_flush(void)
{
int i;
- for_each_cpu_mask(i, cpu_possible_map) {
+ for_each_possible_cpu(i)
spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
- }
+
return 0;
}
core_initcall(init_smp_flush);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 64580679861e..d8ccc3c6552f 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -33,7 +33,7 @@
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
#ifndef CONFIG_HOTPLUG_CPU
-.section ".init.data","aw",@progbits
+.section ".cpuinit.data","aw",@progbits
#else
.section .rodata,"a",@progbits
#endif
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 471e694d6713..bde6f63e15d5 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -602,7 +602,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
{
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 12affe1f9bce..956f38927aa7 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page)
* pdes need to be zeroed.
*/
if (type & VMI_PAGE_CLONE)
- limit = USER_PTRS_PER_PGD;
+ limit = KERNEL_PGD_BOUNDARY;
for (i = 0; i < limit; i++)
BUG_ON(ptr[i]);
}
@@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
}
#endif
-static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
{
vmi_set_page_type(pfn, VMI_PAGE_L1);
vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
}
-static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
{
/*
* This call comes in very early, before mem_map is setup.
@@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
}
-static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
{
vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
vmi_check_page_type(clonepfn, VMI_PAGE_L2);
vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
}
-static void vmi_release_pt(u32 pfn)
+static void vmi_release_pte(u32 pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L1);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
}
-static void vmi_release_pd(u32 pfn)
+static void vmi_release_pmd(u32 pfn)
{
vmi_ops.release_page(pfn, VMI_PAGE_L2);
vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
@@ -871,15 +871,15 @@ static inline int __init activate_vmi(void)
vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
if (vmi_ops.allocate_page) {
- pv_mmu_ops.alloc_pt = vmi_allocate_pt;
- pv_mmu_ops.alloc_pd = vmi_allocate_pd;
- pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+ pv_mmu_ops.alloc_pte = vmi_allocate_pte;
+ pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
+ pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
}
vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
if (vmi_ops.release_page) {
- pv_mmu_ops.release_pt = vmi_release_pt;
- pv_mmu_ops.release_pd = vmi_release_pd;
+ pv_mmu_ops.release_pte = vmi_release_pte;
+ pv_mmu_ops.release_pmd = vmi_release_pmd;
}
/* Set linear is needed in all cases */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index b7ab3c335fae..fad3674b06a5 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -209,12 +209,6 @@ SECTIONS
EXIT_DATA
}
-/* vdso blob that is mapped into user space */
- vdso_start = . ;
- .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
- . = ALIGN(PAGE_SIZE);
- vdso_end = .;
-
#ifdef CONFIG_BLK_DEV_INITRD
. = ALIGN(PAGE_SIZE);
__initramfs_start = .;