438 files changed, 27668 insertions, 10238 deletions
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index f10d2eddd2c3..b04f1feb1dda 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -994,7 +994,7 @@ marvel_agp_configure(alpha_agp_info *agp)
 		 * rate, but warn the user.
 		 */
 		printk("%s: unknown PLL setting RNGB=%lx (PLL6_CTL=%016lx)\n",
-		       __FUNCTION__, IO7_PLL_RNGB(agp_pll), agp_pll);
+		       __func__, IO7_PLL_RNGB(agp_pll), agp_pll);
 		break;
 	}
 
@@ -1044,13 +1044,13 @@ marvel_agp_translate(alpha_agp_info *agp, dma_addr_t addr)
 
 	if (addr < agp->aperture.bus_base ||
 	    addr >= agp->aperture.bus_base + agp->aperture.size) {
-		printk("%s: addr out of range\n", __FUNCTION__);
+		printk("%s: addr out of range\n", __func__);
 		return -EINVAL;
 	}
 
 	pte = aper->arena->ptes[baddr >> PAGE_SHIFT];
 	if (!(pte & 1)) {
-		printk("%s: pte not valid\n", __FUNCTION__);
+		printk("%s: pte not valid\n", __func__);
 		return -EINVAL;
 	} 
 	return (pte >> 1) << PAGE_SHIFT;
diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c
index f5ca5255eb06..c0750291b44a 100644
--- a/arch/alpha/kernel/core_t2.c
+++ b/arch/alpha/kernel/core_t2.c
@@ -336,10 +336,7 @@ t2_direct_map_window1(unsigned long base, unsigned long length)
 
 #if DEBUG_PRINT_FINAL_SETTINGS
 	printk("%s: setting WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n",
-	       __FUNCTION__,
-	       *(vulp)T2_WBASE1,
-	       *(vulp)T2_WMASK1,
-	       *(vulp)T2_TBASE1);
+	       __func__, *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1);
 #endif
 }
 
@@ -366,10 +363,7 @@ t2_sg_map_window2(struct pci_controller *hose,
 
 #if DEBUG_PRINT_FINAL_SETTINGS
 	printk("%s: setting WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n",
-	       __FUNCTION__,
-	       *(vulp)T2_WBASE2,
-	       *(vulp)T2_WMASK2,
-	       *(vulp)T2_TBASE2);
+	       __func__, *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2);
 #endif
 }
 
@@ -377,15 +371,15 @@ static void __init
 t2_save_configuration(void)
 {
 #if DEBUG_PRINT_INITIAL_SETTINGS
-	printk("%s: HAE_1 was 0x%lx\n", __FUNCTION__, srm_hae); /* HW is 0 */
-	printk("%s: HAE_2 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_2);
-	printk("%s: HAE_3 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_3);
-	printk("%s: HAE_4 was 0x%lx\n", __FUNCTION__, *(vulp)T2_HAE_4);
-	printk("%s: HBASE was 0x%lx\n", __FUNCTION__, *(vulp)T2_HBASE);
+	printk("%s: HAE_1 was 0x%lx\n", __func__, srm_hae); /* HW is 0 */
+	printk("%s: HAE_2 was 0x%lx\n", __func__, *(vulp)T2_HAE_2);
+	printk("%s: HAE_3 was 0x%lx\n", __func__, *(vulp)T2_HAE_3);
+	printk("%s: HAE_4 was 0x%lx\n", __func__, *(vulp)T2_HAE_4);
+	printk("%s: HBASE was 0x%lx\n", __func__, *(vulp)T2_HBASE);
 
-	printk("%s: WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", __FUNCTION__, 
+	printk("%s: WBASE1=0x%lx WMASK1=0x%lx TBASE1=0x%lx\n", __func__,
 	       *(vulp)T2_WBASE1, *(vulp)T2_WMASK1, *(vulp)T2_TBASE1);
-	printk("%s: WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", __FUNCTION__, 
+	printk("%s: WBASE2=0x%lx WMASK2=0x%lx TBASE2=0x%lx\n", __func__,
 	       *(vulp)T2_WBASE2, *(vulp)T2_WMASK2, *(vulp)T2_TBASE2);
 #endif
 
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c
index 819326627b96..319fcb74611e 100644
--- a/arch/alpha/kernel/core_titan.c
+++ b/arch/alpha/kernel/core_titan.c
@@ -365,21 +365,21 @@ void __init
 titan_init_arch(void)
 {
 #if 0
-	printk("%s: titan_init_arch()\n", __FUNCTION__);
-	printk("%s: CChip registers:\n", __FUNCTION__);
-	printk("%s: CSR_CSC 0x%lx\n", __FUNCTION__, TITAN_cchip->csc.csr);
-	printk("%s: CSR_MTR 0x%lx\n", __FUNCTION__, TITAN_cchip->mtr.csr);
-	printk("%s: CSR_MISC 0x%lx\n", __FUNCTION__, TITAN_cchip->misc.csr);
-	printk("%s: CSR_DIM0 0x%lx\n", __FUNCTION__, TITAN_cchip->dim0.csr);
-	printk("%s: CSR_DIM1 0x%lx\n", __FUNCTION__, TITAN_cchip->dim1.csr);
-	printk("%s: CSR_DIR0 0x%lx\n", __FUNCTION__, TITAN_cchip->dir0.csr);
-	printk("%s: CSR_DIR1 0x%lx\n", __FUNCTION__, TITAN_cchip->dir1.csr);
-	printk("%s: CSR_DRIR 0x%lx\n", __FUNCTION__, TITAN_cchip->drir.csr);
-
-	printk("%s: DChip registers:\n", __FUNCTION__);
-	printk("%s: CSR_DSC 0x%lx\n", __FUNCTION__, TITAN_dchip->dsc.csr);
-	printk("%s: CSR_STR 0x%lx\n", __FUNCTION__, TITAN_dchip->str.csr);
-	printk("%s: CSR_DREV 0x%lx\n", __FUNCTION__, TITAN_dchip->drev.csr);
+	printk("%s: titan_init_arch()\n", __func__);
+	printk("%s: CChip registers:\n", __func__);
+	printk("%s: CSR_CSC 0x%lx\n", __func__, TITAN_cchip->csc.csr);
+	printk("%s: CSR_MTR 0x%lx\n", __func__, TITAN_cchip->mtr.csr);
+	printk("%s: CSR_MISC 0x%lx\n", __func__, TITAN_cchip->misc.csr);
+	printk("%s: CSR_DIM0 0x%lx\n", __func__, TITAN_cchip->dim0.csr);
+	printk("%s: CSR_DIM1 0x%lx\n", __func__, TITAN_cchip->dim1.csr);
+	printk("%s: CSR_DIR0 0x%lx\n", __func__, TITAN_cchip->dir0.csr);
+	printk("%s: CSR_DIR1 0x%lx\n", __func__, TITAN_cchip->dir1.csr);
+	printk("%s: CSR_DRIR 0x%lx\n", __func__, TITAN_cchip->drir.csr);
+
+	printk("%s: DChip registers:\n", __func__);
+	printk("%s: CSR_DSC 0x%lx\n", __func__, TITAN_dchip->dsc.csr);
+	printk("%s: CSR_STR 0x%lx\n", __func__, TITAN_dchip->str.csr);
+	printk("%s: CSR_DREV 0x%lx\n", __func__, TITAN_dchip->drev.csr);
 #endif
 
 	boot_cpuid = __hard_smp_processor_id();
@@ -700,13 +700,13 @@ titan_agp_translate(alpha_agp_info *agp, dma_addr_t addr)
 
 	if (addr < agp->aperture.bus_base ||
 	    addr >= agp->aperture.bus_base + agp->aperture.size) {
-		printk("%s: addr out of range\n", __FUNCTION__);
+		printk("%s: addr out of range\n", __func__);
 		return -EINVAL;
 	}
 
 	pte = aper->arena->ptes[baddr >> PAGE_SHIFT];
 	if (!(pte & 1)) {
-		printk("%s: pte not valid\n", __FUNCTION__);
+		printk("%s: pte not valid\n", __func__);
 		return -EINVAL;
 	}
 
diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c
index ef91e09590d4..5e7c28f92f19 100644
--- a/arch/alpha/kernel/core_tsunami.c
+++ b/arch/alpha/kernel/core_tsunami.c
@@ -241,8 +241,6 @@ tsunami_probe_write(volatile unsigned long *vaddr)
 #define tsunami_probe_read(ADDR) 1
 #endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */
 
-#define FN __FUNCTION__
-
 static void __init
 tsunami_init_one_pchip(tsunami_pchip *pchip, int index)
 {
@@ -383,27 +381,27 @@ tsunami_init_arch(void)
 	/* NXMs just don't matter to Tsunami--unless they make it
 	   choke completely. */
 	tmp = (unsigned long)(TSUNAMI_cchip - 1);
-	printk("%s: probing bogus address:  0x%016lx\n", FN, bogus_addr);
+	printk("%s: probing bogus address:  0x%016lx\n", __func__, bogus_addr);
 	printk("\tprobe %s\n",
 	       tsunami_probe_write((unsigned long *)bogus_addr)
 	       ? "succeeded" : "failed");
 #endif /* NXM_MACHINE_CHECKS_ON_TSUNAMI */
 
 #if 0
-	printk("%s: CChip registers:\n", FN);
-	printk("%s: CSR_CSC 0x%lx\n", FN, TSUNAMI_cchip->csc.csr);
-	printk("%s: CSR_MTR 0x%lx\n", FN, TSUNAMI_cchip.mtr.csr);
-	printk("%s: CSR_MISC 0x%lx\n", FN, TSUNAMI_cchip->misc.csr);
-	printk("%s: CSR_DIM0 0x%lx\n", FN, TSUNAMI_cchip->dim0.csr);
-	printk("%s: CSR_DIM1 0x%lx\n", FN, TSUNAMI_cchip->dim1.csr);
-	printk("%s: CSR_DIR0 0x%lx\n", FN, TSUNAMI_cchip->dir0.csr);
-	printk("%s: CSR_DIR1 0x%lx\n", FN, TSUNAMI_cchip->dir1.csr);
-	printk("%s: CSR_DRIR 0x%lx\n", FN, TSUNAMI_cchip->drir.csr);
+	printk("%s: CChip registers:\n", __func__);
+	printk("%s: CSR_CSC 0x%lx\n", __func__, TSUNAMI_cchip->csc.csr);
+	printk("%s: CSR_MTR 0x%lx\n", __func__, TSUNAMI_cchip.mtr.csr);
+	printk("%s: CSR_MISC 0x%lx\n", __func__, TSUNAMI_cchip->misc.csr);
+	printk("%s: CSR_DIM0 0x%lx\n", __func__, TSUNAMI_cchip->dim0.csr);
+	printk("%s: CSR_DIM1 0x%lx\n", __func__, TSUNAMI_cchip->dim1.csr);
+	printk("%s: CSR_DIR0 0x%lx\n", __func__, TSUNAMI_cchip->dir0.csr);
+	printk("%s: CSR_DIR1 0x%lx\n", __func__, TSUNAMI_cchip->dir1.csr);
+	printk("%s: CSR_DRIR 0x%lx\n", __func__, TSUNAMI_cchip->drir.csr);
 
 	printk("%s: DChip registers:\n");
-	printk("%s: CSR_DSC 0x%lx\n", FN, TSUNAMI_dchip->dsc.csr);
-	printk("%s: CSR_STR 0x%lx\n", FN, TSUNAMI_dchip->str.csr);
-	printk("%s: CSR_DREV 0x%lx\n", FN, TSUNAMI_dchip->drev.csr);
+	printk("%s: CSR_DSC 0x%lx\n", __func__, TSUNAMI_dchip->dsc.csr);
+	printk("%s: CSR_STR 0x%lx\n", __func__, TSUNAMI_dchip->str.csr);
+	printk("%s: CSR_DREV 0x%lx\n", __func__, TSUNAMI_dchip->drev.csr);
 #endif
 	/* With multiple PCI busses, we play with I/O as physical addrs.  */
 	ioport_resource.end = ~0UL;
diff --git a/arch/alpha/kernel/module.c b/arch/alpha/kernel/module.c
index 026ba9af6d6a..ebc3c894b5a2 100644
--- a/arch/alpha/kernel/module.c
+++ b/arch/alpha/kernel/module.c
@@ -120,6 +120,12 @@ module_frob_arch_sections(Elf64_Ehdr *hdr, Elf64_Shdr *sechdrs,
 
 	nsyms = symtab->sh_size / sizeof(Elf64_Sym);
 	chains = kcalloc(nsyms, sizeof(struct got_entry), GFP_KERNEL);
+	if (!chains) {
+		printk(KERN_ERR
+		       "module %s: no memory for symbol chain buffer\n",
+		       me->name);
+		return -ENOMEM;
+	}
 
 	got->sh_size = 0;
 	got->sh_addralign = 8;
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 78357798b6fd..baf57563b14c 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -208,7 +208,7 @@ pdev_save_srm_config(struct pci_dev *dev)
 
 	tmp = kmalloc(sizeof(*tmp), GFP_KERNEL);
 	if (!tmp) {
-		printk(KERN_ERR "%s: kmalloc() failed!\n", __FUNCTION__);
+		printk(KERN_ERR "%s: kmalloc() failed!\n", __func__);
 		return;
 	}
 	tmp->next = srm_saved_configs;
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index dd6e334ab9e1..2179c602032a 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -79,25 +79,21 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
 
 #ifdef CONFIG_DISCONTIGMEM
 
-        if (!NODE_DATA(nid) ||
-            (NULL == (arena = alloc_bootmem_node(NODE_DATA(nid),
-                                                 sizeof(*arena))))) {
-                printk("%s: couldn't allocate arena from node %d\n"
-                       "    falling back to system-wide allocation\n",
-                       __FUNCTION__, nid);
-                arena = alloc_bootmem(sizeof(*arena));
-        }
-
-        if (!NODE_DATA(nid) ||
-            (NULL == (arena->ptes = __alloc_bootmem_node(NODE_DATA(nid),
-                                                         mem_size,
-                                                         align,
-                                                         0)))) {
-                printk("%s: couldn't allocate arena ptes from node %d\n"
-                       "    falling back to system-wide allocation\n",
-                       __FUNCTION__, nid);
-                arena->ptes = __alloc_bootmem(mem_size, align, 0);
-        }
+	arena = alloc_bootmem_node(NODE_DATA(nid), sizeof(*arena));
+	if (!NODE_DATA(nid) || !arena) {
+		printk("%s: couldn't allocate arena from node %d\n"
+		       "    falling back to system-wide allocation\n",
+		       __func__, nid);
+		arena = alloc_bootmem(sizeof(*arena));
+	}
+
+	arena->ptes = __alloc_bootmem_node(NODE_DATA(nid), mem_size, align, 0);
+	if (!NODE_DATA(nid) || !arena->ptes) {
+		printk("%s: couldn't allocate arena ptes from node %d\n"
+		       "    falling back to system-wide allocation\n",
+		       __func__, nid);
+		arena->ptes = __alloc_bootmem(mem_size, align, 0);
+	}
 
 #else /* CONFIG_DISCONTIGMEM */
 
diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c
index 63c2073401ee..2525692db0ab 100644
--- a/arch/alpha/kernel/smp.c
+++ b/arch/alpha/kernel/smp.c
@@ -755,7 +755,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry,
 	if (atomic_read(&data.unstarted_count) > 0) {
 		long start_time = jiffies;
 		printk(KERN_ERR "%s: initial timeout -- trying long wait\n",
-		       __FUNCTION__);
+		       __func__);
 		timeout = jiffies + 30 * HZ;
 		while (atomic_read(&data.unstarted_count) > 0
 		       && time_before(jiffies, timeout))
@@ -764,7 +764,7 @@ smp_call_function_on_cpu (void (*func) (void *info), void *info, int retry,
 			long delta = jiffies - start_time;
 			printk(KERN_ERR 
 			       "%s: response %ld.%ld seconds into long wait\n",
-			       __FUNCTION__, delta / HZ,
+			       __func__, delta / HZ,
 			       (100 * (delta - ((delta / HZ) * HZ))) / HZ);
 		}
 	}
diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c
index f7dd081d57ff..78ad7cd1bbd6 100644
--- a/arch/alpha/kernel/srm_env.c
+++ b/arch/alpha/kernel/srm_env.c
@@ -199,7 +199,7 @@ srm_env_init(void)
 		printk(KERN_INFO "%s: This Alpha system doesn't "
 				"know about SRM (or you've booted "
 				"SRM->MILO->Linux, which gets "
-				"misdetected)...\n", __FUNCTION__);
+				"misdetected)...\n", __func__);
 		return -ENODEV;
 	}
 
diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c
index d187d01d2a17..e53a1e1c2f21 100644
--- a/arch/alpha/kernel/sys_alcor.c
+++ b/arch/alpha/kernel/sys_alcor.c
@@ -259,7 +259,7 @@ alcor_init_pci(void)
 	if (dev && dev->devfn == PCI_DEVFN(6,0)) {
 		alpha_mv.sys.cia.gru_int_req_bits = XLT_GRU_INT_REQ_BITS; 
 		printk(KERN_INFO "%s: Detected AS500 or XLT motherboard.\n",
-		       __FUNCTION__);
+		       __func__);
 	}
 	pci_dev_put(dev);
 }
diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c
index 922143ea1cdb..828449cd2636 100644
--- a/arch/alpha/kernel/sys_marvel.c
+++ b/arch/alpha/kernel/sys_marvel.c
@@ -80,7 +80,7 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7)
 	if (!(io7 = marvel_find_io7(pid))) {
 		printk(KERN_ERR 
 		       "%s for nonexistent io7 -- vec %x, pid %d\n",
-		       __FUNCTION__, irq, pid);
+		       __func__, irq, pid);
 		return NULL;
 	}
 
@@ -90,7 +90,7 @@ io7_get_irq_ctl(unsigned int irq, struct io7 **pio7)
 	if (irq >= 0x180) {
 		printk(KERN_ERR 
 		       "%s for invalid irq -- pid %d adjusted irq %x\n",
-		       __FUNCTION__, pid, irq);
+		       __func__, pid, irq);
 		return NULL;
 	}
 
@@ -110,8 +110,8 @@ io7_enable_irq(unsigned int irq)
 
 	ctl = io7_get_irq_ctl(irq, &io7);
 	if (!ctl || !io7) {
-		printk(KERN_ERR "%s: get_ctl failed for irq %x\n", 
-		       __FUNCTION__, irq);
+		printk(KERN_ERR "%s: get_ctl failed for irq %x\n",
+		       __func__, irq);
 		return;
 	}
 		
@@ -130,8 +130,8 @@ io7_disable_irq(unsigned int irq)
 
 	ctl = io7_get_irq_ctl(irq, &io7);
 	if (!ctl || !io7) {
-		printk(KERN_ERR "%s: get_ctl failed for irq %x\n", 
-		       __FUNCTION__, irq);
+		printk(KERN_ERR "%s: get_ctl failed for irq %x\n",
+		       __func__, irq);
 		return;
 	}
 		
diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c
index 906019cfa681..99a7f19da13a 100644
--- a/arch/alpha/kernel/sys_sable.c
+++ b/arch/alpha/kernel/sys_sable.c
@@ -454,7 +454,7 @@ sable_lynx_enable_irq(unsigned int irq)
 	spin_unlock(&sable_lynx_irq_lock);
 #if 0
 	printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n",
-	       __FUNCTION__, mask, bit, irq);
+	       __func__, mask, bit, irq);
 #endif
 }
 
@@ -470,7 +470,7 @@ sable_lynx_disable_irq(unsigned int irq)
 	spin_unlock(&sable_lynx_irq_lock);
 #if 0
 	printk("%s: mask 0x%lx bit 0x%x irq 0x%x\n",
-	       __FUNCTION__, mask, bit, irq);
+	       __func__, mask, bit, irq);
 #endif
 }
 
@@ -524,7 +524,7 @@ sable_lynx_srm_device_interrupt(unsigned long vector)
 	irq = sable_lynx_irq_swizzle->mask_to_irq[bit];
 #if 0
 	printk("%s: vector 0x%lx bit 0x%x irq 0x%x\n",
-	       __FUNCTION__, vector, bit, irq);
+	       __func__, vector, bit, irq);
 #endif
 	handle_irq(irq);
 }
diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c
index ee7b9009ebb4..d4327e461c22 100644
--- a/arch/alpha/kernel/sys_sio.c
+++ b/arch/alpha/kernel/sys_sio.c
@@ -89,7 +89,7 @@ sio_pci_route(void)
 	/* First, ALWAYS read and print the original setting. */
 	pci_bus_read_config_dword(pci_isa_hose->bus, PCI_DEVFN(7, 0), 0x60,
 				  &orig_route_tab);
-	printk("%s: PIRQ original 0x%x new 0x%x\n", __FUNCTION__,
+	printk("%s: PIRQ original 0x%x new 0x%x\n", __func__,
 	       orig_route_tab, alpha_mv.sys.sio.route_tab);
 
 #if defined(ALPHA_RESTORE_SRM_SETUP)
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index 2dc7f9fed213..dc57790250d2 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -8,6 +8,7 @@
  * This file initializes the trap entry points
  */
 
+#include <linux/jiffies.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/tty.h>
@@ -770,7 +771,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
 	      unsigned long reg, struct pt_regs *regs)
 {
 	static int cnt = 0;
-	static long last_time = 0;
+	static unsigned long last_time;
 
 	unsigned long tmp1, tmp2, tmp3, tmp4;
 	unsigned long fake_reg, *reg_addr = &fake_reg;
@@ -781,7 +782,7 @@ do_entUnaUser(void __user * va, unsigned long opcode,
 	   with the unaliged access.  */
 
 	if (!test_thread_flag (TIF_UAC_NOPRINT)) {
-		if (cnt >= 5 && jiffies - last_time > 5*HZ) {
+		if (cnt >= 5 && time_after(jiffies, last_time + 5 * HZ)) {
 			cnt = 0;
 		}
 		if (++cnt < 5) {
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index d8d253285a94..b786e68914d4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration"
 config ARM
 	bool
 	default y
+	select HAVE_IDE
 	select RTC_LIB
 	select SYS_SUPPORTS_APM_EMULATION
 	select HAVE_OPROFILE
@@ -223,7 +224,6 @@ config ARCH_CLPS7500
 	select TIMER_ACORN
 	select ISA
 	select NO_IOPORT
-	select HAVE_IDE
 	help
 	  Support for the Cirrus Logic PS7500FE system-on-a-chip.
 
@@ -236,7 +236,6 @@ config ARCH_CO285
 	bool "Co-EBSA285"
 	select FOOTBRIDGE
 	select FOOTBRIDGE_ADDIN
-	select HAVE_IDE
 	help
 	  Support for Intel's EBSA285 companion chip.
 
@@ -262,7 +261,6 @@ config ARCH_EP93XX
 config ARCH_FOOTBRIDGE
 	bool "FootBridge"
 	select FOOTBRIDGE
-	select HAVE_IDE
 	help
 	  Support for systems based on the DC21285 companion chip
 	  ("FootBridge"), such as the Simtec CATS and the Rebel NetWinder.
@@ -301,7 +299,6 @@ config ARCH_IOP32X
 	depends on MMU
 	select PLAT_IOP
 	select PCI
-	select HAVE_IDE
 	help
 	  Support for Intel's 80219 and IOP32X (XScale) family of
 	  processors.
@@ -311,14 +308,12 @@ config ARCH_IOP33X
 	depends on MMU
 	select PLAT_IOP
 	select PCI
-	select HAVE_IDE
 	help
 	  Support for Intel's IOP33X (XScale) family of processors.
 
 config ARCH_IXP23XX
  	bool "IXP23XX-based"
 	depends on MMU
-	select HAVE_IDE
  	select PCI
 	help
 	  Support for Intel's IXP23xx (XScale) family of processors.
@@ -336,14 +331,12 @@ config ARCH_IXP4XX
 	select GENERIC_GPIO
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
-	select HAVE_IDE
 	help
 	  Support for Intel's IXP4XX (XScale) family of processors.
 
 config ARCH_L7200
 	bool "LinkUp-L7200"
 	select FIQ
-	select HAVE_IDE
 	help
 	  Say Y here if you intend to run this kernel on a LinkUp Systems
 	  L7200 Software Development Board which uses an ARM720T processor.
@@ -400,7 +393,6 @@ config ARCH_PXA
 	depends on MMU
 	select ARCH_MTD_XIP
 	select GENERIC_GPIO
-	select HAVE_IDE
 	select HAVE_GPIO_LIB
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
@@ -416,7 +408,6 @@ config ARCH_RPC
 	select ARCH_MAY_HAVE_PC_FDC
 	select ISA_DMA_API
 	select NO_IOPORT
-	select HAVE_IDE
 	help
 	  On the Acorn Risc-PC, Linux can support the internal IDE disk and
 	  CD-ROM interface, serial and parallel port, and the floppy drive.
@@ -432,7 +423,6 @@ config ARCH_SA1100
 	select GENERIC_TIME
 	select GENERIC_CLOCKEVENTS
 	select TICK_ONESHOT
-	select HAVE_IDE
 	select HAVE_GPIO_LIB
 	help
 	  Support for StrongARM 11x0 based boards.
@@ -440,7 +430,6 @@ config ARCH_SA1100
 config ARCH_S3C2410
 	bool "Samsung S3C2410, S3C2412, S3C2413, S3C2440, S3C2442, S3C2443"
 	select GENERIC_GPIO
-	select HAVE_IDE
 	help
 	  Samsung S3C2410X CPU based systems, such as the Simtec Electronics
 	  BAST (<http://www.simtec.co.uk/products/EB110ITX/>), the IPAQ 1940 or
@@ -448,7 +437,6 @@ config ARCH_S3C2410
 
 config ARCH_SHARK
 	bool "Shark"
-	select HAVE_IDE
 	select ISA
 	select ISA_DMA
 	select PCI
@@ -458,7 +446,6 @@ config ARCH_SHARK
 
 config ARCH_LH7A40X
 	bool "Sharp LH7A40X"
-	select HAVE_IDE
 	help
 	  Say Y here for systems based on one of the Sharp LH7A40X
 	  System on a Chip processors.  These CPUs include an ARM922T
diff --git a/arch/arm/mach-at91/at91sam9261_devices.c b/arch/arm/mach-at91/at91sam9261_devices.c
index 37cd547855b1..728bb8f39441 100644
--- a/arch/arm/mach-at91/at91sam9261_devices.c
+++ b/arch/arm/mach-at91/at91sam9261_devices.c
@@ -539,6 +539,17 @@ void __init at91_add_device_lcdc(struct atmel_lcdfb_info *data)
 	at91_set_B_periph(AT91_PIN_PB28, 0);	/* LCDD23 */
 #endif
 
+	if (ARRAY_SIZE(lcdc_resources) > 2) {
+		void __iomem *fb;
+		struct resource *fb_res = &lcdc_resources[2];
+		size_t fb_len = fb_res->end - fb_res->start + 1;
+
+		fb = ioremap_writecombine(fb_res->start, fb_len);
+		if (fb) {
+			memset(fb, 0, fb_len);
+			iounmap(fb, fb_len);
+		}
+	}
 	lcdc_data = *data;
 	platform_device_register(&at91_lcdc_device);
 }
diff --git a/arch/arm/mach-at91/at91sam9rl_devices.c b/arch/arm/mach-at91/at91sam9rl_devices.c
index dbb9a5fc2090..054689804e77 100644
--- a/arch/arm/mach-at91/at91sam9rl_devices.c
+++ b/arch/arm/mach-at91/at91sam9rl_devices.c
@@ -381,6 +381,20 @@ void __init at91_add_device_lcdc(struct atmel_lcdfb_info *data)
 	at91_set_B_periph(AT91_PIN_PC24, 0);	/* LCDD22 */
 	at91_set_B_periph(AT91_PIN_PC25, 0);	/* LCDD23 */
 
+#ifdef CONFIG_FB_INTSRAM
+	{
+		void __iomem *fb;
+		struct resource *fb_res = &lcdc_resources[2];
+		size_t fb_len = fb_res->end - fb_res->start + 1;
+
+		fb = ioremap_writecombine(fb_res->start, fb_len);
+		if (fb) {
+			memset(fb, 0, fb_len);
+			iounmap(fb, fb_len);
+		}
+	}
+#endif
+
 	lcdc_data = *data;
 	platform_device_register(&at91_lcdc_device);
 }
diff --git a/arch/avr32/kernel/setup.c b/arch/avr32/kernel/setup.c
index 2687b730e2d0..ce48c14f4349 100644
--- a/arch/avr32/kernel/setup.c
+++ b/arch/avr32/kernel/setup.c
@@ -274,6 +274,8 @@ static int __init early_parse_fbmem(char *p)
 			printk(KERN_WARNING
 			       "Failed to allocate framebuffer memory\n");
 			fbmem_size = 0;
+		} else {
+			memset(__va(fbmem_start), 0, fbmem_size);
 		}
 	}
 
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index 4207a2b52750..5b06ffa15e34 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -27,7 +27,6 @@ show_mem(void)
 
 	printk("\nMem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	i = max_mapnr;
 	while (i-- > 0) {
 		total++;
diff --git a/arch/frv/kernel/pm.c b/arch/frv/kernel/pm.c
index c57ce3f1f2e2..73f3aeefd203 100644
--- a/arch/frv/kernel/pm.c
+++ b/arch/frv/kernel/pm.c
@@ -163,14 +163,11 @@ static int sysctl_pm_do_suspend(ctl_table *ctl, int write, struct file *filp,
 	if ((mode != 1) && (mode != 5))
 		return -EINVAL;
 
-	retval = pm_send_all(PM_SUSPEND, (void *)3);
-
 	if (retval == 0) {
 		if (mode == 5)
 		    retval = pm_do_bus_sleep();
 		else
 		    retval = pm_do_suspend();
-		pm_send_all(PM_RESUME, (void *)0);
 	}
 
 	return retval;
@@ -183,9 +180,6 @@ static int try_set_cmode(int new_cmode)
 	if (!(clock_cmodes_permitted & (1<<new_cmode)))
 		return -EINVAL;
 
-	/* tell all the drivers we're suspending */
-	pm_send_all(PM_SUSPEND, (void *)3);
-
 	/* now change cmode */
 	local_irq_disable();
 	frv_dma_pause_all();
@@ -201,8 +195,6 @@ static int try_set_cmode(int new_cmode)
 	frv_dma_resume_all();
 	local_irq_enable();
 
-	/* tell all the drivers we're resuming */
-	pm_send_all(PM_RESUME, (void *)0);
 	return 0;
 }
 
diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c
index 7089c2428b3f..a40df80b2ebd 100644
--- a/arch/frv/kernel/traps.c
+++ b/arch/frv/kernel/traps.c
@@ -49,7 +49,7 @@ asmlinkage void insn_access_error(unsigned long esfr1, unsigned long epcr0, unsi
 	info.si_signo	= SIGSEGV;
 	info.si_code	= SEGV_ACCERR;
 	info.si_errno	= 0;
-	info.si_addr	= (void *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
+	info.si_addr	= (void __user *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
 
 	force_sig_info(info.si_signo, &info, current);
 } /* end insn_access_error() */
@@ -73,7 +73,7 @@ asmlinkage void illegal_instruction(unsigned long esfr1, unsigned long epcr0, un
 		      epcr0, esr0, esfr1);
 
 	info.si_errno	= 0;
-	info.si_addr	= (void *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
+	info.si_addr	= (void __user *) ((epcr0 & EPCR0_V) ? (epcr0 & EPCR0_PC) : __frame->pc);
 
 	switch (__frame->tbr & TBR_TT) {
 	case TBR_TT_ILLEGAL_INSTR:
@@ -111,7 +111,8 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
 				 unsigned long esr0)
 {
 	static DEFINE_SPINLOCK(atomic_op_lock);
-	unsigned long x, y, z, *p;
+	unsigned long x, y, z;
+	unsigned long __user *p;
 	mm_segment_t oldfs;
 	siginfo_t info;
 	int ret;
@@ -128,7 +129,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
 		 * u32 __atomic_user_cmpxchg32(u32 *ptr, u32 test, u32 new)
 		 */
 	case TBR_TT_ATOMIC_CMPXCHG32:
-		p = (unsigned long *) __frame->gr8;
+		p = (unsigned long __user *) __frame->gr8;
 		x = __frame->gr9;
 		y = __frame->gr10;
 
@@ -158,7 +159,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
 		 * u32 __atomic_kernel_xchg32(void *v, u32 new)
 		 */
 	case TBR_TT_ATOMIC_XCHG32:
-		p = (unsigned long *) __frame->gr8;
+		p = (unsigned long __user *) __frame->gr8;
 		y = __frame->gr9;
 
 		for (;;) {
@@ -181,7 +182,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
 		 * ulong __atomic_kernel_XOR_return(ulong i, ulong *v)
 		 */
 	case TBR_TT_ATOMIC_XOR:
-		p = (unsigned long *) __frame->gr8;
+		p = (unsigned long __user *) __frame->gr8;
 		x = __frame->gr9;
 
 		for (;;) {
@@ -205,7 +206,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
 		 * ulong __atomic_kernel_OR_return(ulong i, ulong *v)
 		 */
 	case TBR_TT_ATOMIC_OR:
-		p = (unsigned long *) __frame->gr8;
+		p = (unsigned long __user *) __frame->gr8;
 		x = __frame->gr9;
 
 		for (;;) {
@@ -229,7 +230,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
 		 * ulong __atomic_kernel_AND_return(ulong i, ulong *v)
 		 */
 	case TBR_TT_ATOMIC_AND:
-		p = (unsigned long *) __frame->gr8;
+		p = (unsigned long __user *) __frame->gr8;
 		x = __frame->gr9;
 
 		for (;;) {
@@ -253,7 +254,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
 		 * int __atomic_user_sub_return(atomic_t *v, int i)
 		 */
 	case TBR_TT_ATOMIC_SUB:
-		p = (unsigned long *) __frame->gr8;
+		p = (unsigned long __user *) __frame->gr8;
 		x = __frame->gr9;
 
 		for (;;) {
@@ -277,7 +278,7 @@ asmlinkage void atomic_operation(unsigned long esfr1, unsigned long epcr0,
 		 * int __atomic_user_add_return(atomic_t *v, int i)
 		 */
 	case TBR_TT_ATOMIC_ADD:
-		p = (unsigned long *) __frame->gr8;
+		p = (unsigned long __user *) __frame->gr8;
 		x = __frame->gr9;
 
 		for (;;) {
@@ -322,7 +323,7 @@ error:
 	info.si_signo	= SIGSEGV;
 	info.si_code	= SEGV_ACCERR;
 	info.si_errno	= 0;
-	info.si_addr	= (void *) __frame->pc;
+	info.si_addr	= (void __user *) __frame->pc;
 
 	force_sig_info(info.si_signo, &info, current);
 }
@@ -343,7 +344,7 @@ asmlinkage void media_exception(unsigned long msr0, unsigned long msr1)
 	info.si_signo	= SIGFPE;
 	info.si_code	= FPE_MDAOVF;
 	info.si_errno	= 0;
-	info.si_addr	= (void *) __frame->pc;
+	info.si_addr	= (void __user *) __frame->pc;
 
 	force_sig_info(info.si_signo, &info, current);
 } /* end media_exception() */
@@ -383,7 +384,7 @@ asmlinkage void memory_access_exception(unsigned long esr0,
 	info.si_addr	= NULL;
 
 	if ((esr0 & (ESRx_VALID | ESR0_EAV)) == (ESRx_VALID | ESR0_EAV))
-		info.si_addr = (void *) ear0;
+		info.si_addr = (void __user *) ear0;
 
 	force_sig_info(info.si_signo, &info, current);
 
@@ -412,7 +413,7 @@ asmlinkage void data_access_error(unsigned long esfr1, unsigned long esr15, unsi
 	info.si_signo	= SIGSEGV;
 	info.si_code	= SEGV_ACCERR;
 	info.si_errno	= 0;
-	info.si_addr	= (void *)
+	info.si_addr	= (void __user *)
 		(((esr15 & (ESRx_VALID|ESR15_EAV)) == (ESRx_VALID|ESR15_EAV)) ? ear15 : 0);
 
 	force_sig_info(info.si_signo, &info, current);
@@ -446,7 +447,7 @@ asmlinkage void division_exception(unsigned long esfr1, unsigned long esr0, unsi
 	info.si_signo	= SIGFPE;
 	info.si_code	= FPE_INTDIV;
 	info.si_errno	= 0;
-	info.si_addr	= (void *) __frame->pc;
+	info.si_addr	= (void __user *) __frame->pc;
 
 	force_sig_info(info.si_signo, &info, current);
 } /* end division_exception() */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index cd13e138bd03..3aa6c821449a 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -19,6 +19,7 @@ config IA64
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_KVM
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
@@ -589,6 +590,8 @@ config MSPEC
 
 source "fs/Kconfig"
 
+source "arch/ia64/kvm/Kconfig"
+
 source "lib/Kconfig"
 
 #
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f1645c4f7039..ec4cca477f49 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) 	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1)	+= arch/ia64/dig/
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
+core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
 
 drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/
diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
new file mode 100644
index 000000000000..7914e4828504
--- /dev/null
+++ b/arch/ia64/kvm/Kconfig
@@ -0,0 +1,49 @@
+#
+# KVM configuration
+#
+config HAVE_KVM
+	bool
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	depends on HAVE_KVM || IA64
+	default y
+	---help---
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM && EXPERIMENTAL
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	---help---
+	  Support hosting fully virtualized guest machines using hardware
+	  virtualization extensions.  You will need a fairly recent
+	  processor equipped with virtualization extensions. You will also
+	  need to select one or more of the processor modules below.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+config KVM_INTEL
+	tristate "KVM for Intel Itanium 2 processors support"
+	depends on KVM && m
+	---help---
+	  Provides support for KVM on Itanium 2 processors equipped with the VT
+	  extensions.
+
+config KVM_TRACE
+       bool
+
+endif # VIRTUALIZATION
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
new file mode 100644
index 000000000000..52353397a1a4
--- /dev/null
+++ b/arch/ia64/kvm/Makefile
@@ -0,0 +1,58 @@
+#This Make file is to generate asm-offsets.h and build source.
+#
+
+#Generate asm-offsets.h for vmm module build
+offsets-file := asm-offsets.h
+
+always  := $(offsets-file)
+targets := $(offsets-file)
+targets += arch/ia64/kvm/asm-offsets.s
+clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S)
+
+# Default sed regexp - multiline due to syntax constraints
+define sed-y
+	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
+endef
+
+quiet_cmd_offsets = GEN     $@
+define cmd_offsets
+	(set -e; \
+	 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
+	 echo "#define __ASM_KVM_OFFSETS_H__"; \
+	 echo "/*"; \
+	 echo " * DO NOT MODIFY."; \
+	 echo " *"; \
+	 echo " * This file was generated by Makefile"; \
+	 echo " *"; \
+	 echo " */"; \
+	 echo ""; \
+	 sed -ne $(sed-y) $<; \
+	 echo ""; \
+	 echo "#endif" ) > $@
+endef
+# We use internal rules to avoid the "is up to date" message from make
+arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c
+	$(call if_changed_dep,cc_s_c)
+
+$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
+	$(call cmd,offsets)
+
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
+EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+
+kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
+obj-$(CONFIG_KVM) += kvm.o
+
+FORCE : $(obj)/$(offsets-file)
+EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
+kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
+	vtlb.o process.o
+#Add link memcpy and memset to avoid possible structure assignment error
+kvm-intel-objs += ../lib/memset.o ../lib/memcpy.o
+obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
new file mode 100644
index 000000000000..4e3dc13a619c
--- /dev/null
+++ b/arch/ia64/kvm/asm-offsets.c
@@ -0,0 +1,251 @@
+/*
+ * asm-offsets.c Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ *
+ * Anthony Xu    <anthony.xu@intel.com>
+ * Xiantao Zhang <xiantao.zhang@intel.com>
+ * Copyright (c) 2007 Intel Corporation  KVM support.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/autoconf.h>
+#include <linux/kvm_host.h>
+
+#include "vcpu.h"
+
+#define task_struct kvm_vcpu
+
+#define DEFINE(sym, val) \
+	asm volatile("\n->" #sym " (%0) " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : :)
+
+#define OFFSET(_sym, _str, _mem) \
+    DEFINE(_sym, offsetof(_str, _mem));
+
+void foo(void)
+{
+	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
+	DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
+
+	BLANK();
+
+	DEFINE(VMM_VCPU_META_RR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
+	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+			offsetof(struct kvm_vcpu,
+				arch.metaphysical_saved_rr0));
+	DEFINE(VMM_VCPU_VRR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vrr[0]));
+	DEFINE(VMM_VPD_IRR0_OFFSET,
+			offsetof(struct vpd, irr[0]));
+	DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
+			offsetof(struct kvm_vcpu, arch.itc_check));
+	DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
+			offsetof(struct kvm_vcpu, arch.irq_check));
+	DEFINE(VMM_VPD_VHPI_OFFSET,
+			offsetof(struct vpd, vhpi));
+	DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vsa_base));
+	DEFINE(VMM_VCPU_VPD_OFFSET,
+			offsetof(struct kvm_vcpu, arch.vpd));
+	DEFINE(VMM_VCPU_IRQ_CHECK,
+			offsetof(struct kvm_vcpu, arch.irq_check));
+	DEFINE(VMM_VCPU_TIMER_PENDING,
+			offsetof(struct kvm_vcpu, arch.timer_pending));
+	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
+			offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
+	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+			offsetof(struct kvm_vcpu, arch.mode_flags));
+	DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
+			offsetof(struct kvm_vcpu, arch.itc_offset));
+	DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
+			offsetof(struct kvm_vcpu, arch.last_itc));
+	DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
+			offsetof(struct kvm_vcpu, arch.saved_gp));
+
+	BLANK();
+
+	DEFINE(VMM_PT_REGS_B6_OFFSET,
+				offsetof(struct kvm_pt_regs, b6));
+	DEFINE(VMM_PT_REGS_B7_OFFSET,
+				offsetof(struct kvm_pt_regs, b7));
+	DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_csd));
+	DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_ssd));
+	DEFINE(VMM_PT_REGS_R8_OFFSET,
+				offsetof(struct kvm_pt_regs, r8));
+	DEFINE(VMM_PT_REGS_R9_OFFSET,
+				offsetof(struct kvm_pt_regs, r9));
+	DEFINE(VMM_PT_REGS_R10_OFFSET,
+				offsetof(struct kvm_pt_regs, r10));
+	DEFINE(VMM_PT_REGS_R11_OFFSET,
+				offsetof(struct kvm_pt_regs, r11));
+	DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_ipsr));
+	DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_iip));
+	DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
+				offsetof(struct kvm_pt_regs, cr_ifs));
+	DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_unat));
+	DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_pfs));
+	DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_rsc));
+	DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_rnat));
+
+	DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_bspstore));
+	DEFINE(VMM_PT_REGS_PR_OFFSET,
+				offsetof(struct kvm_pt_regs, pr));
+	DEFINE(VMM_PT_REGS_B0_OFFSET,
+				offsetof(struct kvm_pt_regs, b0));
+	DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
+				offsetof(struct kvm_pt_regs, loadrs));
+	DEFINE(VMM_PT_REGS_R1_OFFSET,
+				offsetof(struct kvm_pt_regs, r1));
+	DEFINE(VMM_PT_REGS_R12_OFFSET,
+				offsetof(struct kvm_pt_regs, r12));
+	DEFINE(VMM_PT_REGS_R13_OFFSET,
+				offsetof(struct kvm_pt_regs, r13));
+	DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_fpsr));
+	DEFINE(VMM_PT_REGS_R15_OFFSET,
+				offsetof(struct kvm_pt_regs, r15));
+	DEFINE(VMM_PT_REGS_R14_OFFSET,
+				offsetof(struct kvm_pt_regs, r14));
+	DEFINE(VMM_PT_REGS_R2_OFFSET,
+				offsetof(struct kvm_pt_regs, r2));
+	DEFINE(VMM_PT_REGS_R3_OFFSET,
+				offsetof(struct kvm_pt_regs, r3));
+	DEFINE(VMM_PT_REGS_R16_OFFSET,
+				offsetof(struct kvm_pt_regs, r16));
+	DEFINE(VMM_PT_REGS_R17_OFFSET,
+				offsetof(struct kvm_pt_regs, r17));
+	DEFINE(VMM_PT_REGS_R18_OFFSET,
+				offsetof(struct kvm_pt_regs, r18));
+	DEFINE(VMM_PT_REGS_R19_OFFSET,
+				offsetof(struct kvm_pt_regs, r19));
+	DEFINE(VMM_PT_REGS_R20_OFFSET,
+				offsetof(struct kvm_pt_regs, r20));
+	DEFINE(VMM_PT_REGS_R21_OFFSET,
+				offsetof(struct kvm_pt_regs, r21));
+	DEFINE(VMM_PT_REGS_R22_OFFSET,
+				offsetof(struct kvm_pt_regs, r22));
+	DEFINE(VMM_PT_REGS_R23_OFFSET,
+				offsetof(struct kvm_pt_regs, r23));
+	DEFINE(VMM_PT_REGS_R24_OFFSET,
+				offsetof(struct kvm_pt_regs, r24));
+	DEFINE(VMM_PT_REGS_R25_OFFSET,
+				offsetof(struct kvm_pt_regs, r25));
+	DEFINE(VMM_PT_REGS_R26_OFFSET,
+				offsetof(struct kvm_pt_regs, r26));
+	DEFINE(VMM_PT_REGS_R27_OFFSET,
+				offsetof(struct kvm_pt_regs, r27));
+	DEFINE(VMM_PT_REGS_R28_OFFSET,
+				offsetof(struct kvm_pt_regs, r28));
+	DEFINE(VMM_PT_REGS_R29_OFFSET,
+				offsetof(struct kvm_pt_regs, r29));
+	DEFINE(VMM_PT_REGS_R30_OFFSET,
+				offsetof(struct kvm_pt_regs, r30));
+	DEFINE(VMM_PT_REGS_R31_OFFSET,
+				offsetof(struct kvm_pt_regs, r31));
+	DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
+				offsetof(struct kvm_pt_regs, ar_ccv));
+	DEFINE(VMM_PT_REGS_F6_OFFSET,
+				offsetof(struct kvm_pt_regs, f6));
+	DEFINE(VMM_PT_REGS_F7_OFFSET,
+				offsetof(struct kvm_pt_regs, f7));
+	DEFINE(VMM_PT_REGS_F8_OFFSET,
+				offsetof(struct kvm_pt_regs, f8));
+	DEFINE(VMM_PT_REGS_F9_OFFSET,
+				offsetof(struct kvm_pt_regs, f9));
+	DEFINE(VMM_PT_REGS_F10_OFFSET,
+				offsetof(struct kvm_pt_regs, f10));
+	DEFINE(VMM_PT_REGS_F11_OFFSET,
+				offsetof(struct kvm_pt_regs, f11));
+	DEFINE(VMM_PT_REGS_R4_OFFSET,
+				offsetof(struct kvm_pt_regs, r4));
+	DEFINE(VMM_PT_REGS_R5_OFFSET,
+				offsetof(struct kvm_pt_regs, r5));
+	DEFINE(VMM_PT_REGS_R6_OFFSET,
+				offsetof(struct kvm_pt_regs, r6));
+	DEFINE(VMM_PT_REGS_R7_OFFSET,
+				offsetof(struct kvm_pt_regs, r7));
+	DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
+				offsetof(struct kvm_pt_regs, eml_unat));
+	DEFINE(VMM_VCPU_IIPA_OFFSET,
+				offsetof(struct kvm_vcpu, arch.cr_iipa));
+	DEFINE(VMM_VCPU_OPCODE_OFFSET,
+				offsetof(struct kvm_vcpu, arch.opcode));
+	DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
+	DEFINE(VMM_VCPU_ISR_OFFSET,
+				offsetof(struct kvm_vcpu, arch.cr_isr));
+	DEFINE(VMM_PT_REGS_R16_SLOT,
+				(((offsetof(struct kvm_pt_regs, r16)
+				- sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
+	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
+				offsetof(struct kvm_vcpu, arch.mode_flags));
+	DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
+	BLANK();
+
+	DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
+	DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
+	DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
+			offsetof(struct kvm_vcpu, arch.insvc[0]));
+	DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
+	DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
+
+	DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
+	DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
+	DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
+	DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
+	DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
+	DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
+	DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
+	DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
+	DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
+	DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
+	DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
+	DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
+	DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
+	DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
+	DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
+	DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
+	DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
+	DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
+	DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
+	DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
+	DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
+	DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
+	DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
+	DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
+	DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
+	DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
+	DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
+	DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
+	DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
+	DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
+	DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
+	DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
+	DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
+	DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
+	BLANK();
+}
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
new file mode 100644
index 000000000000..6df073240135
--- /dev/null
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -0,0 +1,1806 @@
+
+/*
+ * kvm_ia64.c: Basic KVM suppport On Itanium series processors
+ *
+ *
+ * 	Copyright (C) 2007, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/percpu.h>
+#include <linux/gfp.h>
+#include <linux/fs.h>
+#include <linux/smp.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm.h>
+#include <linux/bitops.h>
+#include <linux/hrtimer.h>
+#include <linux/uaccess.h>
+
+#include <asm/pgtable.h>
+#include <asm/gcc_intrin.h>
+#include <asm/pal.h>
+#include <asm/cacheflush.h>
+#include <asm/div64.h>
+#include <asm/tlb.h>
+
+#include "misc.h"
+#include "vti.h"
+#include "iodev.h"
+#include "ioapic.h"
+#include "lapic.h"
+
+static unsigned long kvm_vmm_base;
+static unsigned long kvm_vsa_base;
+static unsigned long kvm_vm_buffer;
+static unsigned long kvm_vm_buffer_size;
+unsigned long kvm_vmm_gp;
+
+static long vp_env_info;
+
+static struct kvm_vmm_info *kvm_vmm_info;
+
+static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ NULL }
+};
+
+
+struct fdesc{
+    unsigned long ip;
+    unsigned long gp;
+};
+
+static void kvm_flush_icache(unsigned long start, unsigned long len)
+{
+	int l;
+
+	for (l = 0; l < (len + 32); l += 32)
+		ia64_fc(start + l);
+
+	ia64_sync_i();
+	ia64_srlz_i();
+}
+
+static void kvm_flush_tlb_all(void)
+{
+	unsigned long i, j, count0, count1, stride0, stride1, addr;
+	long flags;
+
+	addr    = local_cpu_data->ptce_base;
+	count0  = local_cpu_data->ptce_count[0];
+	count1  = local_cpu_data->ptce_count[1];
+	stride0 = local_cpu_data->ptce_stride[0];
+	stride1 = local_cpu_data->ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();			/* srlz.i implies srlz.d */
+}
+
+long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
+			(u64)opt_handler);
+
+	return iprv.status;
+}
+
+static  DEFINE_SPINLOCK(vp_lock);
+
+void kvm_arch_hardware_enable(void *garbage)
+{
+	long  status;
+	long  tmp_base;
+	unsigned long pte;
+	unsigned long saved_psr;
+	int slot;
+
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
+				PAGE_KERNEL));
+	local_irq_save(saved_psr);
+	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	if (slot < 0)
+		return;
+	local_irq_restore(saved_psr);
+
+	spin_lock(&vp_lock);
+	status = ia64_pal_vp_init_env(kvm_vsa_base ?
+				VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
+			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
+	if (status != 0) {
+		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
+		return ;
+	}
+
+	if (!kvm_vsa_base) {
+		kvm_vsa_base = tmp_base;
+		printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
+	}
+	spin_unlock(&vp_lock);
+	ia64_ptr_entry(0x3, slot);
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+
+	long status;
+	int slot;
+	unsigned long pte;
+	unsigned long saved_psr;
+	unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
+
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
+				PAGE_KERNEL));
+
+	local_irq_save(saved_psr);
+	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	if (slot < 0)
+		return;
+	local_irq_restore(saved_psr);
+
+	status = ia64_pal_vp_exit_env(host_iva);
+	if (status)
+		printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
+				status);
+	ia64_ptr_entry(0x3, slot);
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+	*(int *)rtn = 0;
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+
+	int r;
+
+	switch (ext) {
+	case KVM_CAP_IRQCHIP:
+	case KVM_CAP_USER_MEMORY:
+
+		r = 1;
+		break;
+	default:
+		r = 0;
+	}
+	return r;
+
+}
+
+static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu,
+					gpa_t addr)
+{
+	struct kvm_io_device *dev;
+
+	dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr);
+
+	return dev;
+}
+
+static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+	kvm_run->hw.hardware_exit_reason = 1;
+	return 0;
+}
+
+static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct kvm_mmio_req *p;
+	struct kvm_io_device *mmio_dev;
+
+	p = kvm_get_vcpu_ioreq(vcpu);
+
+	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
+		goto mmio;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
+	vcpu->mmio_size = kvm_run->mmio.len = p->size;
+	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
+
+	if (vcpu->mmio_is_write)
+		memcpy(vcpu->mmio_data, &p->data, p->size);
+	memcpy(kvm_run->mmio.data, &p->data, p->size);
+	kvm_run->exit_reason = KVM_EXIT_MMIO;
+	return 0;
+mmio:
+	mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr);
+	if (mmio_dev) {
+		if (!p->dir)
+			kvm_iodevice_write(mmio_dev, p->addr, p->size,
+						&p->data);
+		else
+			kvm_iodevice_read(mmio_dev, p->addr, p->size,
+						&p->data);
+
+	} else
+		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
+	p->state = STATE_IORESP_READY;
+
+	return 1;
+}
+
+static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_PAL_CALL)
+		return kvm_pal_emul(vcpu, kvm_run);
+	else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 2;
+		return 0;
+	}
+}
+
+static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		kvm_sal_emul(vcpu);
+		return 1;
+	} else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = 3;
+		return 0;
+	}
+
+}
+
+/*
+ *  offset: address offset to IPI space.
+ *  value:  deliver value.
+ */
+static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
+				uint64_t vector)
+{
+	switch (dm) {
+	case SAPIC_FIXED:
+		kvm_apic_set_irq(vcpu, vector, 0);
+		break;
+	case SAPIC_NMI:
+		kvm_apic_set_irq(vcpu, 2, 0);
+		break;
+	case SAPIC_EXTINT:
+		kvm_apic_set_irq(vcpu, 0, 0);
+		break;
+	case SAPIC_INIT:
+	case SAPIC_PMI:
+	default:
+		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
+		break;
+	}
+}
+
+static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
+			unsigned long eid)
+{
+	union ia64_lid lid;
+	int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		if (kvm->vcpus[i]) {
+			lid.val = VCPU_LID(kvm->vcpus[i]);
+			if (lid.id == id && lid.eid == eid)
+				return kvm->vcpus[i];
+		}
+	}
+
+	return NULL;
+}
+
+static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+	struct kvm_vcpu *target_vcpu;
+	struct kvm_pt_regs *regs;
+	union ia64_ipi_a addr = p->u.ipi_data.addr;
+	union ia64_ipi_d data = p->u.ipi_data.data;
+
+	target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
+	if (!target_vcpu)
+		return handle_vm_error(vcpu, kvm_run);
+
+	if (!target_vcpu->arch.launched) {
+		regs = vcpu_regs(target_vcpu);
+
+		regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
+		regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
+
+		target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		if (waitqueue_active(&target_vcpu->wq))
+			wake_up_interruptible(&target_vcpu->wq);
+	} else {
+		vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
+		if (target_vcpu != vcpu)
+			kvm_vcpu_kick(target_vcpu);
+	}
+
+	return 1;
+}
+
+struct call_data {
+	struct kvm_ptc_g ptc_g_data;
+	struct kvm_vcpu *vcpu;
+};
+
+static void vcpu_global_purge(void *info)
+{
+	struct call_data *p = (struct call_data *)info;
+	struct kvm_vcpu *vcpu = p->vcpu;
+
+	if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
+		return;
+
+	set_bit(KVM_REQ_PTC_G, &vcpu->requests);
+	if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
+		vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
+							p->ptc_g_data;
+	} else {
+		clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
+		vcpu->arch.ptc_g_count = 0;
+		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+	}
+}
+
+static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
+	struct kvm *kvm = vcpu->kvm;
+	struct call_data call_data;
+	int i;
+	call_data.ptc_g_data = p->u.ptc_g_data;
+
+	for (i = 0; i < KVM_MAX_VCPUS; i++) {
+		if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state ==
+						KVM_MP_STATE_UNINITIALIZED ||
+					vcpu == kvm->vcpus[i])
+			continue;
+
+		if (waitqueue_active(&kvm->vcpus[i]->wq))
+			wake_up_interruptible(&kvm->vcpus[i]->wq);
+
+		if (kvm->vcpus[i]->cpu != -1) {
+			call_data.vcpu = kvm->vcpus[i];
+			smp_call_function_single(kvm->vcpus[i]->cpu,
+					vcpu_global_purge, &call_data, 0, 1);
+		} else
+			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
+
+	}
+	return 1;
+}
+
+static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+
+	ktime_t kt;
+	long itc_diff;
+	unsigned long vcpu_now_itc;
+
+	unsigned long expires;
+	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+	unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset;
+
+	if (time_after(vcpu_now_itc, vpd->itm)) {
+		vcpu->arch.timer_check = 1;
+		return 1;
+	}
+	itc_diff = vpd->itm - vcpu_now_itc;
+	if (itc_diff < 0)
+		itc_diff = -itc_diff;
+
+	expires = div64_64(itc_diff, cyc_per_usec);
+	kt = ktime_set(0, 1000 * expires);
+	vcpu->arch.ht_active = 1;
+	hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
+
+	if (irqchip_in_kernel(vcpu->kvm)) {
+		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		kvm_vcpu_block(vcpu);
+		hrtimer_cancel(p_ht);
+		vcpu->arch.ht_active = 0;
+
+		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
+			return -EINTR;
+		return 1;
+	} else {
+		printk(KERN_ERR"kvm: Unsupported userspace halt!");
+		return 0;
+	}
+}
+
+static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run)
+{
+	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+	return 0;
+}
+
+static int handle_external_interrupt(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run)
+{
+	return 1;
+}
+
+static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
+		struct kvm_run *kvm_run) = {
+	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
+	[EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
+	[EXIT_REASON_PAL_CALL]              = handle_pal_call,
+	[EXIT_REASON_SAL_CALL]              = handle_sal_call,
+	[EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
+	[EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
+	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
+	[EXIT_REASON_IPI]		    = handle_ipi,
+	[EXIT_REASON_PTC_G]		    = handle_global_purge,
+
+};
+
+static const int kvm_vti_max_exit_handlers =
+		sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
+
+static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu)
+{
+}
+
+static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p_exit_data;
+
+	p_exit_data = kvm_get_exit_data(vcpu);
+	return p_exit_data->exit_reason;
+}
+
+/*
+ * The guest has exited.  See if we can fix it or if we need userspace
+ * assistance.
+ */
+static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+	u32 exit_reason = kvm_get_exit_reason(vcpu);
+	vcpu->arch.last_exit = exit_reason;
+
+	if (exit_reason < kvm_vti_max_exit_handlers
+			&& kvm_vti_exit_handlers[exit_reason])
+		return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
+	else {
+		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
+		kvm_run->hw.hardware_exit_reason = exit_reason;
+	}
+	return 0;
+}
+
+static inline void vti_set_rr6(unsigned long rr6)
+{
+	ia64_set_rr(RR6, rr6);
+	ia64_srlz_i();
+}
+
+static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+	unsigned long pte;
+	struct kvm *kvm = vcpu->kvm;
+	int r;
+
+	/*Insert a pair of tr to map vmm*/
+	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
+	r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
+	if (r < 0)
+		goto out;
+	vcpu->arch.vmm_tr_slot = r;
+	/*Insert a pairt of tr to map data of vm*/
+	pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
+	r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
+					pte, KVM_VM_DATA_SHIFT);
+	if (r < 0)
+		goto out;
+	vcpu->arch.vm_tr_slot = r;
+	r = 0;
+out:
+	return r;
+
+}
+
+static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
+{
+
+	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
+	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
+
+}
+
+static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
+{
+	int cpu = smp_processor_id();
+
+	if (vcpu->arch.last_run_cpu != cpu ||
+			per_cpu(last_vcpu, cpu) != vcpu) {
+		per_cpu(last_vcpu, cpu) = vcpu;
+		vcpu->arch.last_run_cpu = cpu;
+		kvm_flush_tlb_all();
+	}
+
+	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
+	vti_set_rr6(vcpu->arch.vmm_rr);
+	return kvm_insert_vmm_mapping(vcpu);
+}
+static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
+{
+	kvm_purge_vmm_mapping(vcpu);
+	vti_set_rr6(vcpu->arch.host_rr6);
+}
+
+static int  vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	union context *host_ctx, *guest_ctx;
+	int r;
+
+	/*Get host and guest context with guest address space.*/
+	host_ctx = kvm_get_host_context(vcpu);
+	guest_ctx = kvm_get_guest_context(vcpu);
+
+	r = kvm_vcpu_pre_transition(vcpu);
+	if (r < 0)
+		goto out;
+	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
+	kvm_vcpu_post_transition(vcpu);
+	r = 0;
+out:
+	return r;
+}
+
+static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+
+again:
+	preempt_disable();
+
+	kvm_prepare_guest_switch(vcpu);
+	local_irq_disable();
+
+	if (signal_pending(current)) {
+		local_irq_enable();
+		preempt_enable();
+		r = -EINTR;
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		goto out;
+	}
+
+	vcpu->guest_mode = 1;
+	kvm_guest_enter();
+
+	r = vti_vcpu_run(vcpu, kvm_run);
+	if (r < 0) {
+		local_irq_enable();
+		preempt_enable();
+		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+		goto out;
+	}
+
+	vcpu->arch.launched = 1;
+	vcpu->guest_mode = 0;
+	local_irq_enable();
+
+	/*
+	 * We must have an instruction between local_irq_enable() and
+	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
+	 * the interrupt shadow.  The stat.exits increment will do nicely.
+	 * But we need to prevent reordering, hence this barrier():
+	 */
+	barrier();
+
+	kvm_guest_exit();
+
+	preempt_enable();
+
+	r = kvm_handle_exit(kvm_run, vcpu);
+
+	if (r > 0) {
+		if (!need_resched())
+			goto again;
+	}
+
+out:
+	if (r > 0) {
+		kvm_resched(vcpu);
+		goto again;
+	}
+
+	return r;
+}
+
+static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
+
+	if (!vcpu->mmio_is_write)
+		memcpy(&p->data, vcpu->mmio_data, 8);
+	p->state = STATE_IORESP_READY;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int r;
+	sigset_t sigsaved;
+
+	vcpu_load(vcpu);
+
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
+		kvm_vcpu_block(vcpu);
+		vcpu_put(vcpu);
+		return -EAGAIN;
+	}
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	if (vcpu->mmio_needed) {
+		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
+		kvm_set_mmio_data(vcpu);
+		vcpu->mmio_read_completed = 1;
+		vcpu->mmio_needed = 0;
+	}
+	r = __vcpu_run(vcpu, kvm_run);
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	vcpu_put(vcpu);
+	return r;
+}
+
+/*
+ * Allocate 16M memory for every vm to hold its specific data.
+ * Its memory map is defined in kvm_host.h.
+ */
+static struct kvm *kvm_alloc_kvm(void)
+{
+
+	struct kvm *kvm;
+	uint64_t  vm_base;
+
+	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
+
+	if (!vm_base)
+		return ERR_PTR(-ENOMEM);
+	printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base);
+
+	/* Zero all pages before use! */
+	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+
+	kvm = (struct kvm *)(vm_base + KVM_VM_OFS);
+	kvm->arch.vm_base = vm_base;
+
+	return kvm;
+}
+
+struct kvm_io_range {
+	unsigned long start;
+	unsigned long size;
+	unsigned long type;
+};
+
+static const struct kvm_io_range io_ranges[] = {
+	{VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
+	{MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
+	{LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
+	{IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
+	{PIB_START, PIB_SIZE, GPFN_PIB},
+};
+
+static void kvm_build_io_pmt(struct kvm *kvm)
+{
+	unsigned long i, j;
+
+	/* Mark I/O ranges */
+	for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
+							i++) {
+		for (j = io_ranges[i].start;
+				j < io_ranges[i].start + io_ranges[i].size;
+				j += PAGE_SIZE)
+			kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
+					io_ranges[i].type, 0);
+	}
+
+}
+
+/*Use unused rids to virtualize guest rid.*/
+#define GUEST_PHYSICAL_RR0	0x1739
+#define GUEST_PHYSICAL_RR4	0x2739
+#define VMM_INIT_RR		0x1660
+
+static void kvm_init_vm(struct kvm *kvm)
+{
+	long vm_base;
+
+	BUG_ON(!kvm);
+
+	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
+	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
+	kvm->arch.vmm_init_rr = VMM_INIT_RR;
+
+	vm_base = kvm->arch.vm_base;
+	if (vm_base) {
+		kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS;
+		kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS;
+		kvm->arch.vpd_base  = vm_base + KVM_VPD_OFS;
+	}
+
+	/*
+	 *Fill P2M entries for MMIO/IO ranges
+	 */
+	kvm_build_io_pmt(kvm);
+
+}
+
+struct  kvm *kvm_arch_create_vm(void)
+{
+	struct kvm *kvm = kvm_alloc_kvm();
+
+	if (IS_ERR(kvm))
+		return ERR_PTR(-ENOMEM);
+	kvm_init_vm(kvm);
+
+	return kvm;
+
+}
+
+static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
+					struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_IOAPIC:
+		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
+				sizeof(struct kvm_ioapic_state));
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
+{
+	int r;
+
+	r = 0;
+	switch (chip->chip_id) {
+	case KVM_IRQCHIP_IOAPIC:
+		memcpy(ioapic_irqchip(kvm),
+				&chip->chip.ioapic,
+				sizeof(struct kvm_ioapic_state));
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
+#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	int r;
+
+	vcpu_load(vcpu);
+
+	for (i = 0; i < 16; i++) {
+		vpd->vgr[i] = regs->vpd.vgr[i];
+		vpd->vbgr[i] = regs->vpd.vbgr[i];
+	}
+	for (i = 0; i < 128; i++)
+		vpd->vcr[i] = regs->vpd.vcr[i];
+	vpd->vhpi = regs->vpd.vhpi;
+	vpd->vnat = regs->vpd.vnat;
+	vpd->vbnat = regs->vpd.vbnat;
+	vpd->vpsr = regs->vpd.vpsr;
+
+	vpd->vpr = regs->vpd.vpr;
+
+	r = -EFAULT;
+	r = copy_from_user(&vcpu->arch.guest, regs->saved_guest,
+						sizeof(union context));
+	if (r)
+		goto out;
+	r = copy_from_user(vcpu + 1, regs->saved_stack +
+			sizeof(struct kvm_vcpu),
+			IA64_STK_OFFSET - sizeof(struct kvm_vcpu));
+	if (r)
+		goto out;
+	vcpu->arch.exit_data =
+		((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data;
+
+	RESTORE_REGS(mp_state);
+	RESTORE_REGS(vmm_rr);
+	memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
+	memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
+	RESTORE_REGS(itr_regions);
+	RESTORE_REGS(dtr_regions);
+	RESTORE_REGS(tc_regions);
+	RESTORE_REGS(irq_check);
+	RESTORE_REGS(itc_check);
+	RESTORE_REGS(timer_check);
+	RESTORE_REGS(timer_pending);
+	RESTORE_REGS(last_itc);
+	for (i = 0; i < 8; i++) {
+		vcpu->arch.vrr[i] = regs->vrr[i];
+		vcpu->arch.ibr[i] = regs->ibr[i];
+		vcpu->arch.dbr[i] = regs->dbr[i];
+	}
+	for (i = 0; i < 4; i++)
+		vcpu->arch.insvc[i] = regs->insvc[i];
+	RESTORE_REGS(xtp);
+	RESTORE_REGS(metaphysical_rr0);
+	RESTORE_REGS(metaphysical_rr4);
+	RESTORE_REGS(metaphysical_saved_rr0);
+	RESTORE_REGS(metaphysical_saved_rr4);
+	RESTORE_REGS(fp_psr);
+	RESTORE_REGS(saved_gp);
+
+	vcpu->arch.irq_new_pending = 1;
+	vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC);
+	set_bit(KVM_REQ_RESUME, &vcpu->requests);
+
+	vcpu_put(vcpu);
+	r = 0;
+out:
+	return r;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r = -EINVAL;
+
+	switch (ioctl) {
+	case KVM_SET_MEMORY_REGION: {
+		struct kvm_memory_region kvm_mem;
+		struct kvm_userspace_memory_region kvm_userspace_mem;
+
+		r = -EFAULT;
+		if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem))
+			goto out;
+		kvm_userspace_mem.slot = kvm_mem.slot;
+		kvm_userspace_mem.flags = kvm_mem.flags;
+		kvm_userspace_mem.guest_phys_addr =
+					kvm_mem.guest_phys_addr;
+		kvm_userspace_mem.memory_size = kvm_mem.memory_size;
+		r = kvm_vm_ioctl_set_memory_region(kvm,
+					&kvm_userspace_mem, 0);
+		if (r)
+			goto out;
+		break;
+		}
+	case KVM_CREATE_IRQCHIP:
+		r = -EFAULT;
+		r = kvm_ioapic_init(kvm);
+		if (r)
+			goto out;
+		break;
+	case KVM_IRQ_LINE: {
+		struct kvm_irq_level irq_event;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_event, argp, sizeof irq_event))
+			goto out;
+		if (irqchip_in_kernel(kvm)) {
+			mutex_lock(&kvm->lock);
+			kvm_ioapic_set_irq(kvm->arch.vioapic,
+						irq_event.irq,
+						irq_event.level);
+			mutex_unlock(&kvm->lock);
+			r = 0;
+		}
+		break;
+		}
+	case KVM_GET_IRQCHIP: {
+		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+		struct kvm_irqchip chip;
+
+		r = -EFAULT;
+		if (copy_from_user(&chip, argp, sizeof chip))
+				goto out;
+		r = -ENXIO;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &chip, sizeof chip))
+				goto out;
+		r = 0;
+		break;
+		}
+	case KVM_SET_IRQCHIP: {
+		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
+		struct kvm_irqchip chip;
+
+		r = -EFAULT;
+		if (copy_from_user(&chip, argp, sizeof chip))
+				goto out;
+		r = -ENXIO;
+		if (!irqchip_in_kernel(kvm))
+			goto out;
+		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+		}
+	default:
+		;
+	}
+out:
+	return r;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+		struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+		struct kvm_sregs *sregs)
+{
+	return -EINVAL;
+
+}
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+		struct kvm_translation *tr)
+{
+
+	return -EINVAL;
+}
+
+static int kvm_alloc_vmm_area(void)
+{
+	if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
+		kvm_vmm_base = __get_free_pages(GFP_KERNEL,
+				get_order(KVM_VMM_SIZE));
+		if (!kvm_vmm_base)
+			return -ENOMEM;
+
+		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+		kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
+
+		printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
+				kvm_vmm_base, kvm_vm_buffer);
+	}
+
+	return 0;
+}
+
+static void kvm_free_vmm_area(void)
+{
+	if (kvm_vmm_base) {
+		/*Zero this area before free to avoid bits leak!!*/
+		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
+		free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
+		kvm_vmm_base  = 0;
+		kvm_vm_buffer = 0;
+		kvm_vsa_base = 0;
+	}
+}
+
+/*
+ * Make sure that a cpu that is being hot-unplugged does not have any vcpus
+ * cached on it. Leave it as blank for IA64.
+ */
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
+static int vti_init_vpd(struct kvm_vcpu *vcpu)
+{
+	int i;
+	union cpuid3_t cpuid3;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (IS_ERR(vpd))
+		return PTR_ERR(vpd);
+
+	/* CPUID init */
+	for (i = 0; i < 5; i++)
+		vpd->vcpuid[i] = ia64_get_cpuid(i);
+
+	/* Limit the CPUID number to 5 */
+	cpuid3.value = vpd->vcpuid[3];
+	cpuid3.number = 4;	/* 5 - 1 */
+	vpd->vcpuid[3] = cpuid3.value;
+
+	/*Set vac and vdc fields*/
+	vpd->vac.a_from_int_cr = 1;
+	vpd->vac.a_to_int_cr = 1;
+	vpd->vac.a_from_psr = 1;
+	vpd->vac.a_from_cpuid = 1;
+	vpd->vac.a_cover = 1;
+	vpd->vac.a_bsw = 1;
+	vpd->vac.a_int = 1;
+	vpd->vdc.d_vmsw = 1;
+
+	/*Set virtual buffer*/
+	vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
+
+	return 0;
+}
+
+static int vti_create_vp(struct kvm_vcpu *vcpu)
+{
+	long ret;
+	struct vpd *vpd = vcpu->arch.vpd;
+	unsigned long  vmm_ivt;
+
+	vmm_ivt = kvm_vmm_info->vmm_ivt;
+
+	printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
+
+	ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
+
+	if (ret) {
+		printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void init_ptce_info(struct kvm_vcpu *vcpu)
+{
+	ia64_ptce_info_t ptce = {0};
+
+	ia64_get_ptce(&ptce);
+	vcpu->arch.ptce_base = ptce.base;
+	vcpu->arch.ptce_count[0] = ptce.count[0];
+	vcpu->arch.ptce_count[1] = ptce.count[1];
+	vcpu->arch.ptce_stride[0] = ptce.stride[0];
+	vcpu->arch.ptce_stride[1] = ptce.stride[1];
+}
+
+static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
+{
+	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
+
+	if (hrtimer_cancel(p_ht))
+		hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS);
+}
+
+static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
+{
+	struct kvm_vcpu *vcpu;
+	wait_queue_head_t *q;
+
+	vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
+	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
+		goto out;
+
+	q = &vcpu->wq;
+	if (waitqueue_active(q)) {
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		wake_up_interruptible(q);
+	}
+out:
+	vcpu->arch.timer_check = 1;
+	return HRTIMER_NORESTART;
+}
+
+#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu *v;
+	int r;
+	int i;
+	long itc_offset;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	union context *p_ctx = &vcpu->arch.guest;
+	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
+
+	/*Init vcpu context for first run.*/
+	if (IS_ERR(vmm_vcpu))
+		return PTR_ERR(vmm_vcpu);
+
+	if (vcpu->vcpu_id == 0) {
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+		/*Set entry address for first run.*/
+		regs->cr_iip = PALE_RESET_ENTRY;
+
+		/*Initilize itc offset for vcpus*/
+		itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC);
+		for (i = 0; i < MAX_VCPU_NUM; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+			v->arch.itc_offset = itc_offset;
+			v->arch.last_itc = 0;
+		}
+	} else
+		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
+
+	r = -ENOMEM;
+	vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
+	if (!vcpu->arch.apic)
+		goto out;
+	vcpu->arch.apic->vcpu = vcpu;
+
+	p_ctx->gr[1] = 0;
+	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET);
+	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
+	p_ctx->psr = 0x1008522000UL;
+	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
+	p_ctx->caller_unat = 0;
+	p_ctx->pr = 0x0;
+	p_ctx->ar[36] = 0x0; /*unat*/
+	p_ctx->ar[19] = 0x0; /*rnat*/
+	p_ctx->ar[18] = (unsigned long)vmm_vcpu +
+				((sizeof(struct kvm_vcpu)+15) & ~15);
+	p_ctx->ar[64] = 0x0; /*pfs*/
+	p_ctx->cr[0] = 0x7e04UL;
+	p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
+	p_ctx->cr[8] = 0x3c;
+
+	/*Initilize region register*/
+	p_ctx->rr[0] = 0x30;
+	p_ctx->rr[1] = 0x30;
+	p_ctx->rr[2] = 0x30;
+	p_ctx->rr[3] = 0x30;
+	p_ctx->rr[4] = 0x30;
+	p_ctx->rr[5] = 0x30;
+	p_ctx->rr[7] = 0x30;
+
+	/*Initilize branch register 0*/
+	p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
+
+	vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
+	vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
+	vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
+
+	hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	vcpu->arch.hlt_timer.function = hlt_timer_fn;
+
+	vcpu->arch.last_run_cpu = -1;
+	vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vsa_base = kvm_vsa_base;
+	vcpu->arch.__gp = kvm_vmm_gp;
+	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
+	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id);
+	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id);
+	init_ptce_info(vcpu);
+
+	r = 0;
+out:
+	return r;
+}
+
+static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
+{
+	unsigned long psr;
+	int r;
+
+	local_irq_save(psr);
+	r = kvm_insert_vmm_mapping(vcpu);
+	if (r)
+		goto fail;
+	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
+	if (r)
+		goto fail;
+
+	r = vti_init_vpd(vcpu);
+	if (r) {
+		printk(KERN_DEBUG"kvm: vpd init error!!\n");
+		goto uninit;
+	}
+
+	r = vti_create_vp(vcpu);
+	if (r)
+		goto uninit;
+
+	kvm_purge_vmm_mapping(vcpu);
+	local_irq_restore(psr);
+
+	return 0;
+uninit:
+	kvm_vcpu_uninit(vcpu);
+fail:
+	return r;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+		unsigned int id)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long vm_base = kvm->arch.vm_base;
+	int r;
+	int cpu;
+
+	r = -ENOMEM;
+	if (!vm_base) {
+		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
+		goto fail;
+	}
+	vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id);
+	vcpu->kvm = kvm;
+
+	cpu = get_cpu();
+	vti_vcpu_load(vcpu, cpu);
+	r = vti_vcpu_setup(vcpu, id);
+	put_cpu();
+
+	if (r) {
+		printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
+		goto fail;
+	}
+
+	return vcpu;
+fail:
+	return ERR_PTR(r);
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+		struct kvm_debug_guest *dbg)
+{
+	return -EINVAL;
+}
+
+static void free_kvm(struct kvm *kvm)
+{
+	unsigned long vm_base = kvm->arch.vm_base;
+
+	if (vm_base) {
+		memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
+		free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
+	}
+
+}
+
+static void kvm_release_vm_pages(struct kvm *kvm)
+{
+	struct kvm_memory_slot *memslot;
+	int i, j;
+	unsigned long base_gfn;
+
+	for (i = 0; i < kvm->nmemslots; i++) {
+		memslot = &kvm->memslots[i];
+		base_gfn = memslot->base_gfn;
+
+		for (j = 0; j < memslot->npages; j++) {
+			if (memslot->rmap[j])
+				put_page((struct page *)memslot->rmap[j]);
+		}
+	}
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	kfree(kvm->arch.vioapic);
+	kvm_release_vm_pages(kvm);
+	kvm_free_physmem(kvm);
+	free_kvm(kvm);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	if (cpu != vcpu->cpu) {
+		vcpu->cpu = cpu;
+		if (vcpu->arch.ht_active)
+			kvm_migrate_hlt_timer(vcpu);
+	}
+}
+
+#define SAVE_REGS(_x) 	regs->_x = vcpu->arch._x
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+	int r;
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+	vcpu_load(vcpu);
+
+	for (i = 0; i < 16; i++) {
+		regs->vpd.vgr[i] = vpd->vgr[i];
+		regs->vpd.vbgr[i] = vpd->vbgr[i];
+	}
+	for (i = 0; i < 128; i++)
+		regs->vpd.vcr[i] = vpd->vcr[i];
+	regs->vpd.vhpi = vpd->vhpi;
+	regs->vpd.vnat = vpd->vnat;
+	regs->vpd.vbnat = vpd->vbnat;
+	regs->vpd.vpsr = vpd->vpsr;
+	regs->vpd.vpr = vpd->vpr;
+
+	r = -EFAULT;
+	r = copy_to_user(regs->saved_guest, &vcpu->arch.guest,
+					sizeof(union context));
+	if (r)
+		goto out;
+	r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET);
+	if (r)
+		goto out;
+	SAVE_REGS(mp_state);
+	SAVE_REGS(vmm_rr);
+	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
+	memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
+	SAVE_REGS(itr_regions);
+	SAVE_REGS(dtr_regions);
+	SAVE_REGS(tc_regions);
+	SAVE_REGS(irq_check);
+	SAVE_REGS(itc_check);
+	SAVE_REGS(timer_check);
+	SAVE_REGS(timer_pending);
+	SAVE_REGS(last_itc);
+	for (i = 0; i < 8; i++) {
+		regs->vrr[i] = vcpu->arch.vrr[i];
+		regs->ibr[i] = vcpu->arch.ibr[i];
+		regs->dbr[i] = vcpu->arch.dbr[i];
+	}
+	for (i = 0; i < 4; i++)
+		regs->insvc[i] = vcpu->arch.insvc[i];
+	regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC);
+	SAVE_REGS(xtp);
+	SAVE_REGS(metaphysical_rr0);
+	SAVE_REGS(metaphysical_rr4);
+	SAVE_REGS(metaphysical_saved_rr0);
+	SAVE_REGS(metaphysical_saved_rr4);
+	SAVE_REGS(fp_psr);
+	SAVE_REGS(saved_gp);
+	vcpu_put(vcpu);
+	r = 0;
+out:
+	return r;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+
+	hrtimer_cancel(&vcpu->arch.hlt_timer);
+	kfree(vcpu->arch.apic);
+}
+
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+		unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+		struct kvm_userspace_memory_region *mem,
+		struct kvm_memory_slot old,
+		int user_alloc)
+{
+	unsigned long i;
+	struct page *page;
+	int npages = mem->memory_size >> PAGE_SHIFT;
+	struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
+	unsigned long base_gfn = memslot->base_gfn;
+
+	for (i = 0; i < npages; i++) {
+		page = gfn_to_page(kvm, base_gfn + i);
+		kvm_set_pmt_entry(kvm, base_gfn + i,
+				page_to_pfn(page) << PAGE_SHIFT,
+				_PAGE_AR_RWX|_PAGE_MA_WB);
+		memslot->rmap[i] = (unsigned long)page;
+	}
+
+	return 0;
+}
+
+
+long kvm_arch_dev_ioctl(struct file *filp,
+		unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_uninit(vcpu);
+}
+
+static int vti_cpu_has_kvm_support(void)
+{
+	long  avail = 1, status = 1, control = 1;
+	long ret;
+
+	ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
+	if (ret)
+		goto out;
+
+	if (!(avail & PAL_PROC_VM_BIT))
+		goto out;
+
+	printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
+
+	ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
+	if (ret)
+		goto out;
+	printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
+
+	if (!(vp_env_info & VP_OPCODE)) {
+		printk(KERN_WARNING"kvm: No opcode ability on hardware, "
+				"vm_env_info:0x%lx\n", vp_env_info);
+	}
+
+	return 1;
+out:
+	return 0;
+}
+
+static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
+						struct module *module)
+{
+	unsigned long module_base;
+	unsigned long vmm_size;
+
+	unsigned long vmm_offset, func_offset, fdesc_offset;
+	struct fdesc *p_fdesc;
+
+	BUG_ON(!module);
+
+	if (!kvm_vmm_base) {
+		printk("kvm: kvm area hasn't been initilized yet!!\n");
+		return -EFAULT;
+	}
+
+	/*Calculate new position of relocated vmm module.*/
+	module_base = (unsigned long)module->module_core;
+	vmm_size = module->core_size;
+	if (unlikely(vmm_size > KVM_VMM_SIZE))
+		return -EFAULT;
+
+	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
+	kvm_flush_icache(kvm_vmm_base, vmm_size);
+
+	/*Recalculate kvm_vmm_info based on new VMM*/
+	vmm_offset = vmm_info->vmm_ivt - module_base;
+	kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
+	printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
+			kvm_vmm_info->vmm_ivt);
+
+	fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
+	kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
+							fdesc_offset);
+	func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
+	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+	p_fdesc->ip = KVM_VMM_BASE + func_offset;
+	p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
+
+	printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
+			KVM_VMM_BASE+func_offset);
+
+	fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
+	kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
+			fdesc_offset);
+	func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
+	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
+	p_fdesc->ip = KVM_VMM_BASE + func_offset;
+	p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
+
+	kvm_vmm_gp = p_fdesc->gp;
+
+	printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
+						kvm_vmm_info->vmm_entry);
+	printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
+						KVM_VMM_BASE + func_offset);
+
+	return 0;
+}
+
+int kvm_arch_init(void *opaque)
+{
+	int r;
+	struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
+
+	if (!vti_cpu_has_kvm_support()) {
+		printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
+		r = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (kvm_vmm_info) {
+		printk(KERN_ERR "kvm: Already loaded VMM module!\n");
+		r = -EEXIST;
+		goto out;
+	}
+
+	r = -ENOMEM;
+	kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
+	if (!kvm_vmm_info)
+		goto out;
+
+	if (kvm_alloc_vmm_area())
+		goto out_free0;
+
+	r = kvm_relocate_vmm(vmm_info, vmm_info->module);
+	if (r)
+		goto out_free1;
+
+	return 0;
+
+out_free1:
+	kvm_free_vmm_area();
+out_free0:
+	kfree(kvm_vmm_info);
+out:
+	return r;
+}
+
+void kvm_arch_exit(void)
+{
+	kvm_free_vmm_area();
+	kfree(kvm_vmm_info);
+	kvm_vmm_info = NULL;
+}
+
+static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
+		struct kvm_dirty_log *log)
+{
+	struct kvm_memory_slot *memslot;
+	int r, i;
+	long n, base;
+	unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS
+					+ KVM_MEM_DIRTY_LOG_OFS);
+
+	r = -EINVAL;
+	if (log->slot >= KVM_MEMORY_SLOTS)
+		goto out;
+
+	memslot = &kvm->memslots[log->slot];
+	r = -ENOENT;
+	if (!memslot->dirty_bitmap)
+		goto out;
+
+	n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+	base = memslot->base_gfn / BITS_PER_LONG;
+
+	for (i = 0; i < n/sizeof(long); ++i) {
+		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
+		dirty_bitmap[base + i] = 0;
+	}
+	r = 0;
+out:
+	return r;
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+		struct kvm_dirty_log *log)
+{
+	int r;
+	int n;
+	struct kvm_memory_slot *memslot;
+	int is_dirty = 0;
+
+	spin_lock(&kvm->arch.dirty_log_lock);
+
+	r = kvm_ia64_sync_dirty_log(kvm, log);
+	if (r)
+		goto out;
+
+	r = kvm_get_dirty_log(kvm, log, &is_dirty);
+	if (r)
+		goto out;
+
+	/* If nothing is dirty, don't bother messing with page tables. */
+	if (is_dirty) {
+		kvm_flush_remote_tlbs(kvm);
+		memslot = &kvm->memslots[log->slot];
+		n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
+		memset(memslot->dirty_bitmap, 0, n);
+	}
+	r = 0;
+out:
+	spin_unlock(&kvm->arch.dirty_log_lock);
+	return r;
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+static void vcpu_kick_intr(void *info)
+{
+#ifdef DEBUG
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info;
+	printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu);
+#endif
+}
+
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+	int ipi_pcpu = vcpu->cpu;
+
+	if (waitqueue_active(&vcpu->wq))
+		wake_up_interruptible(&vcpu->wq);
+
+	if (vcpu->guest_mode)
+		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+}
+
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig)
+{
+
+	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+	if (!test_and_set_bit(vec, &vpd->irr[0])) {
+		vcpu->arch.irq_new_pending = 1;
+		 if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+			kvm_vcpu_kick(vcpu);
+		else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
+			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+			if (waitqueue_active(&vcpu->wq))
+				wake_up_interruptible(&vcpu->wq);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+{
+	return apic->vcpu->vcpu_id == dest;
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+{
+	return 0;
+}
+
+struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
+				       unsigned long bitmap)
+{
+	struct kvm_vcpu *lvcpu = kvm->vcpus[0];
+	int i;
+
+	for (i = 1; i < KVM_MAX_VCPUS; i++) {
+		if (!kvm->vcpus[i])
+			continue;
+		if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp)
+			lvcpu = kvm->vcpus[i];
+	}
+
+	return lvcpu;
+}
+
+static int find_highest_bits(int *dat)
+{
+	u32  bits, bitnum;
+	int i;
+
+	/* loop for all 256 bits */
+	for (i = 7; i >= 0 ; i--) {
+		bits = dat[i];
+		if (bits) {
+			bitnum = fls(bits);
+			return i * 32 + bitnum - 1;
+		}
+	}
+
+	return -1;
+}
+
+int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
+
+    if (vpd->irr[0] & (1UL << NMI_VECTOR))
+		return NMI_VECTOR;
+    if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+    return find_highest_bits((int *)&vpd->irr[0]);
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+{
+	if (kvm_highest_pending_irq(vcpu) != -1)
+		return 1;
+	return 0;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	return gfn;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
new file mode 100644
index 000000000000..091f936c4485
--- /dev/null
+++ b/arch/ia64/kvm/kvm_fw.c
@@ -0,0 +1,500 @@
+/*
+ * PAL/SAL call delegation
+ *
+ * Copyright (c) 2004 Li Susie <susie.li@intel.com>
+ * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
+ * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/smp.h>
+
+#include "vti.h"
+#include "misc.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/tlb.h>
+
+/*
+ * Handy macros to make sure that the PAL return values start out
+ * as something meaningful.
+ */
+#define INIT_PAL_STATUS_UNIMPLEMENTED(x)		\
+	{						\
+		x.status = PAL_STATUS_UNIMPLEMENTED;	\
+		x.v0 = 0;				\
+		x.v1 = 0;				\
+		x.v2 = 0;				\
+	}
+
+#define INIT_PAL_STATUS_SUCCESS(x)			\
+	{						\
+		x.status = PAL_STATUS_SUCCESS;		\
+		x.v0 = 0;				\
+		x.v1 = 0;				\
+		x.v2 = 0;				\
+    }
+
+static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
+		u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
+	struct exit_ctl_data *p;
+
+	if (vcpu) {
+		p = &vcpu->arch.exit_data;
+		if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+			*gr28 = p->u.pal_data.gr28;
+			*gr29 = p->u.pal_data.gr29;
+			*gr30 = p->u.pal_data.gr30;
+			*gr31 = p->u.pal_data.gr31;
+			return ;
+		}
+	}
+	printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
+}
+
+static void set_pal_result(struct kvm_vcpu *vcpu,
+		struct ia64_pal_retval result) {
+
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
+		p->u.pal_data.ret = result;
+		return ;
+	}
+	INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
+}
+
+static void set_sal_result(struct kvm_vcpu *vcpu,
+		struct sal_ret_values result) {
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
+		p->u.sal_data.ret = result;
+		return ;
+	}
+	printk(KERN_WARNING"Failed to set sal result!!\n");
+}
+
+struct cache_flush_args {
+	u64 cache_type;
+	u64 operation;
+	u64 progress;
+	long status;
+};
+
+cpumask_t cpu_cache_coherent_map;
+
+static void remote_pal_cache_flush(void *data)
+{
+	struct cache_flush_args *args = data;
+	long status;
+	u64 progress = args->progress;
+
+	status = ia64_pal_cache_flush(args->cache_type, args->operation,
+					&progress, NULL);
+	if (status != 0)
+	args->status = status;
+}
+
+static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
+{
+	u64 gr28, gr29, gr30, gr31;
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	struct cache_flush_args args = {0, 0, 0, 0};
+	long psr;
+
+	gr28 = gr29 = gr30 = gr31 = 0;
+	kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
+
+	if (gr31 != 0)
+		printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
+
+	/* Always call Host Pal in int=1 */
+	gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
+	args.cache_type = gr29;
+	args.operation = gr30;
+	smp_call_function(remote_pal_cache_flush,
+				(void *)&args, 1, 1);
+	if (args.status != 0)
+		printk(KERN_ERR"pal_cache_flush error!,"
+				"status:0x%lx\n", args.status);
+	/*
+	 * Call Host PAL cache flush
+	 * Clear psr.ic when call PAL_CACHE_FLUSH
+	 */
+	local_irq_save(psr);
+	result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
+						&result.v0);
+	local_irq_restore(psr);
+	if (result.status != 0)
+		printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
+				"in1:%lx,in2:%lx\n",
+				vcpu, result.status, gr29, gr30);
+
+#if 0
+	if (gr29 == PAL_CACHE_TYPE_COHERENT) {
+		cpus_setall(vcpu->arch.cache_coherent_map);
+		cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
+		cpus_setall(cpu_cache_coherent_map);
+		cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
+	}
+#endif
+	return result;
+}
+
+struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
+	return result;
+}
+
+static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
+
+	/*
+	 * PAL_FREQ_BASE may not be implemented in some platforms,
+	 * call SAL instead.
+	 */
+	if (result.v0 == 0) {
+		result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
+							&result.v0,
+							&result.v1);
+		result.v2 = 0;
+	}
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
+	return result;
+}
+
+static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_UNIMPLEMENTED(result);
+	return result;
+}
+
+static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_SUCCESS(result);
+	return result;
+}
+
+static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
+{
+
+	struct ia64_pal_retval result = {0, 0, 0, 0};
+	long in0, in1, in2, in3;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
+			&result.v2, in2);
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
+{
+
+	pal_cache_config_info_t ci;
+	long status;
+	unsigned long in0, in1, in2, in3, r9, r10;
+
+	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
+	status = ia64_pal_cache_config_info(in1, in2, &ci);
+	r9 = ci.pcci_info_1.pcci1_data;
+	r10 = ci.pcci_info_2.pcci2_data;
+	return ((struct ia64_pal_retval){status, r9, r10, 0});
+}
+
+#define GUEST_IMPL_VA_MSB	59
+#define GUEST_RID_BITS		18
+
+static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
+{
+
+	pal_vm_info_1_u_t vminfo1;
+	pal_vm_info_2_u_t vminfo2;
+	struct ia64_pal_retval result;
+
+	PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
+	if (!result.status) {
+		vminfo1.pvi1_val = result.v0;
+		vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
+		vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
+		result.v0 = vminfo1.pvi1_val;
+		vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
+		vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
+		result.v1 = vminfo2.pvi2_val;
+	}
+
+	return result;
+}
+
+static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
+{
+	struct ia64_pal_retval result;
+
+	INIT_PAL_STATUS_UNIMPLEMENTED(result);
+
+	return result;
+}
+
+static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
+{
+	u64 index = 0;
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+	if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
+		index = p->u.pal_data.gr28;
+
+	return index;
+}
+
+int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+
+	u64 gr28;
+	struct ia64_pal_retval result;
+	int ret = 1;
+
+	gr28 = kvm_get_pal_call_index(vcpu);
+	/*printk("pal_call index:%lx\n",gr28);*/
+	switch (gr28) {
+	case PAL_CACHE_FLUSH:
+		result = pal_cache_flush(vcpu);
+		break;
+	case PAL_CACHE_SUMMARY:
+		result = pal_cache_summary(vcpu);
+		break;
+	case PAL_HALT_LIGHT:
+	{
+		vcpu->arch.timer_pending = 1;
+		INIT_PAL_STATUS_SUCCESS(result);
+		if (kvm_highest_pending_irq(vcpu) == -1)
+			ret = kvm_emulate_halt(vcpu);
+
+	}
+		break;
+
+	case PAL_FREQ_RATIOS:
+		result = pal_freq_ratios(vcpu);
+		break;
+
+	case PAL_FREQ_BASE:
+		result = pal_freq_base(vcpu);
+		break;
+
+	case PAL_LOGICAL_TO_PHYSICAL :
+		result = pal_logical_to_physica(vcpu);
+		break;
+
+	case PAL_VM_SUMMARY :
+		result = pal_vm_summary(vcpu);
+		break;
+
+	case PAL_VM_INFO :
+		result = pal_vm_info(vcpu);
+		break;
+	case PAL_PLATFORM_ADDR :
+		result = pal_platform_addr(vcpu);
+		break;
+	case PAL_CACHE_INFO:
+		result = pal_cache_info(vcpu);
+		break;
+	case PAL_PTCE_INFO:
+		INIT_PAL_STATUS_SUCCESS(result);
+		result.v1 = (1L << 32) | 1L;
+		break;
+	case PAL_VM_PAGE_SIZE:
+		result.status = ia64_pal_vm_page_size(&result.v0,
+							&result.v1);
+		break;
+	case PAL_RSE_INFO:
+		result.status = ia64_pal_rse_info(&result.v0,
+					(pal_hints_u_t *)&result.v1);
+		break;
+	case PAL_PROC_GET_FEATURES:
+		result = pal_proc_get_features(vcpu);
+		break;
+	case PAL_DEBUG_INFO:
+		result.status = ia64_pal_debug_info(&result.v0,
+							&result.v1);
+		break;
+	case PAL_VERSION:
+		result.status = ia64_pal_version(
+				(pal_version_u_t *)&result.v0,
+				(pal_version_u_t *)&result.v1);
+
+		break;
+	case PAL_FIXED_ADDR:
+		result.status = PAL_STATUS_SUCCESS;
+		result.v0 = vcpu->vcpu_id;
+		break;
+	default:
+		INIT_PAL_STATUS_UNIMPLEMENTED(result);
+		printk(KERN_WARNING"kvm: Unsupported pal call,"
+					" index:0x%lx\n", gr28);
+	}
+	set_pal_result(vcpu, result);
+	return ret;
+}
+
+static struct sal_ret_values sal_emulator(struct kvm *kvm,
+				long index, unsigned long in1,
+				unsigned long in2, unsigned long in3,
+				unsigned long in4, unsigned long in5,
+				unsigned long in6, unsigned long in7)
+{
+	unsigned long r9  = 0;
+	unsigned long r10 = 0;
+	long r11 = 0;
+	long status;
+
+	status = 0;
+	switch (index) {
+	case SAL_FREQ_BASE:
+		status = ia64_sal_freq_base(in1, &r9, &r10);
+		break;
+	case SAL_PCI_CONFIG_READ:
+		printk(KERN_WARNING"kvm: Not allowed to call here!"
+			" SAL_PCI_CONFIG_READ\n");
+		break;
+	case SAL_PCI_CONFIG_WRITE:
+		printk(KERN_WARNING"kvm: Not allowed to call here!"
+			" SAL_PCI_CONFIG_WRITE\n");
+		break;
+	case SAL_SET_VECTORS:
+		if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
+			if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
+				status = -2;
+			} else {
+				kvm->arch.rdv_sal_data.boot_ip = in2;
+				kvm->arch.rdv_sal_data.boot_gp = in3;
+			}
+			printk("Rendvous called! iip:%lx\n\n", in2);
+		} else
+			printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
+							"ignored...\n", in1);
+		break;
+	case SAL_GET_STATE_INFO:
+		/* No more info.  */
+		status = -5;
+		r9 = 0;
+		break;
+	case SAL_GET_STATE_INFO_SIZE:
+		/* Return a dummy size.  */
+		status = 0;
+		r9 = 128;
+		break;
+	case SAL_CLEAR_STATE_INFO:
+		/* Noop.  */
+		break;
+	case SAL_MC_RENDEZ:
+		printk(KERN_WARNING
+			"kvm: called SAL_MC_RENDEZ. ignored...\n");
+		break;
+	case SAL_MC_SET_PARAMS:
+		printk(KERN_WARNING
+			"kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
+		break;
+	case SAL_CACHE_FLUSH:
+		if (1) {
+			/*Flush using SAL.
+			This method is faster but has a side
+			effect on other vcpu running on
+			this cpu.  */
+			status = ia64_sal_cache_flush(in1);
+		} else {
+			/*Maybe need to implement the method
+			without side effect!*/
+			status = 0;
+		}
+		break;
+	case SAL_CACHE_INIT:
+		printk(KERN_WARNING
+			"kvm: called SAL_CACHE_INIT.  ignored...\n");
+		break;
+	case SAL_UPDATE_PAL:
+		printk(KERN_WARNING
+			"kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
+		break;
+	default:
+		printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
+						" index:%ld\n", index);
+		status = -1;
+		break;
+	}
+	return ((struct sal_ret_values) {status, r9, r10, r11});
+}
+
+static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
+		u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
+
+	struct exit_ctl_data *p;
+
+	p = kvm_get_exit_data(vcpu);
+
+	if (p) {
+		if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+			*in0 = p->u.sal_data.in0;
+			*in1 = p->u.sal_data.in1;
+			*in2 = p->u.sal_data.in2;
+			*in3 = p->u.sal_data.in3;
+			*in4 = p->u.sal_data.in4;
+			*in5 = p->u.sal_data.in5;
+			*in6 = p->u.sal_data.in6;
+			*in7 = p->u.sal_data.in7;
+			return ;
+		}
+	}
+	*in0 = 0;
+}
+
+void kvm_sal_emul(struct kvm_vcpu *vcpu)
+{
+
+	struct sal_ret_values result;
+	u64 index, in1, in2, in3, in4, in5, in6, in7;
+
+	kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
+			&in3, &in4, &in5, &in6, &in7);
+	result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
+					in4, in5, in6, in7);
+	set_sal_result(vcpu, result);
+}
diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
new file mode 100644
index 000000000000..13980d9b8bcf
--- /dev/null
+++ b/arch/ia64/kvm/kvm_minstate.h
@@ -0,0 +1,273 @@
+/*
+ *  kvm_minstate.h: min save macros
+ *  Copyright (c) 2007, Intel Corporation.
+ *
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/types.h>
+#include <asm/kregs.h>
+#include "asm-offsets.h"
+
+#define KVM_MINSTATE_START_SAVE_MIN	     					\
+	mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
+	;;									\
+	mov.m r28 = ar.rnat;                                  			\
+	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
+	;;									\
+	lfetch.fault.excl.nt1 [r22];						\
+	addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1;  /* compute base of memory stack */  \
+	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
+	;;									\
+	mov ar.bspstore = r22;				/* switch to kernel RBS */\
+	;;									\
+	mov r18 = ar.bsp;							\
+	mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
+
+
+
+#define KVM_MINSTATE_END_SAVE_MIN						\
+	bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
+	;;
+
+
+#define PAL_VSA_SYNC_READ						\
+	/* begin to call pal vps sync_read */				\
+	add r25 = VMM_VPD_BASE_OFFSET, r21;				\
+	adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21;  /* entry point */	\
+	;;								\
+	ld8 r25 = [r25];      /* read vpd base */			\
+	ld8 r20 = [r20];						\
+	;;								\
+	add r20 = PAL_VPS_SYNC_READ,r20;				\
+	;;								\
+{ .mii;									\
+	nop 0x0;							\
+	mov r24 = ip;							\
+	mov b0 = r20;							\
+	;;								\
+};									\
+{ .mmb;									\
+	add r24 = 0x20, r24;						\
+	nop 0x0;							\
+	br.cond.sptk b0;        /*  call the service */			\
+	;;								\
+};
+
+
+
+#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
+
+/*
+ * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
+ * the minimum state necessary that allows us to turn psr.ic back
+ * on.
+ *
+ * Assumed state upon entry:
+ *  psr.ic: off
+ *  r31:	contains saved predicates (pr)
+ *
+ * Upon exit, the state is as follows:
+ *  psr.ic: off
+ *   r2 = points to &pt_regs.r16
+ *   r8 = contents of ar.ccv
+ *   r9 = contents of ar.csd
+ *  r10 = contents of ar.ssd
+ *  r11 = FPSR_DEFAULT
+ *  r12 = kernel sp (kernel virtual address)
+ *  r13 = points to current task_struct (kernel virtual address)
+ *  p15 = TRUE if psr.i is set in cr.ipsr
+ *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
+ *	  preserved
+ *
+ * Note that psr.ic is NOT turned on by this macro.  This is so that
+ * we can pass interruption state as arguments to a handler.
+ */
+
+
+#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
+
+#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)			\
+	KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */	\
+	mov r27 = ar.rsc;         /* M */			\
+	mov r20 = r1;         /* A */				\
+	mov r25 = ar.unat;        /* M */			\
+	mov r29 = cr.ipsr;        /* M */			\
+	mov r26 = ar.pfs;         /* I */			\
+	mov r18 = cr.isr;         				\
+	COVER;              /* B;; (or nothing) */		\
+	;;							\
+	tbit.z p0,p15 = r29,IA64_PSR_I_BIT;			\
+	mov r1 = r16;						\
+/*	mov r21=r16;	*/					\
+	/* switch from user to kernel RBS: */			\
+	;;							\
+	invala;             /* M */				\
+	SAVE_IFS;						\
+	;;							\
+	KVM_MINSTATE_START_SAVE_MIN				\
+	adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */	\
+	adds r16 = PT(CR_IPSR),r1;				\
+	;;							\
+	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;		\
+	st8 [r16] = r29;      /* save cr.ipsr */		\
+	;;							\
+	lfetch.fault.excl.nt1 [r17];				\
+	tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;			\
+	mov r29 = b0						\
+	;;							\
+	adds r16 = PT(R8),r1; /* initialize first base pointer */\
+	adds r17 = PT(R9),r1; /* initialize second base pointer */\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r8,16;			\
+.mem.offset 8,0; st8.spill [r17] = r9,16;			\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r10,24;			\
+.mem.offset 8,0; st8.spill [r17] = r11,24;			\
+	;;							\
+	mov r9 = cr.iip;         /* M */			\
+	mov r10 = ar.fpsr;        /* M */			\
+	;;							\
+	st8 [r16] = r9,16;    /* save cr.iip */			\
+	st8 [r17] = r30,16;   /* save cr.ifs */			\
+	sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */		\
+	;;							\
+	st8 [r16] = r25,16;   /* save ar.unat */		\
+	st8 [r17] = r26,16;    /* save ar.pfs */		\
+	shl r18 = r18,16;     /* calu ar.rsc used for "loadrs" */\
+	;;							\
+	st8 [r16] = r27,16;   /* save ar.rsc */			\
+	st8 [r17] = r28,16;   /* save ar.rnat */		\
+	;;          /* avoid RAW on r16 & r17 */		\
+	st8 [r16] = r23,16;   /* save ar.bspstore */		\
+	st8 [r17] = r31,16;   /* save predicates */		\
+	;;							\
+	st8 [r16] = r29,16;   /* save b0 */			\
+	st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
+.mem.offset 8,0; st8.spill [r17] = r12,16;			\
+	adds r12 = -16,r1;    /* switch to kernel memory stack */  \
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r13,16;			\
+.mem.offset 8,0; st8.spill [r17] = r10,16;	/* save ar.fpsr */\
+	mov r13 = r21;   /* establish `current' */		\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r15,16;			\
+.mem.offset 8,0; st8.spill [r17] = r14,16;			\
+	;;							\
+.mem.offset 0,0; st8.spill [r16] = r2,16;			\
+.mem.offset 8,0; st8.spill [r17] = r3,16;			\
+	adds r2 = VMM_PT_REGS_R16_OFFSET,r1;			\
+	 ;;							\
+	adds r16 = VMM_VCPU_IIPA_OFFSET,r13;			\
+	adds r17 = VMM_VCPU_ISR_OFFSET,r13;			\
+	mov r26 = cr.iipa;					\
+	mov r27 = cr.isr;					\
+	;;							\
+	st8 [r16] = r26;					\
+	st8 [r17] = r27;					\
+	;;							\
+	EXTRA;							\
+	mov r8 = ar.ccv;					\
+	mov r9 = ar.csd;					\
+	mov r10 = ar.ssd;					\
+	movl r11 = FPSR_DEFAULT;   /* L-unit */			\
+	adds r17 = VMM_VCPU_GP_OFFSET,r13;			\
+	;;							\
+	ld8 r1 = [r17];/* establish kernel global pointer */	\
+	;;							\
+	PAL_VSA_SYNC_READ					\
+	KVM_MINSTATE_END_SAVE_MIN
+
+/*
+ * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
+ *
+ * Assumed state upon entry:
+ *  psr.ic: on
+ *  r2: points to &pt_regs.f6
+ *  r3: points to &pt_regs.f7
+ *  r8: contents of ar.ccv
+ *  r9: contents of ar.csd
+ *  r10:	contents of ar.ssd
+ *  r11:	FPSR_DEFAULT
+ *
+ * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
+ */
+#define KVM_SAVE_REST				\
+.mem.offset 0,0; st8.spill [r2] = r16,16;	\
+.mem.offset 8,0; st8.spill [r3] = r17,16;	\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r18,16;	\
+.mem.offset 8,0; st8.spill [r3] = r19,16;	\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r20,16;	\
+.mem.offset 8,0; st8.spill [r3] = r21,16;	\
+	mov r18=b6;			\
+	;;				\
+.mem.offset 0,0; st8.spill [r2] = r22,16;	\
+.mem.offset 8,0; st8.spill [r3] = r23,16;	\
+	mov r19 = b7;				\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r24,16;	\
+.mem.offset 8,0; st8.spill [r3] = r25,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r26,16;	\
+.mem.offset 8,0; st8.spill [r3] = r27,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r28,16;	\
+.mem.offset 8,0; st8.spill [r3] = r29,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r30,16;	\
+.mem.offset 8,0; st8.spill [r3] = r31,32;	\
+	;;					\
+	mov ar.fpsr = r11;			\
+	st8 [r2] = r8,8;			\
+	adds r24 = PT(B6)-PT(F7),r3;		\
+	adds r25 = PT(B7)-PT(F7),r3;		\
+	;;					\
+	st8 [r24] = r18,16;       /* b6 */	\
+	st8 [r25] = r19,16;       /* b7 */	\
+	adds r2 = PT(R4)-PT(F6),r2;		\
+	adds r3 = PT(R5)-PT(F7),r3;		\
+	;;					\
+	st8 [r24] = r9;	/* ar.csd */		\
+	st8 [r25] = r10;	/* ar.ssd */	\
+	;;					\
+	mov r18 = ar.unat;			\
+	adds r19 = PT(EML_UNAT)-PT(R4),r2;	\
+	;;					\
+	st8 [r19] = r18; /* eml_unat */ 	\
+
+
+#define KVM_SAVE_EXTRA				\
+.mem.offset 0,0; st8.spill [r2] = r4,16;	\
+.mem.offset 8,0; st8.spill [r3] = r5,16;	\
+	;;					\
+.mem.offset 0,0; st8.spill [r2] = r6,16;	\
+.mem.offset 8,0; st8.spill [r3] = r7;		\
+	;;					\
+	mov r26 = ar.unat;			\
+	;;					\
+	st8 [r2] = r26;/* eml_unat */ 		\
+
+#define KVM_SAVE_MIN_WITH_COVER		KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
+#define KVM_SAVE_MIN_WITH_COVER_R19	KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
+#define KVM_SAVE_MIN			KVM_DO_SAVE_MIN(     , mov r30 = r0, )
diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
new file mode 100644
index 000000000000..6d6cbcb14893
--- /dev/null
+++ b/arch/ia64/kvm/lapic.h
@@ -0,0 +1,25 @@
+#ifndef __KVM_IA64_LAPIC_H
+#define __KVM_IA64_LAPIC_H
+
+#include <linux/kvm_host.h>
+
+/*
+ * vlsapic
+ */
+struct kvm_lapic{
+	struct kvm_vcpu *vcpu;
+	uint64_t insvc[4];
+	uint64_t vhpi;
+	uint8_t xtp;
+	uint8_t pal_init_pending;
+	uint8_t pad[2];
+};
+
+int kvm_create_lapic(struct kvm_vcpu *vcpu);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
+
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
+
+#endif
diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
new file mode 100644
index 000000000000..e585c4607344
--- /dev/null
+++ b/arch/ia64/kvm/misc.h
@@ -0,0 +1,93 @@
+#ifndef __KVM_IA64_MISC_H
+#define __KVM_IA64_MISC_H
+
+#include <linux/kvm_host.h>
+/*
+ * misc.h
+ * 	Copyright (C) 2007, Intel Corporation.
+ *  	Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+/*
+ *Return p2m base address at host side!
+ */
+static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
+{
+	return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS);
+}
+
+static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
+		u64 paddr, u64 mem_flags)
+{
+	uint64_t *pmt_base = kvm_host_get_pmt(kvm);
+	unsigned long pte;
+
+	pte = PAGE_ALIGN(paddr) | mem_flags;
+	pmt_base[gfn] = pte;
+}
+
+/*Function for translating host address to guest address*/
+
+static inline void *to_guest(struct kvm *kvm, void *addr)
+{
+	return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
+			KVM_VM_DATA_BASE);
+}
+
+/*Function for translating guest address to host address*/
+
+static inline void *to_host(struct kvm *kvm, void *addr)
+{
+	return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
+			+ kvm->arch.vm_base);
+}
+
+/* Get host context of the vcpu */
+static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
+{
+	union context *ctx = &vcpu->arch.host;
+	return to_guest(vcpu->kvm, ctx);
+}
+
+/* Get guest context of the vcpu */
+static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
+{
+	union context *ctx = &vcpu->arch.guest;
+	return  to_guest(vcpu->kvm, ctx);
+}
+
+/* kvm get exit data from gvmm! */
+static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
+{
+	return &vcpu->arch.exit_data;
+}
+
+/*kvm get vcpu ioreq for kvm module!*/
+static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p_ctl_data;
+
+	if (vcpu) {
+		p_ctl_data = kvm_get_exit_data(vcpu);
+		if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
+			return &p_ctl_data->u.ioreq;
+	}
+
+	return NULL;
+}
+
+#endif
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
new file mode 100644
index 000000000000..351bf70da463
--- /dev/null
+++ b/arch/ia64/kvm/mmio.c
@@ -0,0 +1,341 @@
+/*
+ * mmio.c: MMIO emulation components.
+ * Copyright (c) 2004, Intel Corporation.
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
+ *
+ * Copyright (c) 2007 Intel Corporation  KVM support.
+ * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ * Xiantao Zhang  (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/kvm_host.h>
+
+#include "vcpu.h"
+
+static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
+{
+	VLSAPIC_XTP(v) = val;
+}
+
+/*
+ * LSAPIC OFFSET
+ */
+#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
+#define PIB_OFST_INTA          0x1E0000
+#define PIB_OFST_XTP           0x1E0008
+
+/*
+ * execute write IPI op.
+ */
+static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
+					uint64_t addr, uint64_t data)
+{
+	struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
+	unsigned long psr;
+
+	local_irq_save(psr);
+
+	p->exit_reason = EXIT_REASON_IPI;
+	p->u.ipi_data.addr.val = addr;
+	p->u.ipi_data.data.val = data;
+	vmm_transition(current_vcpu);
+
+	local_irq_restore(psr);
+
+}
+
+void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
+			unsigned long length, unsigned long val)
+{
+	addr &= (PIB_SIZE - 1);
+
+	switch (addr) {
+	case PIB_OFST_INTA:
+		/*panic_domain(NULL, "Undefined write on PIB INTA\n");*/
+		panic_vm(v);
+		break;
+	case PIB_OFST_XTP:
+		if (length == 1) {
+			vlsapic_write_xtp(v, val);
+		} else {
+			/*panic_domain(NULL,
+			"Undefined write on PIB XTP\n");*/
+			panic_vm(v);
+		}
+		break;
+	default:
+		if (PIB_LOW_HALF(addr)) {
+			/*lower half */
+			if (length != 8)
+				/*panic_domain(NULL,
+				"Can't LHF write with size %ld!\n",
+				length);*/
+				panic_vm(v);
+			else
+				vlsapic_write_ipi(v, addr, val);
+		} else {   /*	upper half
+				printk("IPI-UHF write %lx\n",addr);*/
+			panic_vm(v);
+		}
+		break;
+	}
+}
+
+unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
+		unsigned long length)
+{
+	uint64_t result = 0;
+
+	addr &= (PIB_SIZE - 1);
+
+	switch (addr) {
+	case PIB_OFST_INTA:
+		if (length == 1) /* 1 byte load */
+			; /* There is no i8259, there is no INTA access*/
+		else
+			/*panic_domain(NULL,"Undefined read on PIB INTA\n"); */
+			panic_vm(v);
+
+		break;
+	case PIB_OFST_XTP:
+		if (length == 1) {
+			result = VLSAPIC_XTP(v);
+			/* printk("read xtp %lx\n", result); */
+		} else {
+			/*panic_domain(NULL,
+			"Undefined read on PIB XTP\n");*/
+			panic_vm(v);
+		}
+		break;
+	default:
+		panic_vm(v);
+		break;
+	}
+	return result;
+}
+
+static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
+					u16 s, int ma, int dir)
+{
+	unsigned long iot;
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long psr;
+
+	iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
+
+	local_irq_save(psr);
+
+	/*Intercept the acces for PIB range*/
+	if (iot == GPFN_PIB) {
+		if (!dir)
+			lsapic_write(vcpu, src_pa, s, *dest);
+		else
+			*dest = lsapic_read(vcpu, src_pa, s);
+		goto out;
+	}
+	p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
+	p->u.ioreq.addr = src_pa;
+	p->u.ioreq.size = s;
+	p->u.ioreq.dir = dir;
+	if (dir == IOREQ_WRITE)
+		p->u.ioreq.data = *dest;
+	p->u.ioreq.state = STATE_IOREQ_READY;
+	vmm_transition(vcpu);
+
+	if (p->u.ioreq.state == STATE_IORESP_READY) {
+		if (dir == IOREQ_READ)
+			*dest = p->u.ioreq.data;
+	} else
+		panic_vm(vcpu);
+out:
+	local_irq_restore(psr);
+	return ;
+}
+
+/*
+   dir 1: read 0:write
+   inst_type 0:integer 1:floating point
+ */
+#define SL_INTEGER	0	/* store/load interger*/
+#define SL_FLOATING	1     	/* store/load floating*/
+
+void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
+{
+	struct kvm_pt_regs *regs;
+	IA64_BUNDLE bundle;
+	int slot, dir = 0;
+	int inst_type = -1;
+	u16 size = 0;
+	u64 data, slot1a, slot1b, temp, update_reg;
+	s32 imm;
+	INST64 inst;
+
+	regs = vcpu_regs(vcpu);
+
+	if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
+		/* if fetch code fail, return and try again */
+		return;
+	}
+	slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
+	if (!slot)
+		inst.inst = bundle.slot0;
+	else if (slot == 1) {
+		slot1a = bundle.slot1a;
+		slot1b = bundle.slot1b;
+		inst.inst = slot1a + (slot1b << 18);
+	} else if (slot == 2)
+		inst.inst = bundle.slot2;
+
+	/* Integer Load/Store */
+	if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
+		inst_type = SL_INTEGER;
+		size = (inst.M1.x6 & 0x3);
+		if ((inst.M1.x6 >> 2) > 0xb) {
+			/*write*/
+			dir = IOREQ_WRITE;
+			data = vcpu_get_gr(vcpu, inst.M4.r2);
+		} else if ((inst.M1.x6 >> 2) < 0xb) {
+			/*read*/
+			dir = IOREQ_READ;
+		}
+	} else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
+		/* Integer Load + Reg update */
+		inst_type = SL_INTEGER;
+		dir = IOREQ_READ;
+		size = (inst.M2.x6 & 0x3);
+		temp = vcpu_get_gr(vcpu, inst.M2.r3);
+		update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
+		temp += update_reg;
+		vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
+	} else if (inst.M3.major == 5) {
+		/*Integer Load/Store + Imm update*/
+		inst_type = SL_INTEGER;
+		size = (inst.M3.x6&0x3);
+		if ((inst.M5.x6 >> 2) > 0xb) {
+			/*write*/
+			dir = IOREQ_WRITE;
+			data = vcpu_get_gr(vcpu, inst.M5.r2);
+			temp = vcpu_get_gr(vcpu, inst.M5.r3);
+			imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
+				(inst.M5.imm7 << 23);
+			temp += imm >> 23;
+			vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
+
+		} else if ((inst.M3.x6 >> 2) < 0xb) {
+			/*read*/
+			dir = IOREQ_READ;
+			temp = vcpu_get_gr(vcpu, inst.M3.r3);
+			imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
+				(inst.M3.imm7 << 23);
+			temp += imm >> 23;
+			vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
+
+		}
+	} else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
+				&& inst.M9.m == 0 && inst.M9.x == 0) {
+		/* Floating-point spill*/
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
+		/* Write high word. FIXME: this is a kludge!  */
+		v.u.bits[1] &= 0x3ffff;
+		mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+		data = v.u.bits[0];
+		size = 3;
+	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
+		/* Floating-point spill + Imm update */
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+		temp = vcpu_get_gr(vcpu, inst.M10.r3);
+		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+			(inst.M10.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+
+		/* Write high word.FIXME: this is a kludge!  */
+		v.u.bits[1] &= 0x3ffff;
+		mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE);
+		data = v.u.bits[0];
+		size = 3;
+	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
+		/* Floating-point stf8 + Imm update */
+		struct ia64_fpreg v;
+		inst_type = SL_FLOATING;
+		dir = IOREQ_WRITE;
+		size = 3;
+		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
+		data = v.u.bits[0]; /* Significand.  */
+		temp = vcpu_get_gr(vcpu, inst.M10.r3);
+		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
+			(inst.M10.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
+	} else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
+			&& inst.M15.x6 <= 0x2f) {
+		temp = vcpu_get_gr(vcpu, inst.M15.r3);
+		imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
+			(inst.M15.imm7 << 23);
+		temp += imm >> 23;
+		vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
+
+		vcpu_increment_iip(vcpu);
+		return;
+	} else if (inst.M12.major == 6 && inst.M12.m == 1
+			&& inst.M12.x == 1 && inst.M12.x6 == 1) {
+		/* Floating-point Load Pair + Imm ldfp8 M12*/
+		struct ia64_fpreg v;
+
+		inst_type = SL_FLOATING;
+		dir = IOREQ_READ;
+		size = 8;     /*ldfd*/
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		v.u.bits[0] = data;
+		v.u.bits[1] = 0x1003E;
+		vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
+		padr += 8;
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		v.u.bits[0] = data;
+		v.u.bits[1] = 0x1003E;
+		vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
+		padr += 8;
+		vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
+		vcpu_increment_iip(vcpu);
+		return;
+	} else {
+		inst_type = -1;
+		panic_vm(vcpu);
+	}
+
+	size = 1 << size;
+	if (dir == IOREQ_WRITE) {
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+	} else {
+		mmio_access(vcpu, padr, &data, size, ma, dir);
+		if (inst_type == SL_INTEGER)
+			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
+		else
+			panic_vm(vcpu);
+
+	}
+	vcpu_increment_iip(vcpu);
+}
diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
new file mode 100644
index 000000000000..e4f15d641b22
--- /dev/null
+++ b/arch/ia64/kvm/optvfault.S
@@ -0,0 +1,918 @@
+/*
+ * arch/ia64/vmx/optvfault.S
+ * optimize virtualization fault handler
+ *
+ * Copyright (C) 2006 Intel Co
+ *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include <asm/processor.h>
+
+#include "vti.h"
+#include "asm-offsets.h"
+
+#define ACCE_MOV_FROM_AR
+#define ACCE_MOV_FROM_RR
+#define ACCE_MOV_TO_RR
+#define ACCE_RSM
+#define ACCE_SSM
+#define ACCE_MOV_TO_PSR
+#define ACCE_THASH
+
+//mov r1=ar3
+GLOBAL_ENTRY(kvm_asm_mov_from_ar)
+#ifndef ACCE_MOV_FROM_AR
+	br.many kvm_virtualization_fault_back
+#endif
+	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
+	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
+	extr.u r17=r25,6,7
+	;;
+	ld8 r18=[r18]
+	mov r19=ar.itc
+	mov r24=b0
+	;;
+	add r19=r19,r18
+	addl r20=@gprel(asm_mov_to_reg),gp
+	;;
+	st8 [r16] = r19
+	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
+	shladd r17=r17,4,r20
+	;;
+	mov b0=r17
+	br.sptk.few b0
+	;;
+END(kvm_asm_mov_from_ar)
+
+
+// mov r1=rr[r3]
+GLOBAL_ENTRY(kvm_asm_mov_from_rr)
+#ifndef ACCE_MOV_FROM_RR
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r16=r25,20,7
+	extr.u r17=r25,6,7
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
+	shladd r16=r16,4,r20
+	mov r24=b0
+	;;
+	add r27=VMM_VCPU_VRR0_OFFSET,r21
+	mov b0=r16
+	br.many b0
+	;;
+kvm_asm_mov_from_rr_back_1:
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
+	shr.u r26=r19,61
+	;;
+	shladd r17=r17,4,r22
+	shladd r27=r26,3,r27
+	;;
+	ld8 r19=[r27]
+	mov b0=r17
+	br.many b0
+END(kvm_asm_mov_from_rr)
+
+
+// mov rr[r3]=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_rr)
+#ifndef ACCE_MOV_TO_RR
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r16=r25,20,7
+	extr.u r17=r25,13,7
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
+	shladd r16=r16,4,r20
+	mov r22=b0
+	;;
+	add r27=VMM_VCPU_VRR0_OFFSET,r21
+	mov b0=r16
+	br.many b0
+	;;
+kvm_asm_mov_to_rr_back_1:
+	adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
+	shr.u r23=r19,61
+	shladd r17=r17,4,r20
+	;;
+	//if rr6, go back
+	cmp.eq p6,p0=6,r23
+	mov b0=r22
+	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
+	;;
+	mov r28=r19
+	mov b0=r17
+	br.many b0
+kvm_asm_mov_to_rr_back_2:
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	shladd r27=r23,3,r27
+	;; // vrr.rid<<4 |0xe
+	st8 [r27]=r19
+	mov b0=r30
+	;;
+	extr.u r16=r19,8,26
+	extr.u r18 =r19,2,6
+	mov r17 =0xe
+	;;
+	shladd r16 = r16, 4, r17
+	extr.u r19 =r19,0,8
+	;;
+	shl r16 = r16,8
+	;;
+	add r19 = r19, r16
+	;; //set ve 1
+	dep r19=-1,r19,0,1
+	cmp.lt p6,p0=14,r18
+	;;
+	(p6) mov r18=14
+	;;
+	(p6) dep r19=r18,r19,2,6
+	;;
+	cmp.eq p6,p0=0,r23
+	;;
+	cmp.eq.or p6,p0=4,r23
+	;;
+	adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	(p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	;;
+	ld4 r16=[r16]
+	cmp.eq p7,p0=r0,r0
+	(p6) shladd r17=r23,1,r17
+	;;
+	(p6) st8 [r17]=r19
+	(p6) tbit.nz p6,p7=r16,0
+	;;
+	(p7) mov rr[r28]=r19
+	mov r24=r22
+	br.many b0
+END(kvm_asm_mov_to_rr)
+
+
+//rsm
+GLOBAL_ENTRY(kvm_asm_rsm)
+#ifndef ACCE_RSM
+	br.many kvm_virtualization_fault_back
+#endif
+	add r16=VMM_VPD_BASE_OFFSET,r21
+	extr.u r26=r25,6,21
+	extr.u r27=r25,31,2
+	;;
+	ld8 r16=[r16]
+	extr.u r28=r25,36,1
+	dep r26=r27,r26,21,2
+	;;
+	add r17=VPD_VPSR_START_OFFSET,r16
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	//r26 is imm24
+	dep r26=r28,r26,23,1
+	;;
+	ld8 r18=[r17]
+	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
+	ld4 r23=[r22]
+	sub r27=-1,r26
+	mov r24=b0
+	;;
+	mov r20=cr.ipsr
+	or r28=r27,r28
+	and r19=r18,r27
+	;;
+	st8 [r17]=r19
+	and r20=r20,r28
+	/* Comment it out due to short of fp lazy alorgithm support
+	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+	;;
+	ld8 r27=[r27]
+	;;
+	tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
+	;;
+	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+	*/
+	;;
+	mov cr.ipsr=r20
+	tbit.nz p6,p0=r23,0
+	;;
+	tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
+	(p6) br.dptk kvm_resume_to_guest
+	;;
+	add r26=VMM_VCPU_META_RR0_OFFSET,r21
+	add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+	dep r23=-1,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	br.many kvm_resume_to_guest
+END(kvm_asm_rsm)
+
+
+//ssm
+GLOBAL_ENTRY(kvm_asm_ssm)
+#ifndef ACCE_SSM
+	br.many kvm_virtualization_fault_back
+#endif
+	add r16=VMM_VPD_BASE_OFFSET,r21
+	extr.u r26=r25,6,21
+	extr.u r27=r25,31,2
+	;;
+	ld8 r16=[r16]
+	extr.u r28=r25,36,1
+	dep r26=r27,r26,21,2
+	;;  //r26 is imm24
+	add r27=VPD_VPSR_START_OFFSET,r16
+	dep r26=r28,r26,23,1
+	;;  //r19 vpsr
+	ld8 r29=[r27]
+	mov r24=b0
+	;;
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	mov r20=cr.ipsr
+	or r19=r29,r26
+	;;
+	ld4 r23=[r22]
+	st8 [r27]=r19
+	or r20=r20,r26
+	;;
+	mov cr.ipsr=r20
+	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+	;;
+	and r19=r28,r19
+	tbit.z p6,p0=r23,0
+	;;
+	cmp.ne.or p6,p0=r28,r19
+	(p6) br.dptk kvm_asm_ssm_1
+	;;
+	add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+	dep r23=0,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	;;
+kvm_asm_ssm_1:
+	tbit.nz p6,p0=r29,IA64_PSR_I_BIT
+	;;
+	tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
+	(p6) br.dptk kvm_resume_to_guest
+	;;
+	add r29=VPD_VTPR_START_OFFSET,r16
+	add r30=VPD_VHPI_START_OFFSET,r16
+	;;
+	ld8 r29=[r29]
+	ld8 r30=[r30]
+	;;
+	extr.u r17=r29,4,4
+	extr.u r18=r29,16,1
+	;;
+	dep r17=r18,r17,4,1
+	;;
+	cmp.gt p6,p0=r30,r17
+	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
+	br.many kvm_resume_to_guest
+END(kvm_asm_ssm)
+
+
+//mov psr.l=r2
+GLOBAL_ENTRY(kvm_asm_mov_to_psr)
+#ifndef ACCE_MOV_TO_PSR
+	br.many kvm_virtualization_fault_back
+#endif
+	add r16=VMM_VPD_BASE_OFFSET,r21
+	extr.u r26=r25,13,7 //r2
+	;;
+	ld8 r16=[r16]
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
+	shladd r26=r26,4,r20
+	mov r24=b0
+	;;
+	add r27=VPD_VPSR_START_OFFSET,r16
+	mov b0=r26
+	br.many b0
+	;;
+kvm_asm_mov_to_psr_back:
+	ld8 r17=[r27]
+	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
+	dep r19=0,r19,32,32
+	;;
+	ld4 r23=[r22]
+	dep r18=0,r17,0,32
+	;;
+	add r30=r18,r19
+	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
+	;;
+	st8 [r27]=r30
+	and r27=r28,r30
+	and r29=r28,r17
+	;;
+	cmp.eq p5,p0=r29,r27
+	cmp.eq p6,p7=r28,r27
+	(p5) br.many kvm_asm_mov_to_psr_1
+	;;
+	//virtual to physical
+	(p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
+	(p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
+	(p7) dep r23=-1,r23,0,1
+	;;
+	//physical to virtual
+	(p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
+	(p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
+	(p6) dep r23=0,r23,0,1
+	;;
+	ld8 r26=[r26]
+	ld8 r27=[r27]
+	st4 [r22]=r23
+	dep.z r28=4,61,3
+	;;
+	mov rr[r0]=r26
+	;;
+	mov rr[r28]=r27
+	;;
+	srlz.d
+	;;
+kvm_asm_mov_to_psr_1:
+	mov r20=cr.ipsr
+	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
+	;;
+	or r19=r19,r28
+	dep r20=0,r20,0,32
+	;;
+	add r20=r19,r20
+	mov b0=r24
+	;;
+	/* Comment it out due to short of fp lazy algorithm support
+	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
+	;;
+	ld8 r27=[r27]
+	;;
+	tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
+	;;
+	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
+	;;
+	*/
+	mov cr.ipsr=r20
+	cmp.ne p6,p0=r0,r0
+	;;
+	tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
+	tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
+	(p6) br.dpnt.few kvm_resume_to_guest
+	;;
+	add r29=VPD_VTPR_START_OFFSET,r16
+	add r30=VPD_VHPI_START_OFFSET,r16
+	;;
+	ld8 r29=[r29]
+	ld8 r30=[r30]
+	;;
+	extr.u r17=r29,4,4
+	extr.u r18=r29,16,1
+	;;
+	dep r17=r18,r17,4,1
+	;;
+	cmp.gt p6,p0=r30,r17
+	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
+	br.many kvm_resume_to_guest
+END(kvm_asm_mov_to_psr)
+
+
+ENTRY(kvm_asm_dispatch_vexirq)
+//increment iip
+	mov r16=cr.ipsr
+	;;
+	extr.u r17=r16,IA64_PSR_RI_BIT,2
+	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+	;;
+	(p6) mov r18=cr.iip
+	(p6) mov r17=r0
+	(p7) add r17=1,r17
+	;;
+	(p6) add r18=0x10,r18
+	dep r16=r17,r16,IA64_PSR_RI_BIT,2
+	;;
+	(p6) mov cr.iip=r18
+	mov cr.ipsr=r16
+	mov r30 =1
+	br.many kvm_dispatch_vexirq
+END(kvm_asm_dispatch_vexirq)
+
+// thash
+// TODO: add support when pta.vf = 1
+GLOBAL_ENTRY(kvm_asm_thash)
+#ifndef ACCE_THASH
+	br.many kvm_virtualization_fault_back
+#endif
+	extr.u r17=r25,20,7		// get r3 from opcode in r25
+	extr.u r18=r25,6,7		// get r1 from opcode in r25
+	addl r20=@gprel(asm_mov_from_reg),gp
+	;;
+	adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
+	shladd r17=r17,4,r20	// get addr of MOVE_FROM_REG(r17)
+	adds r16=VMM_VPD_BASE_OFFSET,r21	// get vcpu.arch.priveregs
+	;;
+	mov r24=b0
+	;;
+	ld8 r16=[r16]		// get VPD addr
+	mov b0=r17
+	br.many b0			// r19 return value
+	;;
+kvm_asm_thash_back1:
+	shr.u r23=r19,61		// get RR number
+	adds r25=VMM_VCPU_VRR0_OFFSET,r21	// get vcpu->arch.vrr[0]'s addr
+	adds r16=VMM_VPD_VPTA_OFFSET,r16	// get vpta
+	;;
+	shladd r27=r23,3,r25	// get vcpu->arch.vrr[r23]'s addr
+	ld8 r17=[r16]		// get PTA
+	mov r26=1
+	;;
+	extr.u r29=r17,2,6		// get pta.size
+	ld8 r25=[r27]		// get vcpu->arch.vrr[r23]'s value
+	;;
+	extr.u r25=r25,2,6		// get rr.ps
+	shl r22=r26,r29		// 1UL << pta.size
+	;;
+	shr.u r23=r19,r25		// vaddr >> rr.ps
+	adds r26=3,r29		// pta.size + 3
+	shl r27=r17,3		// pta << 3
+	;;
+	shl r23=r23,3		// (vaddr >> rr.ps) << 3
+	shr.u r27=r27,r26		// (pta << 3) >> (pta.size+3)
+	movl r16=7<<61
+	;;
+	adds r22=-1,r22		// (1UL << pta.size) - 1
+	shl r27=r27,r29		// ((pta<<3)>>(pta.size+3))<<pta.size
+	and r19=r19,r16		// vaddr & VRN_MASK
+	;;
+	and r22=r22,r23		// vhpt_offset
+	or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
+	adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
+	;;
+	or r19=r19,r22		// calc pval
+	shladd r17=r18,4,r26
+	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
+	;;
+	mov b0=r17
+	br.many b0
+END(kvm_asm_thash)
+
+#define MOV_TO_REG0	\
+{;			\
+	nop.b 0x0;		\
+	nop.b 0x0;		\
+	nop.b 0x0;		\
+	;;			\
+};
+
+
+#define MOV_TO_REG(n)	\
+{;			\
+	mov r##n##=r19;	\
+	mov b0=r30;	\
+	br.sptk.many b0;	\
+	;;			\
+};
+
+
+#define MOV_FROM_REG(n)	\
+{;				\
+	mov r19=r##n##;		\
+	mov b0=r30;		\
+	br.sptk.many b0;		\
+	;;				\
+};
+
+
+#define MOV_TO_BANK0_REG(n)			\
+ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);	\
+{;						\
+	mov r26=r2;				\
+	mov r2=r19;				\
+	bsw.1;					\
+	;;						\
+};						\
+{;						\
+	mov r##n##=r2;				\
+	nop.b 0x0;					\
+	bsw.0;					\
+	;;						\
+};						\
+{;						\
+	mov r2=r26;				\
+	mov b0=r30;				\
+	br.sptk.many b0;				\
+	;;						\
+};						\
+END(asm_mov_to_bank0_reg##n##)
+
+
+#define MOV_FROM_BANK0_REG(n)			\
+ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);	\
+{;						\
+	mov r26=r2;				\
+	nop.b 0x0;					\
+	bsw.1;					\
+	;;						\
+};						\
+{;						\
+	mov r2=r##n##;				\
+	nop.b 0x0;					\
+	bsw.0;					\
+	;;						\
+};						\
+{;						\
+	mov r19=r2;				\
+	mov r2=r26;				\
+	mov b0=r30;				\
+};						\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many b0;				\
+	;;						\
+};						\
+END(asm_mov_from_bank0_reg##n##)
+
+
+#define JMP_TO_MOV_TO_BANK0_REG(n)		\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many asm_mov_to_bank0_reg##n##;	\
+	;;						\
+}
+
+
+#define JMP_TO_MOV_FROM_BANK0_REG(n)		\
+{;						\
+	nop.b 0x0;					\
+	nop.b 0x0;					\
+	br.sptk.many asm_mov_from_bank0_reg##n##;	\
+	;;						\
+}
+
+
+MOV_FROM_BANK0_REG(16)
+MOV_FROM_BANK0_REG(17)
+MOV_FROM_BANK0_REG(18)
+MOV_FROM_BANK0_REG(19)
+MOV_FROM_BANK0_REG(20)
+MOV_FROM_BANK0_REG(21)
+MOV_FROM_BANK0_REG(22)
+MOV_FROM_BANK0_REG(23)
+MOV_FROM_BANK0_REG(24)
+MOV_FROM_BANK0_REG(25)
+MOV_FROM_BANK0_REG(26)
+MOV_FROM_BANK0_REG(27)
+MOV_FROM_BANK0_REG(28)
+MOV_FROM_BANK0_REG(29)
+MOV_FROM_BANK0_REG(30)
+MOV_FROM_BANK0_REG(31)
+
+
+// mov from reg table
+ENTRY(asm_mov_from_reg)
+	MOV_FROM_REG(0)
+	MOV_FROM_REG(1)
+	MOV_FROM_REG(2)
+	MOV_FROM_REG(3)
+	MOV_FROM_REG(4)
+	MOV_FROM_REG(5)
+	MOV_FROM_REG(6)
+	MOV_FROM_REG(7)
+	MOV_FROM_REG(8)
+	MOV_FROM_REG(9)
+	MOV_FROM_REG(10)
+	MOV_FROM_REG(11)
+	MOV_FROM_REG(12)
+	MOV_FROM_REG(13)
+	MOV_FROM_REG(14)
+	MOV_FROM_REG(15)
+	JMP_TO_MOV_FROM_BANK0_REG(16)
+	JMP_TO_MOV_FROM_BANK0_REG(17)
+	JMP_TO_MOV_FROM_BANK0_REG(18)
+	JMP_TO_MOV_FROM_BANK0_REG(19)
+	JMP_TO_MOV_FROM_BANK0_REG(20)
+	JMP_TO_MOV_FROM_BANK0_REG(21)
+	JMP_TO_MOV_FROM_BANK0_REG(22)
+	JMP_TO_MOV_FROM_BANK0_REG(23)
+	JMP_TO_MOV_FROM_BANK0_REG(24)
+	JMP_TO_MOV_FROM_BANK0_REG(25)
+	JMP_TO_MOV_FROM_BANK0_REG(26)
+	JMP_TO_MOV_FROM_BANK0_REG(27)
+	JMP_TO_MOV_FROM_BANK0_REG(28)
+	JMP_TO_MOV_FROM_BANK0_REG(29)
+	JMP_TO_MOV_FROM_BANK0_REG(30)
+	JMP_TO_MOV_FROM_BANK0_REG(31)
+	MOV_FROM_REG(32)
+	MOV_FROM_REG(33)
+	MOV_FROM_REG(34)
+	MOV_FROM_REG(35)
+	MOV_FROM_REG(36)
+	MOV_FROM_REG(37)
+	MOV_FROM_REG(38)
+	MOV_FROM_REG(39)
+	MOV_FROM_REG(40)
+	MOV_FROM_REG(41)
+	MOV_FROM_REG(42)
+	MOV_FROM_REG(43)
+	MOV_FROM_REG(44)
+	MOV_FROM_REG(45)
+	MOV_FROM_REG(46)
+	MOV_FROM_REG(47)
+	MOV_FROM_REG(48)
+	MOV_FROM_REG(49)
+	MOV_FROM_REG(50)
+	MOV_FROM_REG(51)
+	MOV_FROM_REG(52)
+	MOV_FROM_REG(53)
+	MOV_FROM_REG(54)
+	MOV_FROM_REG(55)
+	MOV_FROM_REG(56)
+	MOV_FROM_REG(57)
+	MOV_FROM_REG(58)
+	MOV_FROM_REG(59)
+	MOV_FROM_REG(60)
+	MOV_FROM_REG(61)
+	MOV_FROM_REG(62)
+	MOV_FROM_REG(63)
+	MOV_FROM_REG(64)
+	MOV_FROM_REG(65)
+	MOV_FROM_REG(66)
+	MOV_FROM_REG(67)
+	MOV_FROM_REG(68)
+	MOV_FROM_REG(69)
+	MOV_FROM_REG(70)
+	MOV_FROM_REG(71)
+	MOV_FROM_REG(72)
+	MOV_FROM_REG(73)
+	MOV_FROM_REG(74)
+	MOV_FROM_REG(75)
+	MOV_FROM_REG(76)
+	MOV_FROM_REG(77)
+	MOV_FROM_REG(78)
+	MOV_FROM_REG(79)
+	MOV_FROM_REG(80)
+	MOV_FROM_REG(81)
+	MOV_FROM_REG(82)
+	MOV_FROM_REG(83)
+	MOV_FROM_REG(84)
+	MOV_FROM_REG(85)
+	MOV_FROM_REG(86)
+	MOV_FROM_REG(87)
+	MOV_FROM_REG(88)
+	MOV_FROM_REG(89)
+	MOV_FROM_REG(90)
+	MOV_FROM_REG(91)
+	MOV_FROM_REG(92)
+	MOV_FROM_REG(93)
+	MOV_FROM_REG(94)
+	MOV_FROM_REG(95)
+	MOV_FROM_REG(96)
+	MOV_FROM_REG(97)
+	MOV_FROM_REG(98)
+	MOV_FROM_REG(99)
+	MOV_FROM_REG(100)
+	MOV_FROM_REG(101)
+	MOV_FROM_REG(102)
+	MOV_FROM_REG(103)
+	MOV_FROM_REG(104)
+	MOV_FROM_REG(105)
+	MOV_FROM_REG(106)
+	MOV_FROM_REG(107)
+	MOV_FROM_REG(108)
+	MOV_FROM_REG(109)
+	MOV_FROM_REG(110)
+	MOV_FROM_REG(111)
+	MOV_FROM_REG(112)
+	MOV_FROM_REG(113)
+	MOV_FROM_REG(114)
+	MOV_FROM_REG(115)
+	MOV_FROM_REG(116)
+	MOV_FROM_REG(117)
+	MOV_FROM_REG(118)
+	MOV_FROM_REG(119)
+	MOV_FROM_REG(120)
+	MOV_FROM_REG(121)
+	MOV_FROM_REG(122)
+	MOV_FROM_REG(123)
+	MOV_FROM_REG(124)
+	MOV_FROM_REG(125)
+	MOV_FROM_REG(126)
+	MOV_FROM_REG(127)
+END(asm_mov_from_reg)
+
+
+/* must be in bank 0
+ * parameter:
+ * r31: pr
+ * r24: b0
+ */
+ENTRY(kvm_resume_to_guest)
+	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+	;;
+	ld8 r1 =[r16]
+	adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
+	;;
+	mov r16=cr.ipsr
+	;;
+	ld8 r20 = [r20]
+	adds r19=VMM_VPD_BASE_OFFSET,r21
+	;;
+	ld8 r25=[r19]
+	extr.u r17=r16,IA64_PSR_RI_BIT,2
+	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
+	;;
+	(p6) mov r18=cr.iip
+	(p6) mov r17=r0
+	;;
+	(p6) add r18=0x10,r18
+	(p7) add r17=1,r17
+	;;
+	(p6) mov cr.iip=r18
+	dep r16=r17,r16,IA64_PSR_RI_BIT,2
+	;;
+	mov cr.ipsr=r16
+	adds r19= VPD_VPSR_START_OFFSET,r25
+	add r28=PAL_VPS_RESUME_NORMAL,r20
+	add r29=PAL_VPS_RESUME_HANDLER,r20
+	;;
+	ld8 r19=[r19]
+	mov b0=r29
+	cmp.ne p6,p7 = r0,r0
+	;;
+	tbit.z p6,p7 = r19,IA64_PSR_IC_BIT		// p1=vpsr.ic
+	;;
+	(p6) ld8 r26=[r25]
+	(p7) mov b0=r28
+	mov pr=r31,-2
+	br.sptk.many b0             // call pal service
+	;;
+END(kvm_resume_to_guest)
+
+
+MOV_TO_BANK0_REG(16)
+MOV_TO_BANK0_REG(17)
+MOV_TO_BANK0_REG(18)
+MOV_TO_BANK0_REG(19)
+MOV_TO_BANK0_REG(20)
+MOV_TO_BANK0_REG(21)
+MOV_TO_BANK0_REG(22)
+MOV_TO_BANK0_REG(23)
+MOV_TO_BANK0_REG(24)
+MOV_TO_BANK0_REG(25)
+MOV_TO_BANK0_REG(26)
+MOV_TO_BANK0_REG(27)
+MOV_TO_BANK0_REG(28)
+MOV_TO_BANK0_REG(29)
+MOV_TO_BANK0_REG(30)
+MOV_TO_BANK0_REG(31)
+
+
+// mov to reg table
+ENTRY(asm_mov_to_reg)
+	MOV_TO_REG0
+	MOV_TO_REG(1)
+	MOV_TO_REG(2)
+	MOV_TO_REG(3)
+	MOV_TO_REG(4)
+	MOV_TO_REG(5)
+	MOV_TO_REG(6)
+	MOV_TO_REG(7)
+	MOV_TO_REG(8)
+	MOV_TO_REG(9)
+	MOV_TO_REG(10)
+	MOV_TO_REG(11)
+	MOV_TO_REG(12)
+	MOV_TO_REG(13)
+	MOV_TO_REG(14)
+	MOV_TO_REG(15)
+	JMP_TO_MOV_TO_BANK0_REG(16)
+	JMP_TO_MOV_TO_BANK0_REG(17)
+	JMP_TO_MOV_TO_BANK0_REG(18)
+	JMP_TO_MOV_TO_BANK0_REG(19)
+	JMP_TO_MOV_TO_BANK0_REG(20)
+	JMP_TO_MOV_TO_BANK0_REG(21)
+	JMP_TO_MOV_TO_BANK0_REG(22)
+	JMP_TO_MOV_TO_BANK0_REG(23)
+	JMP_TO_MOV_TO_BANK0_REG(24)
+	JMP_TO_MOV_TO_BANK0_REG(25)
+	JMP_TO_MOV_TO_BANK0_REG(26)
+	JMP_TO_MOV_TO_BANK0_REG(27)
+	JMP_TO_MOV_TO_BANK0_REG(28)
+	JMP_TO_MOV_TO_BANK0_REG(29)
+	JMP_TO_MOV_TO_BANK0_REG(30)
+	JMP_TO_MOV_TO_BANK0_REG(31)
+	MOV_TO_REG(32)
+	MOV_TO_REG(33)
+	MOV_TO_REG(34)
+	MOV_TO_REG(35)
+	MOV_TO_REG(36)
+	MOV_TO_REG(37)
+	MOV_TO_REG(38)
+	MOV_TO_REG(39)
+	MOV_TO_REG(40)
+	MOV_TO_REG(41)
+	MOV_TO_REG(42)
+	MOV_TO_REG(43)
+	MOV_TO_REG(44)
+	MOV_TO_REG(45)
+	MOV_TO_REG(46)
+	MOV_TO_REG(47)
+	MOV_TO_REG(48)
+	MOV_TO_REG(49)
+	MOV_TO_REG(50)
+	MOV_TO_REG(51)
+	MOV_TO_REG(52)
+	MOV_TO_REG(53)
+	MOV_TO_REG(54)
+	MOV_TO_REG(55)
+	MOV_TO_REG(56)
+	MOV_TO_REG(57)
+	MOV_TO_REG(58)
+	MOV_TO_REG(59)
+	MOV_TO_REG(60)
+	MOV_TO_REG(61)
+	MOV_TO_REG(62)
+	MOV_TO_REG(63)
+	MOV_TO_REG(64)
+	MOV_TO_REG(65)
+	MOV_TO_REG(66)
+	MOV_TO_REG(67)
+	MOV_TO_REG(68)
+	MOV_TO_REG(69)
+	MOV_TO_REG(70)
+	MOV_TO_REG(71)
+	MOV_TO_REG(72)
+	MOV_TO_REG(73)
+	MOV_TO_REG(74)
+	MOV_TO_REG(75)
+	MOV_TO_REG(76)
+	MOV_TO_REG(77)
+	MOV_TO_REG(78)
+	MOV_TO_REG(79)
+	MOV_TO_REG(80)
+	MOV_TO_REG(81)
+	MOV_TO_REG(82)
+	MOV_TO_REG(83)
+	MOV_TO_REG(84)
+	MOV_TO_REG(85)
+	MOV_TO_REG(86)
+	MOV_TO_REG(87)
+	MOV_TO_REG(88)
+	MOV_TO_REG(89)
+	MOV_TO_REG(90)
+	MOV_TO_REG(91)
+	MOV_TO_REG(92)
+	MOV_TO_REG(93)
+	MOV_TO_REG(94)
+	MOV_TO_REG(95)
+	MOV_TO_REG(96)
+	MOV_TO_REG(97)
+	MOV_TO_REG(98)
+	MOV_TO_REG(99)
+	MOV_TO_REG(100)
+	MOV_TO_REG(101)
+	MOV_TO_REG(102)
+	MOV_TO_REG(103)
+	MOV_TO_REG(104)
+	MOV_TO_REG(105)
+	MOV_TO_REG(106)
+	MOV_TO_REG(107)
+	MOV_TO_REG(108)
+	MOV_TO_REG(109)
+	MOV_TO_REG(110)
+	MOV_TO_REG(111)
+	MOV_TO_REG(112)
+	MOV_TO_REG(113)
+	MOV_TO_REG(114)
+	MOV_TO_REG(115)
+	MOV_TO_REG(116)
+	MOV_TO_REG(117)
+	MOV_TO_REG(118)
+	MOV_TO_REG(119)
+	MOV_TO_REG(120)
+	MOV_TO_REG(121)
+	MOV_TO_REG(122)
+	MOV_TO_REG(123)
+	MOV_TO_REG(124)
+	MOV_TO_REG(125)
+	MOV_TO_REG(126)
+	MOV_TO_REG(127)
+END(asm_mov_to_reg)
diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
new file mode 100644
index 000000000000..5a33f7ed29a0
--- /dev/null
+++ b/arch/ia64/kvm/process.c
@@ -0,0 +1,970 @@
+/*
+ * process.c: handle interruption inject for guests.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  	Shaofan Li (Susue Li) <susie.li@intel.com>
+ *  	Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  	Xiantao Zhang (xiantao.zhang@intel.com)
+ */
+#include "vcpu.h"
+
+#include <asm/pal.h>
+#include <asm/sal.h>
+#include <asm/fpswa.h>
+#include <asm/kregs.h>
+#include <asm/tlb.h>
+
+fpswa_interface_t *vmm_fpswa_interface;
+
+#define IA64_VHPT_TRANS_VECTOR			0x0000
+#define IA64_INST_TLB_VECTOR			0x0400
+#define IA64_DATA_TLB_VECTOR			0x0800
+#define IA64_ALT_INST_TLB_VECTOR		0x0c00
+#define IA64_ALT_DATA_TLB_VECTOR		0x1000
+#define IA64_DATA_NESTED_TLB_VECTOR		0x1400
+#define IA64_INST_KEY_MISS_VECTOR		0x1800
+#define IA64_DATA_KEY_MISS_VECTOR		0x1c00
+#define IA64_DIRTY_BIT_VECTOR			0x2000
+#define IA64_INST_ACCESS_BIT_VECTOR		0x2400
+#define IA64_DATA_ACCESS_BIT_VECTOR		0x2800
+#define IA64_BREAK_VECTOR			0x2c00
+#define IA64_EXTINT_VECTOR			0x3000
+#define IA64_PAGE_NOT_PRESENT_VECTOR		0x5000
+#define IA64_KEY_PERMISSION_VECTOR		0x5100
+#define IA64_INST_ACCESS_RIGHTS_VECTOR		0x5200
+#define IA64_DATA_ACCESS_RIGHTS_VECTOR		0x5300
+#define IA64_GENEX_VECTOR			0x5400
+#define IA64_DISABLED_FPREG_VECTOR		0x5500
+#define IA64_NAT_CONSUMPTION_VECTOR		0x5600
+#define IA64_SPECULATION_VECTOR		0x5700 /* UNUSED */
+#define IA64_DEBUG_VECTOR			0x5900
+#define IA64_UNALIGNED_REF_VECTOR		0x5a00
+#define IA64_UNSUPPORTED_DATA_REF_VECTOR	0x5b00
+#define IA64_FP_FAULT_VECTOR			0x5c00
+#define IA64_FP_TRAP_VECTOR			0x5d00
+#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 	0x5e00
+#define IA64_TAKEN_BRANCH_TRAP_VECTOR		0x5f00
+#define IA64_SINGLE_STEP_TRAP_VECTOR		0x6000
+
+/* SDM vol2 5.5 - IVA based interruption handling */
+#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
+			IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |    	\
+			IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
+
+#define DOMN_PAL_REQUEST    0x110000
+#define DOMN_SAL_REQUEST    0x110001
+
+static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
+	0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
+	0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
+	0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
+	0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
+	0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
+	0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
+	0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
+};
+
+static void collect_interruption(struct kvm_vcpu *vcpu)
+{
+	u64 ipsr;
+	u64 vdcr;
+	u64 vifs;
+	unsigned long vpsr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	vpsr = vcpu_get_psr(vcpu);
+	vcpu_bsw0(vcpu);
+	if (vpsr & IA64_PSR_IC) {
+
+		/* Sync mpsr id/da/dd/ss/ed bits to vipsr
+		 * since after guest do rfi, we still want these bits on in
+		 * mpsr
+		 */
+
+		ipsr = regs->cr_ipsr;
+		vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
+					| IA64_PSR_DD | IA64_PSR_SS
+					| IA64_PSR_ED));
+		vcpu_set_ipsr(vcpu, vpsr);
+
+		/* Currently, for trap, we do not advance IIP to next
+		 * instruction. That's because we assume caller already
+		 * set up IIP correctly
+		 */
+
+		vcpu_set_iip(vcpu , regs->cr_iip);
+
+		/* set vifs.v to zero */
+		vifs = VCPU(vcpu, ifs);
+		vifs &= ~IA64_IFS_V;
+		vcpu_set_ifs(vcpu, vifs);
+
+		vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
+	}
+
+	vdcr = VCPU(vcpu, dcr);
+
+	/* Set guest psr
+	 * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
+	 * be: set to the value of dcr.be
+	 * pp: set to the value of dcr.pp
+	 */
+	vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
+	vpsr |= (vdcr & IA64_DCR_BE);
+
+	/* VDCR pp bit position is different from VPSR pp bit */
+	if (vdcr & IA64_DCR_PP) {
+		vpsr |= IA64_PSR_PP;
+	} else {
+		vpsr &= ~IA64_PSR_PP;;
+	}
+
+	vcpu_set_psr(vcpu, vpsr);
+
+}
+
+void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
+{
+	u64 viva;
+	struct kvm_pt_regs *regs;
+	union ia64_isr pt_isr;
+
+	regs = vcpu_regs(vcpu);
+
+	/* clear cr.isr.ir (incomplete register frame)*/
+	pt_isr.val = VMX(vcpu, cr_isr);
+	pt_isr.ir = 0;
+	VMX(vcpu, cr_isr) = pt_isr.val;
+
+	collect_interruption(vcpu);
+
+	viva = vcpu_get_iva(vcpu);
+	regs->cr_iip = viva + vec;
+}
+
+static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
+{
+	union ia64_rr rr, rr1;
+
+	rr.val = vcpu_get_rr(vcpu, ifa);
+	rr1.val = 0;
+	rr1.ps = rr.ps;
+	rr1.rid = rr.rid;
+	return (rr1.val);
+}
+
+
+/*
+ * Set vIFA & vITIR & vIHA, when vPSR.ic =1
+ * Parameter:
+ *  set_ifa: if true, set vIFA
+ *  set_itir: if true, set vITIR
+ *  set_iha: if true, set vIHA
+ */
+void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
+		int set_ifa, int set_itir, int set_iha)
+{
+	long vpsr;
+	u64 value;
+
+	vpsr = VCPU(vcpu, vpsr);
+	/* Vol2, Table 8-1 */
+	if (vpsr & IA64_PSR_IC) {
+		if (set_ifa)
+			vcpu_set_ifa(vcpu, vadr);
+		if (set_itir) {
+			value = vcpu_get_itir_on_fault(vcpu, vadr);
+			vcpu_set_itir(vcpu, value);
+		}
+
+		if (set_iha) {
+			value = vcpu_thash(vcpu, vadr);
+			vcpu_set_iha(vcpu, value);
+		}
+	}
+}
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
+}
+
+/*
+ * Instruction TLB Fault
+ *  @ Instruction TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
+}
+
+
+
+/*
+ * Data Nested TLB Fault
+ *  @ Data Nested TLB Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void nested_dtlb(struct kvm_vcpu *vcpu)
+{
+	inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
+}
+
+/*
+ * Alternate Data TLB Fault
+ *  @ Alternate Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
+}
+
+
+/*
+ * Data TLB Fault
+ *  @ Data TLB vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
+}
+
+/* Deal with:
+ *  VHPT Translation Vector
+ */
+static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR, IHA*/
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
+	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
+
+
+}
+
+/*
+ * VHPT Instruction Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_vhpt_fault(vcpu, vadr);
+}
+
+
+/*
+ * VHPT Data Fault
+ *  @ VHPT Translation vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_vhpt_fault(vcpu, vadr);
+}
+
+
+
+/*
+ * Deal with:
+ *  General Exception vector
+ */
+void _general_exception(struct kvm_vcpu *vcpu)
+{
+	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
+}
+
+
+/*
+ * Illegal Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_op(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Illegal Dependency Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void illegal_dep(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Reserved Register/Field Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rsv_reg_field(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+/*
+ * Privileged Operation Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+
+void privilege_op(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Unimplement Data Address Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void unimpl_daddr(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/*
+ * Privileged Register Fault
+ *  @ General Exception Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void privilege_reg(struct kvm_vcpu *vcpu)
+{
+	_general_exception(vcpu);
+}
+
+/* Deal with
+ *  Nat consumption vector
+ * Parameter:
+ *  vaddr: Optional, if t == REGISTER
+ */
+static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
+						enum tlb_miss_type t)
+{
+	/* If vPSR.ic && t == DATA/INST, IFA */
+	if (t == DATA || t == INSTRUCTION) {
+		/* IFA */
+		set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
+	}
+
+	inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
+}
+
+/*
+ * Instruction Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_nat_consumption_fault(vcpu, vadr, INSTRUCTION);
+}
+
+/*
+ * Register Nat Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void rnat_consumption(struct kvm_vcpu *vcpu)
+{
+	_nat_consumption_fault(vcpu, 0, REGISTER);
+}
+
+/*
+ * Data Nat Page Consumption Fault
+ *  @ Nat Consumption Vector
+ * Refer to SDM Vol2 Table 5-6 & 8-1
+ */
+void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	_nat_consumption_fault(vcpu, vadr, DATA);
+}
+
+/* Deal with
+ *  Page not present vector
+ */
+static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
+}
+
+
+void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	__page_not_present(vcpu, vadr);
+}
+
+
+void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	__page_not_present(vcpu, vadr);
+}
+
+
+/* Deal with
+ *  Data access rights vector
+ */
+void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	/* If vPSR.ic, IFA, ITIR */
+	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
+	inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
+}
+
+fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
+		unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
+		unsigned long *ifs, struct kvm_pt_regs *regs)
+{
+	fp_state_t fp_state;
+	fpswa_ret_t ret;
+	struct kvm_vcpu *vcpu = current_vcpu;
+
+	uint64_t old_rr7 = ia64_get_rr(7UL<<61);
+
+	if (!vmm_fpswa_interface)
+		return (fpswa_ret_t) {-1, 0, 0, 0};
+
+	/*
+	 * Just let fpswa driver to use hardware fp registers.
+	 * No fp register is valid in memory.
+	 */
+	memset(&fp_state, 0, sizeof(fp_state_t));
+
+	/*
+	 * unsigned long (*EFI_FPSWA) (
+	 *      unsigned long    trap_type,
+	 *      void             *Bundle,
+	 *      unsigned long    *pipsr,
+	 *      unsigned long    *pfsr,
+	 *      unsigned long    *pisr,
+	 *      unsigned long    *ppreds,
+	 *      unsigned long    *pifs,
+	 *      void             *fp_state);
+	 */
+	/*Call host fpswa interface directly to virtualize
+	 *guest fpswa request!
+	 */
+	ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
+	ia64_srlz_d();
+
+	ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
+			ipsr, fpsr, isr, pr, ifs, &fp_state);
+	ia64_set_rr(7UL << 61, old_rr7);
+	ia64_srlz_d();
+	return ret;
+}
+
+/*
+ * Handle floating-point assist faults and traps for domain.
+ */
+unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
+					unsigned long isr)
+{
+	struct kvm_vcpu *v = current_vcpu;
+	IA64_BUNDLE bundle;
+	unsigned long fault_ip;
+	fpswa_ret_t ret;
+
+	fault_ip = regs->cr_iip;
+	/*
+	 * When the FP trap occurs, the trapping instruction is completed.
+	 * If ipsr.ri == 0, there is the trapping instruction in previous
+	 * bundle.
+	 */
+	if (!fp_fault && (ia64_psr(regs)->ri == 0))
+		fault_ip -= 16;
+
+	if (fetch_code(v, fault_ip, &bundle))
+		return -EAGAIN;
+
+	if (!bundle.i64[0] && !bundle.i64[1])
+		return -EACCES;
+
+	ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
+			&isr, &regs->pr, &regs->cr_ifs, regs);
+	return ret.status;
+}
+
+void reflect_interruption(u64 ifa, u64 isr, u64 iim,
+		u64 vec, struct kvm_pt_regs *regs)
+{
+	u64 vector;
+	int status ;
+	struct kvm_vcpu *vcpu = current_vcpu;
+	u64 vpsr = VCPU(vcpu, vpsr);
+
+	vector = vec2off[vec];
+
+	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
+		panic_vm(vcpu);
+		return;
+	}
+
+	switch (vec) {
+	case 32: 	/*IA64_FP_FAULT_VECTOR*/
+		status = vmm_handle_fpu_swa(1, regs, isr);
+		if (!status) {
+			vcpu_increment_iip(vcpu);
+			return;
+		} else if (-EAGAIN == status)
+			return;
+		break;
+	case 33:	/*IA64_FP_TRAP_VECTOR*/
+		status = vmm_handle_fpu_swa(0, regs, isr);
+		if (!status)
+			return ;
+		else if (-EAGAIN == status) {
+			vcpu_decrement_iip(vcpu);
+			return ;
+		}
+		break;
+	}
+
+	VCPU(vcpu, isr) = isr;
+	VCPU(vcpu, iipa) = regs->cr_iip;
+	if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
+		VCPU(vcpu, iim) = iim;
+	else
+		set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
+
+	inject_guest_interruption(vcpu, vector);
+}
+
+static void set_pal_call_data(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	/*FIXME:For static and stacked convention, firmware
+	 * has put the parameters in gr28-gr31 before
+	 * break to vmm  !!*/
+
+	p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28);
+	p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29);
+	p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
+	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
+	p->exit_reason = EXIT_REASON_PAL_CALL;
+}
+
+static void set_pal_call_result(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
+		vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
+		vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
+		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
+		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
+	} else
+		panic_vm(vcpu);
+}
+
+static void set_sal_call_data(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
+	p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
+	p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
+	p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
+	p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
+	p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
+	p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
+	p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
+	p->exit_reason = EXIT_REASON_SAL_CALL;
+}
+
+static void set_sal_call_result(struct kvm_vcpu *vcpu)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+
+	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
+		vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
+		vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
+		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
+		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
+	} else
+		panic_vm(vcpu);
+}
+
+void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
+		unsigned long isr, unsigned long iim)
+{
+	struct kvm_vcpu *v = current_vcpu;
+
+	if (ia64_psr(regs)->cpl == 0) {
+		/* Allow hypercalls only when cpl = 0.  */
+		if (iim == DOMN_PAL_REQUEST) {
+			set_pal_call_data(v);
+			vmm_transition(v);
+			set_pal_call_result(v);
+			vcpu_increment_iip(v);
+			return;
+		} else if (iim == DOMN_SAL_REQUEST) {
+			set_sal_call_data(v);
+			vmm_transition(v);
+			set_sal_call_result(v);
+			vcpu_increment_iip(v);
+			return;
+		}
+	}
+	reflect_interruption(ifa, isr, iim, 11, regs);
+}
+
+void check_pending_irq(struct kvm_vcpu *vcpu)
+{
+	int  mask, h_pending, h_inservice;
+	u64 isr;
+	unsigned long  vpsr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	h_pending = highest_pending_irq(vcpu);
+	if (h_pending == NULL_VECTOR) {
+		update_vhpi(vcpu, NULL_VECTOR);
+		return;
+	}
+	h_inservice = highest_inservice_irq(vcpu);
+
+	vpsr = VCPU(vcpu, vpsr);
+	mask = irq_masked(vcpu, h_pending, h_inservice);
+	if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
+		isr = vpsr & IA64_PSR_RI;
+		update_vhpi(vcpu, h_pending);
+		reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+	} else if (mask == IRQ_MASKED_BY_INSVC) {
+		if (VCPU(vcpu, vhpi))
+			update_vhpi(vcpu, NULL_VECTOR);
+	} else {
+		/* masked by vpsr.i or vtpr.*/
+		update_vhpi(vcpu, h_pending);
+	}
+}
+
+static void generate_exirq(struct kvm_vcpu *vcpu)
+{
+	unsigned  vpsr;
+	uint64_t isr;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	vpsr = VCPU(vcpu, vpsr);
+	isr = vpsr & IA64_PSR_RI;
+	if (!(vpsr & IA64_PSR_IC))
+		panic_vm(vcpu);
+	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
+}
+
+void vhpi_detection(struct kvm_vcpu *vcpu)
+{
+	uint64_t    threshold, vhpi;
+	union ia64_tpr       vtpr;
+	struct ia64_psr vpsr;
+
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	vtpr.val = VCPU(vcpu, tpr);
+
+	threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
+	vhpi = VCPU(vcpu, vhpi);
+	if (vhpi > threshold) {
+		/* interrupt actived*/
+		generate_exirq(vcpu);
+	}
+}
+
+
+void leave_hypervisor_tail(void)
+{
+	struct kvm_vcpu *v = current_vcpu;
+
+	if (VMX(v, timer_check)) {
+		VMX(v, timer_check) = 0;
+		if (VMX(v, itc_check)) {
+			if (vcpu_get_itc(v) > VCPU(v, itm)) {
+				if (!(VCPU(v, itv) & (1 << 16))) {
+					vcpu_pend_interrupt(v, VCPU(v, itv)
+							& 0xff);
+				VMX(v, itc_check) = 0;
+				} else {
+					v->arch.timer_pending = 1;
+				}
+				VMX(v, last_itc) = VCPU(v, itm) + 1;
+			}
+		}
+	}
+
+	rmb();
+	if (v->arch.irq_new_pending) {
+		v->arch.irq_new_pending = 0;
+		VMX(v, irq_check) = 0;
+		check_pending_irq(v);
+		return;
+	}
+	if (VMX(v, irq_check)) {
+		VMX(v, irq_check) = 0;
+		vhpi_detection(v);
+	}
+}
+
+
+static inline void handle_lds(struct kvm_pt_regs *regs)
+{
+	regs->cr_ipsr |= IA64_PSR_ED;
+}
+
+void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
+{
+	unsigned long pte;
+	union ia64_rr rr;
+
+	rr.val = ia64_get_rr(vadr);
+	pte =  vadr & _PAGE_PPN_MASK;
+	pte = pte | PHY_PAGE_WB;
+	thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
+	return;
+}
+
+void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
+{
+	unsigned long vpsr;
+	int type;
+
+	u64 vhpt_adr, gppa, pteval, rr, itir;
+	union ia64_isr misr;
+	union ia64_pta vpta;
+	struct thash_data *data;
+	struct kvm_vcpu *v = current_vcpu;
+
+	vpsr = VCPU(v, vpsr);
+	misr.val = VMX(v, cr_isr);
+
+	type = vec;
+
+	if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
+		if (vec == 2) {
+			if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
+				emulate_io_inst(v, ((vadr << 1) >> 1), 4);
+				return;
+			}
+		}
+		physical_tlb_miss(v, vadr, type);
+		return;
+	}
+	data = vtlb_lookup(v, vadr, type);
+	if (data != 0) {
+		if (type == D_TLB) {
+			gppa = (vadr & ((1UL << data->ps) - 1))
+				+ (data->ppn >> (data->ps - 12) << data->ps);
+			if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
+				if (data->pl >= ((regs->cr_ipsr >>
+						IA64_PSR_CPL0_BIT) & 3))
+					emulate_io_inst(v, gppa, data->ma);
+				else {
+					vcpu_set_isr(v, misr.val);
+					data_access_rights(v, vadr);
+				}
+				return ;
+			}
+		}
+		thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
+
+	} else if (type == D_TLB) {
+		if (misr.sp) {
+			handle_lds(regs);
+			return;
+		}
+
+		rr = vcpu_get_rr(v, vadr);
+		itir = rr & (RR_RID_MASK | RR_PS_MASK);
+
+		if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
+			if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				alt_dtlb(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+			return ;
+		}
+
+		vpta.val = vcpu_get_pta(v);
+		/* avoid recursively walking (short format) VHPT */
+
+		vhpt_adr = vcpu_thash(v, vadr);
+		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+			/* VHPT successfully read.  */
+			if (!(pteval & _PAGE_P)) {
+				if (vpsr & IA64_PSR_IC) {
+					vcpu_set_isr(v, misr.val);
+					dtlb_fault(v, vadr);
+				} else {
+					nested_dtlb(v);
+				}
+			} else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
+				thash_purge_and_insert(v, pteval, itir,
+								vadr, D_TLB);
+			} else if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				dtlb_fault(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+		} else {
+			/* Can't read VHPT.  */
+			if (vpsr & IA64_PSR_IC) {
+				vcpu_set_isr(v, misr.val);
+				dvhpt_fault(v, vadr);
+			} else {
+				nested_dtlb(v);
+			}
+		}
+	} else if (type == I_TLB) {
+		if (!(vpsr & IA64_PSR_IC))
+			misr.ni = 1;
+		if (!vhpt_enabled(v, vadr, INST_REF)) {
+			vcpu_set_isr(v, misr.val);
+			alt_itlb(v, vadr);
+			return;
+		}
+
+		vpta.val = vcpu_get_pta(v);
+
+		vhpt_adr = vcpu_thash(v, vadr);
+		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
+			/* VHPT successfully read.  */
+			if (pteval & _PAGE_P) {
+				if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
+					vcpu_set_isr(v, misr.val);
+					itlb_fault(v, vadr);
+					return ;
+				}
+				rr = vcpu_get_rr(v, vadr);
+				itir = rr & (RR_RID_MASK | RR_PS_MASK);
+				thash_purge_and_insert(v, pteval, itir,
+							vadr, I_TLB);
+			} else {
+				vcpu_set_isr(v, misr.val);
+				inst_page_not_present(v, vadr);
+			}
+		} else {
+			vcpu_set_isr(v, misr.val);
+			ivhpt_fault(v, vadr);
+		}
+	}
+}
+
+void kvm_vexirq(struct kvm_vcpu *vcpu)
+{
+	u64 vpsr, isr;
+	struct kvm_pt_regs *regs;
+
+	regs = vcpu_regs(vcpu);
+	vpsr = VCPU(vcpu, vpsr);
+	isr = vpsr & IA64_PSR_RI;
+	reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
+}
+
+void kvm_ia64_handle_irq(struct kvm_vcpu *v)
+{
+	struct exit_ctl_data *p = &v->arch.exit_data;
+	long psr;
+
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
+	vmm_transition(v);
+	local_irq_restore(psr);
+
+	VMX(v, timer_check) = 1;
+
+}
+
+static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
+{
+	u64 oldrid, moldrid, oldpsbits, vaddr;
+	struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
+	vaddr = p->vaddr;
+
+	oldrid = VMX(v, vrr[0]);
+	VMX(v, vrr[0]) = p->rr;
+	oldpsbits = VMX(v, psbits[0]);
+	VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
+	moldrid = ia64_get_rr(0x0);
+	ia64_set_rr(0x0, vrrtomrr(p->rr));
+	ia64_srlz_d();
+
+	vaddr = PAGEALIGN(vaddr, p->ps);
+	thash_purge_entries_remote(v, vaddr, p->ps);
+
+	VMX(v, vrr[0]) = oldrid;
+	VMX(v, psbits[0]) = oldpsbits;
+	ia64_set_rr(0x0, moldrid);
+	ia64_dv_serialize_data();
+}
+
+static void vcpu_do_resume(struct kvm_vcpu *vcpu)
+{
+	/*Re-init VHPT and VTLB once from resume*/
+	vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
+	thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
+	vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
+	thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
+
+	ia64_set_pta(vcpu->arch.vhpt.pta.val);
+}
+
+static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
+{
+	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
+		vcpu_do_resume(vcpu);
+		return;
+	}
+
+	if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
+		thash_purge_all(vcpu);
+		return;
+	}
+
+	if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
+		while (vcpu->arch.ptc_g_count > 0)
+			ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
+	}
+}
+
+void vmm_transition(struct kvm_vcpu *vcpu)
+{
+	ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
+			0, 0, 0, 0, 0, 0);
+	vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
+	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
+						0, 0, 0, 0, 0, 0);
+	kvm_do_resume_op(vcpu);
+}
diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
new file mode 100644
index 000000000000..30897d44d61e
--- /dev/null
+++ b/arch/ia64/kvm/trampoline.S
@@ -0,0 +1,1038 @@
+/* Save all processor states
+ *
+ * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
+ * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
+ */
+
+#include <asm/asmmacro.h>
+#include "asm-offsets.h"
+
+
+#define CTX(name)    VMM_CTX_##name##_OFFSET
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_BRANCH_REGS			\
+	add	r2 = CTX(B0),r32;		\
+	add	r3 = CTX(B1),r32;		\
+	mov	r16 = b0;			\
+	mov	r17 = b1;			\
+	;;					\
+	st8	[r2]=r16,16;			\
+	st8	[r3]=r17,16;			\
+	;;					\
+	mov	r16 = b2;			\
+	mov	r17 = b3;			\
+	;;					\
+	st8	[r2]=r16,16;			\
+	st8	[r3]=r17,16;			\
+	;;					\
+	mov	r16 = b4;			\
+	mov	r17 = b5;			\
+	;;					\
+	st8	[r2]=r16;   			\
+	st8	[r3]=r17;   			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_BRANCH_REGS			\
+	add	r2 = CTX(B0),r33;		\
+	add	r3 = CTX(B1),r33;		\
+	;;					\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	b0 = r16;			\
+	mov	b1 = r17;			\
+	;;					\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	b2 = r16;			\
+	mov	b3 = r17;			\
+	;;					\
+	ld8	r16=[r2];   			\
+	ld8	r17=[r3];   			\
+	;;					\
+	mov	b4=r16;				\
+	mov	b5=r17;				\
+	;;
+
+
+	/*
+	 *	r32: context_t base address
+	 *	bsw == 1
+	 *	Save all bank1 general registers, r4 ~ r7
+	 */
+#define	SAVE_GENERAL_REGS			\
+	add	r2=CTX(R4),r32;			\
+	add	r3=CTX(R5),r32;			\
+	;;					\
+.mem.offset 0,0;        			\
+	st8.spill	[r2]=r4,16;		\
+.mem.offset 8,0;        			\
+	st8.spill	[r3]=r5,16;		\
+	;;					\
+.mem.offset 0,0;        			\
+	st8.spill	[r2]=r6,48;		\
+.mem.offset 8,0;        			\
+	st8.spill	[r3]=r7,48;		\
+	;;                          		\
+.mem.offset 0,0;        			\
+    st8.spill    [r2]=r12;			\
+.mem.offset 8,0;				\
+    st8.spill    [r3]=r13;			\
+    ;;
+
+	/*
+	 *	r33: context_t base address
+	 *	bsw == 1
+	 */
+#define	RESTORE_GENERAL_REGS			\
+	add	r2=CTX(R4),r33;			\
+	add	r3=CTX(R5),r33;			\
+	;;					\
+	ld8.fill	r4=[r2],16;		\
+	ld8.fill	r5=[r3],16;		\
+	;;					\
+	ld8.fill	r6=[r2],48;		\
+	ld8.fill	r7=[r3],48;		\
+	;;					\
+	ld8.fill    r12=[r2];			\
+	ld8.fill    r13 =[r3];			\
+	;;
+
+
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_KERNEL_REGS			\
+	add	r2 = CTX(KR0),r32;		\
+	add	r3 = CTX(KR1),r32;		\
+	mov	r16 = ar.k0;			\
+	mov	r17 = ar.k1;			\
+	;;		        		\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;		        		\
+	mov	r16 = ar.k2;			\
+	mov	r17 = ar.k3;			\
+	;;		        		\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;					\
+	mov	r16 = ar.k4;			\
+	mov	r17 = ar.k5;			\
+	;;				    	\
+	st8	[r2] = r16,16;			\
+	st8	[r3] = r17,16;			\
+	;;					\
+	mov	r16 = ar.k6;			\
+	mov	r17 = ar.k7;			\
+	;;		    			\
+	st8	[r2] = r16;     		\
+	st8	[r3] = r17;			\
+	;;
+
+
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_KERNEL_REGS			\
+	add	r2 = CTX(KR0),r33;		\
+	add	r3 = CTX(KR1),r33;		\
+	;;		    			\
+	ld8	r16=[r2],16;     		\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k0=r16;  			\
+	mov	ar.k1=r17;	    		\
+	;;		        		\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;		        		\
+	mov	ar.k2=r16;   			\
+	mov	ar.k3=r17;	    		\
+	;;		        		\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k4=r16;			\
+	mov	ar.k5=r17;	    		\
+	;;				    	\
+	ld8	r16=[r2],16;			\
+	ld8	r17=[r3],16;			\
+	;;					\
+	mov	ar.k6=r16;  			\
+	mov	ar.k7=r17;	    		\
+	;;
+
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_APP_REGS				\
+	add  r2 = CTX(BSPSTORE),r32;		\
+	mov  r16 = ar.bspstore;			\
+	;;					\
+	st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
+	mov  r16 = ar.rnat;			\
+	;;					\
+	st8  [r2] = r16,CTX(FCR)-CTX(RNAT);	\
+	mov  r16 = ar.fcr;			\
+	;;					\
+	st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);	\
+	mov  r16 = ar.eflag;			\
+	;;					\
+	st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);	\
+	mov  r16 = ar.cflg;			\
+	;;					\
+	st8  [r2] = r16,CTX(FSR)-CTX(CFLG);	\
+	mov  r16 = ar.fsr;			\
+	;;					\
+	st8  [r2] = r16,CTX(FIR)-CTX(FSR);	\
+	mov  r16 = ar.fir;			\
+	;;					\
+	st8  [r2] = r16,CTX(FDR)-CTX(FIR);	\
+	mov  r16 = ar.fdr;			\
+	;;					\
+	st8  [r2] = r16,CTX(UNAT)-CTX(FDR);	\
+	mov  r16 = ar.unat;			\
+	;;					\
+	st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);	\
+	mov  r16 = ar.fpsr;			\
+	;;					\
+	st8  [r2] = r16,CTX(PFS)-CTX(FPSR);	\
+	mov  r16 = ar.pfs;			\
+	;;					\
+	st8  [r2] = r16,CTX(LC)-CTX(PFS);	\
+	mov  r16 = ar.lc;			\
+	;;					\
+	st8  [r2] = r16;			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_APP_REGS			\
+	add  r2=CTX(BSPSTORE),r33;		\
+	;;					\
+	ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);	\
+	;;					\
+	mov  ar.bspstore=r16;			\
+	ld8  r16=[r2],CTX(FCR)-CTX(RNAT);	\
+	;;					\
+	mov  ar.rnat=r16;			\
+	ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);	\
+	;;					\
+	mov  ar.fcr=r16;			\
+	ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);	\
+	;;					\
+	mov  ar.eflag=r16;			\
+	ld8  r16=[r2],CTX(FSR)-CTX(CFLG);	\
+	;;					\
+	mov  ar.cflg=r16;			\
+	ld8  r16=[r2],CTX(FIR)-CTX(FSR);	\
+	;;					\
+	mov  ar.fsr=r16;			\
+	ld8  r16=[r2],CTX(FDR)-CTX(FIR);	\
+	;;					\
+	mov  ar.fir=r16;			\
+	ld8  r16=[r2],CTX(UNAT)-CTX(FDR);	\
+	;;					\
+	mov  ar.fdr=r16;			\
+	ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);	\
+	;;					\
+	mov  ar.unat=r16;			\
+	ld8  r16=[r2],CTX(PFS)-CTX(FPSR);	\
+	;;					\
+	mov  ar.fpsr=r16;			\
+	ld8  r16=[r2],CTX(LC)-CTX(PFS);		\
+	;;					\
+	mov  ar.pfs=r16;			\
+	ld8  r16=[r2];				\
+	;;					\
+	mov  ar.lc=r16;				\
+	;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_CTL_REGS				\
+	add	r2 = CTX(DCR),r32;		\
+	mov	r16 = cr.dcr;			\
+	;;					\
+	st8	[r2] = r16,CTX(IVA)-CTX(DCR);	\
+	;;                          		\
+	mov	r16 = cr.iva;			\
+	;;					\
+	st8	[r2] = r16,CTX(PTA)-CTX(IVA);	\
+	;;					\
+	mov r16 = cr.pta;			\
+	;;					\
+	st8 [r2] = r16 ;			\
+	;;
+
+	/*
+	 *	r33:		context_t base address
+	 */
+#define	RESTORE_CTL_REGS				\
+	add	r2 = CTX(DCR),r33;	        	\
+	;;						\
+	ld8	r16 = [r2],CTX(IVA)-CTX(DCR);		\
+	;;                      			\
+	mov	cr.dcr = r16;				\
+	dv_serialize_data;				\
+	;;						\
+	ld8	r16 = [r2],CTX(PTA)-CTX(IVA);		\
+	;;						\
+	mov	cr.iva = r16;				\
+	dv_serialize_data;				\
+	;;						\
+	ld8 r16 = [r2];					\
+	;;						\
+	mov cr.pta = r16;				\
+	dv_serialize_data;				\
+	;;
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_REGION_REGS			\
+	add	r2=CTX(RR0),r32;		\
+	mov	r16=rr[r0];			\
+	dep.z	r18=1,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=2,61,3;			\
+	;;					\
+	st8	[r2]=r17,8;			\
+	mov	r16=rr[r18];			\
+	dep.z	r18=3,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=4,61,3;			\
+	;;					\
+	st8	[r2]=r17,8;			\
+	mov	r16=rr[r18];			\
+	dep.z	r18=5,61,3;			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	mov	r17=rr[r18];			\
+	dep.z	r18=7,61,3;			\
+	;;					\
+	st8	[r2]=r17,16;			\
+	mov	r16=rr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;			\
+	;;
+
+	/*
+	 *	r33:context_t base address
+	 */
+#define	RESTORE_REGION_REGS	\
+	add	r2=CTX(RR0),r33;\
+	mov r18=r0;		\
+	;;			\
+	ld8	r20=[r2],8;	\
+	;;	/* rr0 */	\
+	ld8	r21=[r2],8;	\
+	;;	/* rr1 */	\
+	ld8	r22=[r2],8;	\
+	;;	/* rr2 */	\
+	ld8	r23=[r2],8;	\
+	;;	/* rr3 */	\
+	ld8	r24=[r2],8;	\
+	;;	/* rr4 */	\
+	ld8	r25=[r2],16;	\
+	;;	/* rr5 */	\
+	ld8	r27=[r2];	\
+	;;	/* rr7 */	\
+	mov rr[r18]=r20;	\
+	dep.z	r18=1,61,3;	\
+	;;  /* rr1 */		\
+	mov rr[r18]=r21;	\
+	dep.z	r18=2,61,3;	\
+	;;  /* rr2 */		\
+	mov rr[r18]=r22;	\
+	dep.z	r18=3,61,3;	\
+	;;  /* rr3 */		\
+	mov rr[r18]=r23;	\
+	dep.z	r18=4,61,3;	\
+	;;  /* rr4 */		\
+	mov rr[r18]=r24;	\
+	dep.z	r18=5,61,3;	\
+	;;  /* rr5 */		\
+	mov rr[r18]=r25;	\
+	dep.z	r18=7,61,3;	\
+	;;  /* rr7 */		\
+	mov rr[r18]=r27;	\
+	;;			\
+	srlz.i;			\
+	;;
+
+
+
+	/*
+	 *	r32:	context_t base address
+	 *	r36~r39:scratch registers
+	 */
+#define	SAVE_DEBUG_REGS				\
+	add	r2=CTX(IBR0),r32;		\
+	add	r3=CTX(DBR0),r32;		\
+	mov	r16=ibr[r0];			\
+	mov	r17=dbr[r0];			\
+	;;					\
+	st8	[r2]=r16,8; 			\
+	st8	[r3]=r17,8;	    		\
+	add	r18=1,r0;		    	\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=2,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=2,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=3,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=4,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=5,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=6,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	add	r18=7,r0;			\
+	;;					\
+	mov	r16=ibr[r18];			\
+	mov	r17=dbr[r18];			\
+	;;					\
+	st8	[r2]=r16,8;		    	\
+	st8	[r3]=r17,8;			\
+	;;
+
+
+/*
+ *      r33:    point to context_t structure
+ *      ar.lc are corrupted.
+ */
+#define RESTORE_DEBUG_REGS			\
+	add	r2=CTX(IBR0),r33;		\
+	add	r3=CTX(DBR0),r33;		\
+	mov r16=7;    				\
+	mov r17=r0;				\
+	;;                    			\
+	mov ar.lc = r16;			\
+	;; 					\
+1:						\
+	ld8 r18=[r2],8;		    		\
+	ld8 r19=[r3],8;				\
+	;;					\
+	mov ibr[r17]=r18;			\
+	mov dbr[r17]=r19;			\
+	;;   					\
+	srlz.i;					\
+	;; 					\
+	add r17=1,r17;				\
+	br.cloop.sptk 1b;			\
+	;;
+
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_FPU_LOW				\
+	add	r2=CTX(F2),r32;			\
+	add	r3=CTX(F3),r32;			\
+	;;					\
+	stf.spill.nta	[r2]=f2,32;		\
+	stf.spill.nta	[r3]=f3,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f4,32;		\
+	stf.spill.nta	[r3]=f5,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f6,32;		\
+	stf.spill.nta	[r3]=f7,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f8,32;		\
+	stf.spill.nta	[r3]=f9,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f10,32;		\
+	stf.spill.nta	[r3]=f11,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f12,32;		\
+	stf.spill.nta	[r3]=f13,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f14,32;		\
+	stf.spill.nta	[r3]=f15,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f16,32;		\
+	stf.spill.nta	[r3]=f17,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f18,32;		\
+	stf.spill.nta	[r3]=f19,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f20,32;		\
+	stf.spill.nta	[r3]=f21,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f22,32;		\
+	stf.spill.nta	[r3]=f23,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f24,32;		\
+	stf.spill.nta	[r3]=f25,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f26,32;		\
+	stf.spill.nta	[r3]=f27,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f28,32;		\
+	stf.spill.nta	[r3]=f29,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f30;		\
+	stf.spill.nta	[r3]=f31;		\
+	;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_FPU_HIGH				\
+	add	r2=CTX(F32),r32;		\
+	add	r3=CTX(F33),r32;		\
+	;;					\
+	stf.spill.nta	[r2]=f32,32;		\
+	stf.spill.nta	[r3]=f33,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f34,32;		\
+	stf.spill.nta	[r3]=f35,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f36,32;		\
+	stf.spill.nta	[r3]=f37,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f38,32;		\
+	stf.spill.nta	[r3]=f39,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f40,32;		\
+	stf.spill.nta	[r3]=f41,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f42,32;		\
+	stf.spill.nta	[r3]=f43,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f44,32;		\
+	stf.spill.nta	[r3]=f45,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f46,32;		\
+	stf.spill.nta	[r3]=f47,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f48,32;		\
+	stf.spill.nta	[r3]=f49,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f50,32;		\
+	stf.spill.nta	[r3]=f51,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f52,32;		\
+	stf.spill.nta	[r3]=f53,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f54,32;		\
+	stf.spill.nta	[r3]=f55,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f56,32;		\
+	stf.spill.nta	[r3]=f57,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f58,32;		\
+	stf.spill.nta	[r3]=f59,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f60,32;		\
+	stf.spill.nta	[r3]=f61,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f62,32;		\
+	stf.spill.nta	[r3]=f63,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f64,32;		\
+	stf.spill.nta	[r3]=f65,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f66,32;		\
+	stf.spill.nta	[r3]=f67,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f68,32;		\
+	stf.spill.nta	[r3]=f69,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f70,32;		\
+	stf.spill.nta	[r3]=f71,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f72,32;		\
+	stf.spill.nta	[r3]=f73,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f74,32;		\
+	stf.spill.nta	[r3]=f75,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f76,32;		\
+	stf.spill.nta	[r3]=f77,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f78,32;		\
+	stf.spill.nta	[r3]=f79,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f80,32;		\
+	stf.spill.nta	[r3]=f81,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f82,32;		\
+	stf.spill.nta	[r3]=f83,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f84,32;		\
+	stf.spill.nta	[r3]=f85,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f86,32;		\
+	stf.spill.nta	[r3]=f87,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f88,32;		\
+	stf.spill.nta	[r3]=f89,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f90,32;		\
+	stf.spill.nta	[r3]=f91,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f92,32;		\
+	stf.spill.nta	[r3]=f93,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f94,32;		\
+	stf.spill.nta	[r3]=f95,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f96,32;		\
+	stf.spill.nta	[r3]=f97,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f98,32;		\
+	stf.spill.nta	[r3]=f99,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f100,32;		\
+	stf.spill.nta	[r3]=f101,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f102,32;		\
+	stf.spill.nta	[r3]=f103,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f104,32;		\
+	stf.spill.nta	[r3]=f105,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f106,32;		\
+	stf.spill.nta	[r3]=f107,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f108,32;		\
+	stf.spill.nta	[r3]=f109,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f110,32;		\
+	stf.spill.nta	[r3]=f111,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f112,32;		\
+	stf.spill.nta	[r3]=f113,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f114,32;		\
+	stf.spill.nta	[r3]=f115,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f116,32;		\
+	stf.spill.nta	[r3]=f117,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f118,32;		\
+	stf.spill.nta	[r3]=f119,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f120,32;		\
+	stf.spill.nta	[r3]=f121,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f122,32;		\
+	stf.spill.nta	[r3]=f123,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f124,32;		\
+	stf.spill.nta	[r3]=f125,32;		\
+	;;					\
+	stf.spill.nta	[r2]=f126;		\
+	stf.spill.nta	[r3]=f127;		\
+	;;
+
+     /*
+      *      r33:    point to context_t structure
+      */
+#define	RESTORE_FPU_LOW				\
+    add     r2 = CTX(F2), r33;			\
+    add     r3 = CTX(F3), r33;			\
+    ;;						\
+    ldf.fill.nta f2 = [r2], 32;			\
+    ldf.fill.nta f3 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f4 = [r2], 32;			\
+    ldf.fill.nta f5 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f6 = [r2], 32;			\
+    ldf.fill.nta f7 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f8 = [r2], 32;			\
+    ldf.fill.nta f9 = [r3], 32;			\
+    ;;						\
+    ldf.fill.nta f10 = [r2], 32;		\
+    ldf.fill.nta f11 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f12 = [r2], 32;		\
+    ldf.fill.nta f13 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f14 = [r2], 32;		\
+    ldf.fill.nta f15 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f16 = [r2], 32;		\
+    ldf.fill.nta f17 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f18 = [r2], 32;		\
+    ldf.fill.nta f19 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f20 = [r2], 32;		\
+    ldf.fill.nta f21 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f22 = [r2], 32;		\
+    ldf.fill.nta f23 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f24 = [r2], 32;		\
+    ldf.fill.nta f25 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f26 = [r2], 32;		\
+    ldf.fill.nta f27 = [r3], 32;		\
+	;;					\
+    ldf.fill.nta f28 = [r2], 32;		\
+    ldf.fill.nta f29 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f30 = [r2], 32;		\
+    ldf.fill.nta f31 = [r3], 32;		\
+    ;;
+
+
+
+    /*
+     *      r33:    point to context_t structure
+     */
+#define	RESTORE_FPU_HIGH			\
+    add     r2 = CTX(F32), r33;			\
+    add     r3 = CTX(F33), r33;			\
+    ;;						\
+    ldf.fill.nta f32 = [r2], 32;		\
+    ldf.fill.nta f33 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f34 = [r2], 32;		\
+    ldf.fill.nta f35 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f36 = [r2], 32;		\
+    ldf.fill.nta f37 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f38 = [r2], 32;		\
+    ldf.fill.nta f39 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f40 = [r2], 32;		\
+    ldf.fill.nta f41 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f42 = [r2], 32;		\
+    ldf.fill.nta f43 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f44 = [r2], 32;		\
+    ldf.fill.nta f45 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f46 = [r2], 32;		\
+    ldf.fill.nta f47 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f48 = [r2], 32;		\
+    ldf.fill.nta f49 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f50 = [r2], 32;		\
+    ldf.fill.nta f51 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f52 = [r2], 32;		\
+    ldf.fill.nta f53 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f54 = [r2], 32;		\
+    ldf.fill.nta f55 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f56 = [r2], 32;		\
+    ldf.fill.nta f57 = [r3], 32;   		\
+    ;;						\
+    ldf.fill.nta f58 = [r2], 32;		\
+    ldf.fill.nta f59 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f60 = [r2], 32;		\
+    ldf.fill.nta f61 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f62 = [r2], 32;		\
+    ldf.fill.nta f63 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f64 = [r2], 32;		\
+    ldf.fill.nta f65 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f66 = [r2], 32;		\
+    ldf.fill.nta f67 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f68 = [r2], 32;		\
+    ldf.fill.nta f69 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f70 = [r2], 32;		\
+    ldf.fill.nta f71 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f72 = [r2], 32;		\
+    ldf.fill.nta f73 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f74 = [r2], 32;		\
+    ldf.fill.nta f75 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f76 = [r2], 32;		\
+    ldf.fill.nta f77 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f78 = [r2], 32;		\
+    ldf.fill.nta f79 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f80 = [r2], 32;		\
+    ldf.fill.nta f81 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f82 = [r2], 32;		\
+    ldf.fill.nta f83 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f84 = [r2], 32;		\
+    ldf.fill.nta f85 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f86 = [r2], 32;		\
+    ldf.fill.nta f87 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f88 = [r2], 32;		\
+    ldf.fill.nta f89 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f90 = [r2], 32;		\
+    ldf.fill.nta f91 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f92 = [r2], 32;		\
+    ldf.fill.nta f93 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f94 = [r2], 32;		\
+    ldf.fill.nta f95 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f96 = [r2], 32;		\
+    ldf.fill.nta f97 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f98 = [r2], 32;		\
+    ldf.fill.nta f99 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f100 = [r2], 32;		\
+    ldf.fill.nta f101 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f102 = [r2], 32;		\
+    ldf.fill.nta f103 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f104 = [r2], 32;		\
+    ldf.fill.nta f105 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f106 = [r2], 32;		\
+    ldf.fill.nta f107 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f108 = [r2], 32;		\
+    ldf.fill.nta f109 = [r3], 32;   		\
+    ;;						\
+    ldf.fill.nta f110 = [r2], 32;		\
+    ldf.fill.nta f111 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f112 = [r2], 32;		\
+    ldf.fill.nta f113 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f114 = [r2], 32;		\
+    ldf.fill.nta f115 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f116 = [r2], 32;		\
+    ldf.fill.nta f117 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f118 = [r2], 32;		\
+    ldf.fill.nta f119 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f120 = [r2], 32;		\
+    ldf.fill.nta f121 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f122 = [r2], 32;		\
+    ldf.fill.nta f123 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f124 = [r2], 32;		\
+    ldf.fill.nta f125 = [r3], 32;		\
+    ;;						\
+    ldf.fill.nta f126 = [r2], 32;		\
+    ldf.fill.nta f127 = [r3], 32;		\
+    ;;
+
+	/*
+	 *	r32:		context_t base address
+	 */
+#define	SAVE_PTK_REGS				\
+    add r2=CTX(PKR0), r32;			\
+    mov r16=7;    				\
+    ;;                         			\
+    mov ar.lc=r16;  				\
+    mov r17=r0;					\
+    ;;						\
+1:						\
+    mov r18=pkr[r17];				\
+    ;;                     			\
+    srlz.i;					\
+    ;; 						\
+    st8 [r2]=r18, 8;				\
+    ;;    					\
+    add r17 =1,r17;				\
+    ;;                     			\
+    br.cloop.sptk 1b;				\
+    ;;
+
+/*
+ *      r33:    point to context_t structure
+ *      ar.lc are corrupted.
+ */
+#define RESTORE_PTK_REGS	    		\
+    add r2=CTX(PKR0), r33;			\
+    mov r16=7;    				\
+    ;;                         			\
+    mov ar.lc=r16;  				\
+    mov r17=r0;					\
+    ;;						\
+1: 						\
+    ld8 r18=[r2], 8;				\
+    ;;						\
+    mov pkr[r17]=r18;				\
+    ;;    					\
+    srlz.i;					\
+    ;; 						\
+    add r17 =1,r17;				\
+    ;;                     			\
+    br.cloop.sptk 1b;				\
+    ;;
+
+
+/*
+ * void vmm_trampoline( context_t * from,
+ *			context_t * to)
+ *
+ * 	from:	r32
+ *	to:	r33
+ *  note: interrupt disabled before call this function.
+ */
+GLOBAL_ENTRY(vmm_trampoline)
+    mov r16 = psr
+    adds r2 = CTX(PSR), r32
+    ;;
+    st8 [r2] = r16, 8       // psr
+    mov r17 = pr
+    ;;
+    st8 [r2] = r17, 8       // pr
+    mov r18 = ar.unat
+    ;;
+    st8 [r2] = r18
+    mov r17 = ar.rsc
+    ;;
+    adds r2 = CTX(RSC),r32
+    ;;
+    st8 [r2]= r17
+    mov ar.rsc =0
+    flushrs
+    ;;
+    SAVE_GENERAL_REGS
+    ;;
+    SAVE_KERNEL_REGS
+    ;;
+    SAVE_APP_REGS
+    ;;
+    SAVE_BRANCH_REGS
+    ;;
+    SAVE_CTL_REGS
+    ;;
+    SAVE_REGION_REGS
+    ;;
+    //SAVE_DEBUG_REGS
+    ;;
+    rsm  psr.dfl
+    ;;
+    srlz.d
+    ;;
+    SAVE_FPU_LOW
+    ;;
+    rsm  psr.dfh
+    ;;
+    srlz.d
+    ;;
+    SAVE_FPU_HIGH
+    ;;
+    SAVE_PTK_REGS
+    ;;
+    RESTORE_PTK_REGS
+    ;;
+    RESTORE_FPU_HIGH
+    ;;
+    RESTORE_FPU_LOW
+    ;;
+    //RESTORE_DEBUG_REGS
+    ;;
+    RESTORE_REGION_REGS
+    ;;
+    RESTORE_CTL_REGS
+    ;;
+    RESTORE_BRANCH_REGS
+    ;;
+    RESTORE_APP_REGS
+    ;;
+    RESTORE_KERNEL_REGS
+    ;;
+    RESTORE_GENERAL_REGS
+    ;;
+    adds r2=CTX(PSR), r33
+    ;;
+    ld8 r16=[r2], 8       // psr
+    ;;
+    mov psr.l=r16
+    ;;
+    srlz.d
+    ;;
+    ld8 r16=[r2], 8       // pr
+    ;;
+    mov pr =r16,-1
+    ld8 r16=[r2]       // unat
+    ;;
+    mov ar.unat=r16
+    ;;
+    adds r2=CTX(RSC),r33
+    ;;
+    ld8 r16 =[r2]
+    ;;
+    mov ar.rsc = r16
+    ;;
+    br.ret.sptk.few b0
+END(vmm_trampoline)
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
new file mode 100644
index 000000000000..e44027ce5667
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.c
@@ -0,0 +1,2163 @@
+/*
+ * kvm_vcpu.c: handling all virtual cpu related thing.
+ * Copyright (c) 2005, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ *  Shaofan Li (Susue Li) <susie.li@intel.com>
+ *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/ia64regs.h>
+#include <asm/gcc_intrin.h>
+#include <asm/kregs.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+
+/*
+ * Special notes:
+ * - Index by it/dt/rt sequence
+ * - Only existing mode transitions are allowed in this table
+ * - RSE is placed at lazy mode when emulating guest partial mode
+ * - If gva happens to be rr0 and rr4, only allowed case is identity
+ *   mapping (gva=gpa), or panic! (How?)
+ */
+int mm_switch_table[8][8] = {
+	/*  2004/09/12(Kevin): Allow switch to self */
+	/*
+	 *  (it,dt,rt): (0,0,0) -> (1,1,1)
+	 *  This kind of transition usually occurs in the very early
+	 *  stage of Linux boot up procedure. Another case is in efi
+	 *  and pal calls. (see "arch/ia64/kernel/head.S")
+	 *
+	 *  (it,dt,rt): (0,0,0) -> (0,1,1)
+	 *  This kind of transition is found when OSYa exits efi boot
+	 *  service. Due to gva = gpa in this case (Same region),
+	 *  data access can be satisfied though itlb entry for physical
+	 *  emulation is hit.
+	 */
+	{SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	/*
+	 *  (it,dt,rt): (0,1,1) -> (1,1,1)
+	 *  This kind of transition is found in OSYa.
+	 *
+	 *  (it,dt,rt): (0,1,1) -> (0,0,0)
+	 *  This kind of transition is found in OSYa
+	 */
+	{SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
+	/* (1,0,0)->(1,1,1) */
+	{0,  0,  0,  0,  0,  0,  0,  SW_P2V},
+	/*
+	 *  (it,dt,rt): (1,0,1) -> (1,1,1)
+	 *  This kind of transition usually occurs when Linux returns
+	 *  from the low level TLB miss handlers.
+	 *  (see "arch/ia64/kernel/ivt.S")
+	 */
+	{0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
+	{0,  0,  0,  0,  0,  0,  0,  0},
+	/*
+	 *  (it,dt,rt): (1,1,1) -> (1,0,1)
+	 *  This kind of transition usually occurs in Linux low level
+	 *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
+	 *
+	 *  (it,dt,rt): (1,1,1) -> (0,0,0)
+	 *  This kind of transition usually occurs in pal and efi calls,
+	 *  which requires running in physical mode.
+	 *  (see "arch/ia64/kernel/head.S")
+	 *  (1,1,1)->(1,0,0)
+	 */
+
+	{SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
+};
+
+void physical_mode_init(struct kvm_vcpu  *vcpu)
+{
+	vcpu->arch.mode_flags = GUEST_IN_PHY;
+}
+
+void switch_to_physical_rid(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	/* Save original virtual mode rr[0] and rr[4] */
+	psr = ia64_clear_ic();
+	ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
+	ia64_srlz_d();
+	ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
+	ia64_srlz_d();
+
+	ia64_set_psr(psr);
+	return;
+}
+
+
+void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	psr = ia64_clear_ic();
+	ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
+	ia64_srlz_d();
+	ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
+	ia64_srlz_d();
+	ia64_set_psr(psr);
+	return;
+}
+
+static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
+{
+	return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
+}
+
+void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
+					struct ia64_psr new_psr)
+{
+	int act;
+	act = mm_switch_action(old_psr, new_psr);
+	switch (act) {
+	case SW_V2P:
+		/*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
+		old_psr.val, new_psr.val);*/
+		switch_to_physical_rid(vcpu);
+		/*
+		 * Set rse to enforced lazy, to prevent active rse
+		 *save/restor when guest physical mode.
+		 */
+		vcpu->arch.mode_flags |= GUEST_IN_PHY;
+		break;
+	case SW_P2V:
+		switch_to_virtual_rid(vcpu);
+		/*
+		 * recover old mode which is saved when entering
+		 * guest physical mode
+		 */
+		vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
+		break;
+	case SW_SELF:
+		break;
+	case SW_NOP:
+		break;
+	default:
+		/* Sanity check */
+		break;
+	}
+	return;
+}
+
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
+					struct ia64_psr new_psr)
+{
+
+	if ((old_psr.dt != new_psr.dt)
+			|| (old_psr.it != new_psr.it)
+			|| (old_psr.rt != new_psr.rt))
+		switch_mm_mode(vcpu, old_psr, new_psr);
+
+	return;
+}
+
+
+/*
+ * In physical mode, insert tc/tr for region 0 and 4 uses
+ * RID[0] and RID[4] which is for physical mode emulation.
+ * However what those inserted tc/tr wants is rid for
+ * virtual mode. So original virtual rid needs to be restored
+ * before insert.
+ *
+ * Operations which required such switch include:
+ *  - insertions (itc.*, itr.*)
+ *  - purges (ptc.* and ptr.*)
+ *  - tpa
+ *  - tak
+ *  - thash?, ttag?
+ * All above needs actual virtual rid for destination entry.
+ */
+
+void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+	if (is_physical_mode(vcpu)) {
+		vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
+		switch_to_virtual_rid(vcpu);
+	}
+	return;
+}
+
+/* Recover always follows prepare */
+void recover_if_physical_mode(struct kvm_vcpu *vcpu)
+{
+	if (is_physical_mode(vcpu))
+		switch_to_physical_rid(vcpu);
+	vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
+	return;
+}
+
+#define RPT(x)	((u16) &((struct kvm_pt_regs *)0)->x)
+
+static u16 gr_info[32] = {
+	0, 	/* r0 is read-only : WE SHOULD NEVER GET THIS */
+	RPT(r1), RPT(r2), RPT(r3),
+	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
+	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
+	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
+	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
+	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
+	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
+	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
+};
+
+#define IA64_FIRST_STACKED_GR   32
+#define IA64_FIRST_ROTATING_FR  32
+
+static inline unsigned long
+rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
+{
+	reg += rrb;
+	if (reg >= sor)
+		reg -= sor;
+	return reg;
+}
+
+/*
+ * Return the (rotated) index for floating point register
+ * be in the REGNUM (REGNUM must range from 32-127,
+ * result is in the range from 0-95.
+ */
+static inline unsigned long fph_index(struct kvm_pt_regs *regs,
+						long regnum)
+{
+	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
+	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
+}
+
+
+/*
+ * The inverse of the above: given bspstore and the number of
+ * registers, calculate ar.bsp.
+ */
+static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
+							long num_regs)
+{
+	long delta = ia64_rse_slot_num(addr) + num_regs;
+	int i = 0;
+
+	if (num_regs < 0)
+		delta -= 0x3e;
+	if (delta < 0) {
+		while (delta <= -0x3f) {
+			i--;
+			delta += 0x3f;
+		}
+	} else {
+		while (delta >= 0x3f) {
+			i++;
+			delta -= 0x3f;
+		}
+	}
+
+	return addr + num_regs + i;
+}
+
+static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+					unsigned long *val, int *nat)
+{
+	unsigned long *bsp, *addr, *rnat_addr, *bspstore;
+	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+	unsigned long nat_mask;
+	unsigned long old_rsc, new_rsc;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	new_rsc = old_rsc&(~(0x3));
+	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+
+	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	bsp = kbs + (regs->loadrs >> 19);
+
+	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	if (addr >= bspstore) {
+		ia64_flushrs();
+		ia64_mf();
+		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	}
+	*val = *addr;
+	if (nat) {
+		if (bspstore < rnat_addr)
+			*nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
+							& nat_mask);
+		else
+			*nat = (int)!!((*rnat_addr) & nat_mask);
+		ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+	}
+}
+
+void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
+				unsigned long val, unsigned long nat)
+{
+	unsigned long *bsp, *bspstore, *addr, *rnat_addr;
+	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
+	unsigned long nat_mask;
+	unsigned long old_rsc, new_rsc, psr;
+	unsigned long rnat;
+	long sof = (regs->cr_ifs) & 0x7f;
+	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
+	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
+	long ridx = r1 - 32;
+
+	if (ridx < sor)
+		ridx = rotate_reg(sor, rrb_gr, ridx);
+
+	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
+	/* put RSC to lazy mode, and set loadrs 0 */
+	new_rsc = old_rsc & (~0x3fff0003);
+	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
+	bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
+
+	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
+	nat_mask = 1UL << ia64_rse_slot_num(addr);
+	rnat_addr = ia64_rse_rnat_addr(addr);
+
+	local_irq_save(psr);
+	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+	if (addr >= bspstore) {
+
+		ia64_flushrs();
+		ia64_mf();
+		*addr = val;
+		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
+		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+		if (bspstore < rnat_addr)
+			rnat = rnat & (~nat_mask);
+		else
+			*rnat_addr = (*rnat_addr)&(~nat_mask);
+
+		ia64_mf();
+		ia64_loadrs();
+		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+	} else {
+		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
+		*addr = val;
+		if (bspstore < rnat_addr)
+			rnat = rnat&(~nat_mask);
+		else
+			*rnat_addr = (*rnat_addr) & (~nat_mask);
+
+		ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore);
+		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
+	}
+	local_irq_restore(psr);
+	ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
+}
+
+void getreg(unsigned long regnum, unsigned long *val,
+				int *nat, struct kvm_pt_regs *regs)
+{
+	unsigned long addr, *unat;
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		get_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	addr = (unsigned long)regs;
+	unat = &regs->eml_unat;;
+
+	addr += gr_info[regnum];
+
+	*val  = *(unsigned long *)addr;
+	/*
+	 * do it only when requested
+	 */
+	if (nat)
+		*nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
+}
+
+void setreg(unsigned long regnum, unsigned long val,
+			int nat, struct kvm_pt_regs *regs)
+{
+	unsigned long addr;
+	unsigned long bitmask;
+	unsigned long *unat;
+
+	/*
+	 * First takes care of stacked registers
+	 */
+	if (regnum >= IA64_FIRST_STACKED_GR) {
+		set_rse_reg(regs, regnum, val, nat);
+		return;
+	}
+
+	/*
+	 * Now look at registers in [0-31] range and init correct UNAT
+	 */
+	addr = (unsigned long)regs;
+	unat = &regs->eml_unat;
+	/*
+	 * add offset from base of struct
+	 * and do it !
+	 */
+	addr += gr_info[regnum];
+
+	*(unsigned long *)addr = val;
+
+	/*
+	 * We need to clear the corresponding UNAT bit to fully emulate the load
+	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
+	 */
+	bitmask   = 1UL << ((addr >> 3) & 0x3f);
+	if (nat)
+		*unat |= bitmask;
+	 else
+		*unat &= ~bitmask;
+
+}
+
+u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	u64 val;
+
+	if (!reg)
+		return 0;
+	getreg(reg, &val, 0, regs);
+	return val;
+}
+
+void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	long sof = (regs->cr_ifs) & 0x7f;
+
+	if (!reg)
+		return;
+	if (reg >= sof + 32)
+		return;
+	setreg(reg, value, nat, regs);	/* FIXME: handle NATs later*/
+}
+
+void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+				struct kvm_pt_regs *regs)
+{
+	/* Take floating register rotation into consideration*/
+	if (regnum >= IA64_FIRST_ROTATING_FR)
+		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+#define CASE_FIXED_FP(reg)			\
+	case  (reg) :				\
+		ia64_stf_spill(fpval, reg);	\
+	break
+
+	switch (regnum) {
+		CASE_FIXED_FP(0);
+		CASE_FIXED_FP(1);
+		CASE_FIXED_FP(2);
+		CASE_FIXED_FP(3);
+		CASE_FIXED_FP(4);
+		CASE_FIXED_FP(5);
+
+		CASE_FIXED_FP(6);
+		CASE_FIXED_FP(7);
+		CASE_FIXED_FP(8);
+		CASE_FIXED_FP(9);
+		CASE_FIXED_FP(10);
+		CASE_FIXED_FP(11);
+
+		CASE_FIXED_FP(12);
+		CASE_FIXED_FP(13);
+		CASE_FIXED_FP(14);
+		CASE_FIXED_FP(15);
+		CASE_FIXED_FP(16);
+		CASE_FIXED_FP(17);
+		CASE_FIXED_FP(18);
+		CASE_FIXED_FP(19);
+		CASE_FIXED_FP(20);
+		CASE_FIXED_FP(21);
+		CASE_FIXED_FP(22);
+		CASE_FIXED_FP(23);
+		CASE_FIXED_FP(24);
+		CASE_FIXED_FP(25);
+		CASE_FIXED_FP(26);
+		CASE_FIXED_FP(27);
+		CASE_FIXED_FP(28);
+		CASE_FIXED_FP(29);
+		CASE_FIXED_FP(30);
+		CASE_FIXED_FP(31);
+		CASE_FIXED_FP(32);
+		CASE_FIXED_FP(33);
+		CASE_FIXED_FP(34);
+		CASE_FIXED_FP(35);
+		CASE_FIXED_FP(36);
+		CASE_FIXED_FP(37);
+		CASE_FIXED_FP(38);
+		CASE_FIXED_FP(39);
+		CASE_FIXED_FP(40);
+		CASE_FIXED_FP(41);
+		CASE_FIXED_FP(42);
+		CASE_FIXED_FP(43);
+		CASE_FIXED_FP(44);
+		CASE_FIXED_FP(45);
+		CASE_FIXED_FP(46);
+		CASE_FIXED_FP(47);
+		CASE_FIXED_FP(48);
+		CASE_FIXED_FP(49);
+		CASE_FIXED_FP(50);
+		CASE_FIXED_FP(51);
+		CASE_FIXED_FP(52);
+		CASE_FIXED_FP(53);
+		CASE_FIXED_FP(54);
+		CASE_FIXED_FP(55);
+		CASE_FIXED_FP(56);
+		CASE_FIXED_FP(57);
+		CASE_FIXED_FP(58);
+		CASE_FIXED_FP(59);
+		CASE_FIXED_FP(60);
+		CASE_FIXED_FP(61);
+		CASE_FIXED_FP(62);
+		CASE_FIXED_FP(63);
+		CASE_FIXED_FP(64);
+		CASE_FIXED_FP(65);
+		CASE_FIXED_FP(66);
+		CASE_FIXED_FP(67);
+		CASE_FIXED_FP(68);
+		CASE_FIXED_FP(69);
+		CASE_FIXED_FP(70);
+		CASE_FIXED_FP(71);
+		CASE_FIXED_FP(72);
+		CASE_FIXED_FP(73);
+		CASE_FIXED_FP(74);
+		CASE_FIXED_FP(75);
+		CASE_FIXED_FP(76);
+		CASE_FIXED_FP(77);
+		CASE_FIXED_FP(78);
+		CASE_FIXED_FP(79);
+		CASE_FIXED_FP(80);
+		CASE_FIXED_FP(81);
+		CASE_FIXED_FP(82);
+		CASE_FIXED_FP(83);
+		CASE_FIXED_FP(84);
+		CASE_FIXED_FP(85);
+		CASE_FIXED_FP(86);
+		CASE_FIXED_FP(87);
+		CASE_FIXED_FP(88);
+		CASE_FIXED_FP(89);
+		CASE_FIXED_FP(90);
+		CASE_FIXED_FP(91);
+		CASE_FIXED_FP(92);
+		CASE_FIXED_FP(93);
+		CASE_FIXED_FP(94);
+		CASE_FIXED_FP(95);
+		CASE_FIXED_FP(96);
+		CASE_FIXED_FP(97);
+		CASE_FIXED_FP(98);
+		CASE_FIXED_FP(99);
+		CASE_FIXED_FP(100);
+		CASE_FIXED_FP(101);
+		CASE_FIXED_FP(102);
+		CASE_FIXED_FP(103);
+		CASE_FIXED_FP(104);
+		CASE_FIXED_FP(105);
+		CASE_FIXED_FP(106);
+		CASE_FIXED_FP(107);
+		CASE_FIXED_FP(108);
+		CASE_FIXED_FP(109);
+		CASE_FIXED_FP(110);
+		CASE_FIXED_FP(111);
+		CASE_FIXED_FP(112);
+		CASE_FIXED_FP(113);
+		CASE_FIXED_FP(114);
+		CASE_FIXED_FP(115);
+		CASE_FIXED_FP(116);
+		CASE_FIXED_FP(117);
+		CASE_FIXED_FP(118);
+		CASE_FIXED_FP(119);
+		CASE_FIXED_FP(120);
+		CASE_FIXED_FP(121);
+		CASE_FIXED_FP(122);
+		CASE_FIXED_FP(123);
+		CASE_FIXED_FP(124);
+		CASE_FIXED_FP(125);
+		CASE_FIXED_FP(126);
+		CASE_FIXED_FP(127);
+	}
+#undef CASE_FIXED_FP
+}
+
+void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
+					struct kvm_pt_regs *regs)
+{
+	/* Take floating register rotation into consideration*/
+	if (regnum >= IA64_FIRST_ROTATING_FR)
+		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
+
+#define CASE_FIXED_FP(reg)			\
+	case (reg) :				\
+		ia64_ldf_fill(reg, fpval);	\
+	break
+
+	switch (regnum) {
+		CASE_FIXED_FP(2);
+		CASE_FIXED_FP(3);
+		CASE_FIXED_FP(4);
+		CASE_FIXED_FP(5);
+
+		CASE_FIXED_FP(6);
+		CASE_FIXED_FP(7);
+		CASE_FIXED_FP(8);
+		CASE_FIXED_FP(9);
+		CASE_FIXED_FP(10);
+		CASE_FIXED_FP(11);
+
+		CASE_FIXED_FP(12);
+		CASE_FIXED_FP(13);
+		CASE_FIXED_FP(14);
+		CASE_FIXED_FP(15);
+		CASE_FIXED_FP(16);
+		CASE_FIXED_FP(17);
+		CASE_FIXED_FP(18);
+		CASE_FIXED_FP(19);
+		CASE_FIXED_FP(20);
+		CASE_FIXED_FP(21);
+		CASE_FIXED_FP(22);
+		CASE_FIXED_FP(23);
+		CASE_FIXED_FP(24);
+		CASE_FIXED_FP(25);
+		CASE_FIXED_FP(26);
+		CASE_FIXED_FP(27);
+		CASE_FIXED_FP(28);
+		CASE_FIXED_FP(29);
+		CASE_FIXED_FP(30);
+		CASE_FIXED_FP(31);
+		CASE_FIXED_FP(32);
+		CASE_FIXED_FP(33);
+		CASE_FIXED_FP(34);
+		CASE_FIXED_FP(35);
+		CASE_FIXED_FP(36);
+		CASE_FIXED_FP(37);
+		CASE_FIXED_FP(38);
+		CASE_FIXED_FP(39);
+		CASE_FIXED_FP(40);
+		CASE_FIXED_FP(41);
+		CASE_FIXED_FP(42);
+		CASE_FIXED_FP(43);
+		CASE_FIXED_FP(44);
+		CASE_FIXED_FP(45);
+		CASE_FIXED_FP(46);
+		CASE_FIXED_FP(47);
+		CASE_FIXED_FP(48);
+		CASE_FIXED_FP(49);
+		CASE_FIXED_FP(50);
+		CASE_FIXED_FP(51);
+		CASE_FIXED_FP(52);
+		CASE_FIXED_FP(53);
+		CASE_FIXED_FP(54);
+		CASE_FIXED_FP(55);
+		CASE_FIXED_FP(56);
+		CASE_FIXED_FP(57);
+		CASE_FIXED_FP(58);
+		CASE_FIXED_FP(59);
+		CASE_FIXED_FP(60);
+		CASE_FIXED_FP(61);
+		CASE_FIXED_FP(62);
+		CASE_FIXED_FP(63);
+		CASE_FIXED_FP(64);
+		CASE_FIXED_FP(65);
+		CASE_FIXED_FP(66);
+		CASE_FIXED_FP(67);
+		CASE_FIXED_FP(68);
+		CASE_FIXED_FP(69);
+		CASE_FIXED_FP(70);
+		CASE_FIXED_FP(71);
+		CASE_FIXED_FP(72);
+		CASE_FIXED_FP(73);
+		CASE_FIXED_FP(74);
+		CASE_FIXED_FP(75);
+		CASE_FIXED_FP(76);
+		CASE_FIXED_FP(77);
+		CASE_FIXED_FP(78);
+		CASE_FIXED_FP(79);
+		CASE_FIXED_FP(80);
+		CASE_FIXED_FP(81);
+		CASE_FIXED_FP(82);
+		CASE_FIXED_FP(83);
+		CASE_FIXED_FP(84);
+		CASE_FIXED_FP(85);
+		CASE_FIXED_FP(86);
+		CASE_FIXED_FP(87);
+		CASE_FIXED_FP(88);
+		CASE_FIXED_FP(89);
+		CASE_FIXED_FP(90);
+		CASE_FIXED_FP(91);
+		CASE_FIXED_FP(92);
+		CASE_FIXED_FP(93);
+		CASE_FIXED_FP(94);
+		CASE_FIXED_FP(95);
+		CASE_FIXED_FP(96);
+		CASE_FIXED_FP(97);
+		CASE_FIXED_FP(98);
+		CASE_FIXED_FP(99);
+		CASE_FIXED_FP(100);
+		CASE_FIXED_FP(101);
+		CASE_FIXED_FP(102);
+		CASE_FIXED_FP(103);
+		CASE_FIXED_FP(104);
+		CASE_FIXED_FP(105);
+		CASE_FIXED_FP(106);
+		CASE_FIXED_FP(107);
+		CASE_FIXED_FP(108);
+		CASE_FIXED_FP(109);
+		CASE_FIXED_FP(110);
+		CASE_FIXED_FP(111);
+		CASE_FIXED_FP(112);
+		CASE_FIXED_FP(113);
+		CASE_FIXED_FP(114);
+		CASE_FIXED_FP(115);
+		CASE_FIXED_FP(116);
+		CASE_FIXED_FP(117);
+		CASE_FIXED_FP(118);
+		CASE_FIXED_FP(119);
+		CASE_FIXED_FP(120);
+		CASE_FIXED_FP(121);
+		CASE_FIXED_FP(122);
+		CASE_FIXED_FP(123);
+		CASE_FIXED_FP(124);
+		CASE_FIXED_FP(125);
+		CASE_FIXED_FP(126);
+		CASE_FIXED_FP(127);
+	}
+}
+
+void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+						struct ia64_fpreg *val)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
+}
+
+void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
+						struct ia64_fpreg *val)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	if (reg > 1)
+		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
+}
+
+/************************************************************************
+ * lsapic timer
+ ***********************************************************************/
+u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
+{
+	unsigned long guest_itc;
+	guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
+
+	if (guest_itc >= VMX(vcpu, last_itc)) {
+		VMX(vcpu, last_itc) = guest_itc;
+		return  guest_itc;
+	} else
+		return VMX(vcpu, last_itc);
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
+static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
+{
+	struct kvm_vcpu *v;
+	int i;
+	long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
+	unsigned long vitv = VCPU(vcpu, itv);
+
+	if (vcpu->vcpu_id == 0) {
+		for (i = 0; i < MAX_VCPU_NUM; i++) {
+			v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i);
+			VMX(v, itc_offset) = itc_offset;
+			VMX(v, last_itc) = 0;
+		}
+	}
+	VMX(vcpu, last_itc) = 0;
+	if (VCPU(vcpu, itm) <= val) {
+		VMX(vcpu, itc_check) = 0;
+		vcpu_unpend_interrupt(vcpu, vitv);
+	} else {
+		VMX(vcpu, itc_check) = 1;
+		vcpu_set_itm(vcpu, VCPU(vcpu, itm));
+	}
+
+}
+
+static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, itm));
+}
+
+static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
+{
+	unsigned long vitv = VCPU(vcpu, itv);
+	VCPU(vcpu, itm) = val;
+
+	if (val > vcpu_get_itc(vcpu)) {
+		VMX(vcpu, itc_check) = 1;
+		vcpu_unpend_interrupt(vcpu, vitv);
+		VMX(vcpu, timer_pending) = 0;
+	} else
+		VMX(vcpu, itc_check) = 0;
+}
+
+#define  ITV_VECTOR(itv)    (itv&0xff)
+#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
+
+static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, itv) = val;
+	if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
+		vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
+		vcpu->arch.timer_pending = 0;
+	}
+}
+
+static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
+{
+	int vec;
+
+	vec = highest_inservice_irq(vcpu);
+	if (vec == NULL_VECTOR)
+		return;
+	VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
+	VCPU(vcpu, eoi) = 0;
+	vcpu->arch.irq_new_pending = 1;
+
+}
+
+/* See Table 5-8 in SDM vol2 for the definition */
+int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
+{
+	union ia64_tpr vtpr;
+
+	vtpr.val = VCPU(vcpu, tpr);
+
+	if (h_inservice == NMI_VECTOR)
+		return IRQ_MASKED_BY_INSVC;
+
+	if (h_pending == NMI_VECTOR) {
+		/* Non Maskable Interrupt */
+		return IRQ_NO_MASKED;
+	}
+
+	if (h_inservice == ExtINT_VECTOR)
+		return IRQ_MASKED_BY_INSVC;
+
+	if (h_pending == ExtINT_VECTOR) {
+		if (vtpr.mmi) {
+			/* mask all external IRQ */
+			return IRQ_MASKED_BY_VTPR;
+		} else
+			return IRQ_NO_MASKED;
+	}
+
+	if (is_higher_irq(h_pending, h_inservice)) {
+		if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
+			return IRQ_NO_MASKED;
+		else
+			return IRQ_MASKED_BY_VTPR;
+	} else {
+		return IRQ_MASKED_BY_INSVC;
+	}
+}
+
+void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+	long spsr;
+	int ret;
+
+	local_irq_save(spsr);
+	ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
+	local_irq_restore(spsr);
+
+	vcpu->arch.irq_new_pending = 1;
+}
+
+void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
+{
+	long spsr;
+	int ret;
+
+	local_irq_save(spsr);
+	ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
+	local_irq_restore(spsr);
+	if (ret) {
+		vcpu->arch.irq_new_pending = 1;
+		wmb();
+	}
+}
+
+void update_vhpi(struct kvm_vcpu *vcpu, int vec)
+{
+	u64 vhpi;
+
+	if (vec == NULL_VECTOR)
+		vhpi = 0;
+	else if (vec == NMI_VECTOR)
+		vhpi = 32;
+	else if (vec == ExtINT_VECTOR)
+		vhpi = 16;
+	else
+		vhpi = vec >> 4;
+
+	VCPU(vcpu, vhpi) = vhpi;
+	if (VCPU(vcpu, vac).a_int)
+		ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
+				(u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
+}
+
+u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
+{
+	int vec, h_inservice, mask;
+
+	vec = highest_pending_irq(vcpu);
+	h_inservice = highest_inservice_irq(vcpu);
+	mask = irq_masked(vcpu, vec, h_inservice);
+	if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
+		if (VCPU(vcpu, vhpi))
+			update_vhpi(vcpu, NULL_VECTOR);
+		return IA64_SPURIOUS_INT_VECTOR;
+	}
+	if (mask == IRQ_MASKED_BY_VTPR) {
+		update_vhpi(vcpu, vec);
+		return IA64_SPURIOUS_INT_VECTOR;
+	}
+	VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
+	vcpu_unpend_interrupt(vcpu, vec);
+	return  (u64)vec;
+}
+
+/**************************************************************************
+  Privileged operation emulation routines
+ **************************************************************************/
+u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	union ia64_pta vpta;
+	union ia64_rr vrr;
+	u64 pval;
+	u64 vhpt_offset;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
+	if (vpta.vf) {
+		pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
+				vpta.val, 0, 0, 0, 0);
+	} else {
+		pval = (vadr & VRN_MASK) | vhpt_offset |
+			(vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
+	}
+	return  pval;
+}
+
+u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	union ia64_rr vrr;
+	union ia64_pta vpta;
+	u64 pval;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	if (vpta.vf) {
+		pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
+						0, 0, 0, 0, 0);
+	} else
+		pval = 1;
+
+	return  pval;
+}
+
+u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
+{
+	struct thash_data *data;
+	union ia64_pta vpta;
+	u64 key;
+
+	vpta.val = vcpu_get_pta(vcpu);
+	if (vpta.vf == 0) {
+		key = 1;
+		return key;
+	}
+	data = vtlb_lookup(vcpu, vadr, D_TLB);
+	if (!data || !data->p)
+		key = 1;
+	else
+		key = data->key;
+
+	return key;
+}
+
+
+
+void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long thash, vadr;
+
+	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+	thash = vcpu_thash(vcpu, vadr);
+	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
+}
+
+
+void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long tag, vadr;
+
+	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
+	tag = vcpu_ttag(vcpu, vadr);
+	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
+}
+
+int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr)
+{
+	struct thash_data *data;
+	union ia64_isr visr, pt_isr;
+	struct kvm_pt_regs *regs;
+	struct ia64_psr vpsr;
+
+	regs = vcpu_regs(vcpu);
+	pt_isr.val = VMX(vcpu, cr_isr);
+	visr.val = 0;
+	visr.ei = pt_isr.ei;
+	visr.ir = pt_isr.ir;
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	visr.na = 1;
+
+	data = vhpt_lookup(vadr);
+	if (data) {
+		if (data->p == 0) {
+			vcpu_set_isr(vcpu, visr.val);
+			data_page_not_present(vcpu, vadr);
+			return IA64_FAULT;
+		} else if (data->ma == VA_MATTR_NATPAGE) {
+			vcpu_set_isr(vcpu, visr.val);
+			dnat_page_consumption(vcpu, vadr);
+			return IA64_FAULT;
+		} else {
+			*padr = (data->gpaddr >> data->ps << data->ps) |
+				(vadr & (PSIZE(data->ps) - 1));
+			return IA64_NO_FAULT;
+		}
+	}
+
+	data = vtlb_lookup(vcpu, vadr, D_TLB);
+	if (data) {
+		if (data->p == 0) {
+			vcpu_set_isr(vcpu, visr.val);
+			data_page_not_present(vcpu, vadr);
+			return IA64_FAULT;
+		} else if (data->ma == VA_MATTR_NATPAGE) {
+			vcpu_set_isr(vcpu, visr.val);
+			dnat_page_consumption(vcpu, vadr);
+			return IA64_FAULT;
+		} else{
+			*padr = ((data->ppn >> (data->ps - 12)) << data->ps)
+				| (vadr & (PSIZE(data->ps) - 1));
+			return IA64_NO_FAULT;
+		}
+	}
+	if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
+		if (vpsr.ic) {
+			vcpu_set_isr(vcpu, visr.val);
+			alt_dtlb(vcpu, vadr);
+			return IA64_FAULT;
+		} else {
+			nested_dtlb(vcpu);
+			return IA64_FAULT;
+		}
+	} else {
+		if (vpsr.ic) {
+			vcpu_set_isr(vcpu, visr.val);
+			dvhpt_fault(vcpu, vadr);
+			return IA64_FAULT;
+		} else{
+			nested_dtlb(vcpu);
+			return IA64_FAULT;
+		}
+	}
+
+	return IA64_NO_FAULT;
+}
+
+
+int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1, r3;
+
+	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+
+	if (vcpu_tpa(vcpu, r3, &r1))
+		return IA64_FAULT;
+
+	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+	return(IA64_NO_FAULT);
+}
+
+void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1, r3;
+
+	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
+	r1 = vcpu_tak(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
+}
+
+
+/************************************
+ * Insert/Purge translation register/cache
+ ************************************/
+void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+	thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
+}
+
+void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
+{
+	thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
+}
+
+void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+	u64 ps, va, rid;
+	struct thash_data *p_itr;
+
+	ps = itir_ps(itir);
+	va = PAGEALIGN(ifa, ps);
+	pte &= ~PAGE_FLAGS_RV_MASK;
+	rid = vcpu_get_rr(vcpu, ifa);
+	rid = rid & RR_RID_MASK;
+	p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
+	vcpu_set_tr(p_itr, pte, itir, va, rid);
+	vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
+}
+
+
+void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
+{
+	u64 gpfn;
+	u64 ps, va, rid;
+	struct thash_data *p_dtr;
+
+	ps = itir_ps(itir);
+	va = PAGEALIGN(ifa, ps);
+	pte &= ~PAGE_FLAGS_RV_MASK;
+
+	if (ps != _PAGE_SIZE_16M)
+		thash_purge_entries(vcpu, va, ps);
+	gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
+	if (__gpfn_is_io(gpfn))
+		pte |= VTLB_PTE_IO;
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
+	vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
+							pte, itir, va, rid);
+	vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
+}
+
+void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+	int index;
+	u64 va;
+
+	va = PAGEALIGN(ifa, ps);
+	while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
+		vcpu->arch.dtrs[index].page_flags = 0;
+
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
+{
+	int index;
+	u64 va;
+
+	va = PAGEALIGN(ifa, ps);
+	while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
+		vcpu->arch.itrs[index].page_flags = 0;
+
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	va = PAGEALIGN(va, ps);
+	thash_purge_entries(vcpu, va, ps);
+}
+
+void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
+{
+	thash_purge_all(vcpu);
+}
+
+void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	long psr;
+	local_irq_save(psr);
+	p->exit_reason = EXIT_REASON_PTC_G;
+
+	p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
+	p->u.ptc_g_data.vaddr = va;
+	p->u.ptc_g_data.ps = ps;
+	vmm_transition(vcpu);
+	/* Do Local Purge Here*/
+	vcpu_ptc_l(vcpu, va, ps);
+	local_irq_restore(psr);
+}
+
+
+void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
+{
+	vcpu_ptc_ga(vcpu, va, ps);
+}
+
+void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	vcpu_ptc_e(vcpu, ifa);
+}
+
+void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long ifa, itir;
+
+	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
+	itir = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
+}
+
+void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte, slot;
+
+	slot = vcpu_get_gr(vcpu, inst.M45.r3);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	vcpu_itr_d(vcpu, slot, pte, itir, ifa);
+}
+
+
+
+void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte, slot;
+
+	slot = vcpu_get_gr(vcpu, inst.M45.r3);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	vcpu_itr_i(vcpu, slot, pte, itir, ifa);
+}
+
+void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte;
+
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_itc_d(vcpu, pte, itir, ifa);
+}
+
+void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long itir, ifa, pte;
+
+	itir = vcpu_get_itir(vcpu);
+	ifa = vcpu_get_ifa(vcpu);
+	pte = vcpu_get_gr(vcpu, inst.M45.r2);
+	vcpu_itc_i(vcpu, pte, itir, ifa);
+}
+
+/*************************************
+ * Moves to semi-privileged registers
+ *************************************/
+
+void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long imm;
+
+	if (inst.M30.s)
+		imm = -inst.M30.imm;
+	else
+		imm = inst.M30.imm;
+
+	vcpu_set_itc(vcpu, imm);
+}
+
+void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r2;
+
+	r2 = vcpu_get_gr(vcpu, inst.M29.r2);
+	vcpu_set_itc(vcpu, r2);
+}
+
+
+void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r1;
+
+	r1 = vcpu_get_itc(vcpu);
+	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
+}
+/**************************************************************************
+  struct kvm_vcpu*protection key register access routines
+ **************************************************************************/
+
+unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	return ((unsigned long)ia64_get_pkr(reg));
+}
+
+void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
+{
+	ia64_set_pkr(reg, val);
+}
+
+
+unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa)
+{
+	union ia64_rr rr, rr1;
+
+	rr.val = vcpu_get_rr(vcpu, ifa);
+	rr1.val = 0;
+	rr1.ps = rr.ps;
+	rr1.rid = rr.rid;
+	return (rr1.val);
+}
+
+
+
+/********************************
+ * Moves to privileged registers
+ ********************************/
+unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
+					unsigned long val)
+{
+	union ia64_rr oldrr, newrr;
+	unsigned long rrval;
+	struct exit_ctl_data *p = &vcpu->arch.exit_data;
+	unsigned long psr;
+
+	oldrr.val = vcpu_get_rr(vcpu, reg);
+	newrr.val = val;
+	vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
+
+	switch ((unsigned long)(reg >> VRN_SHIFT)) {
+	case VRN6:
+		vcpu->arch.vmm_rr = vrrtomrr(val);
+		local_irq_save(psr);
+		p->exit_reason = EXIT_REASON_SWITCH_RR6;
+		vmm_transition(vcpu);
+		local_irq_restore(psr);
+		break;
+	case VRN4:
+		rrval = vrrtomrr(val);
+		vcpu->arch.metaphysical_saved_rr4 = rrval;
+		if (!is_physical_mode(vcpu))
+			ia64_set_rr(reg, rrval);
+		break;
+	case VRN0:
+		rrval = vrrtomrr(val);
+		vcpu->arch.metaphysical_saved_rr0 = rrval;
+		if (!is_physical_mode(vcpu))
+			ia64_set_rr(reg, rrval);
+		break;
+	default:
+		ia64_set_rr(reg, vrrtomrr(val));
+		break;
+	}
+
+	return (IA64_NO_FAULT);
+}
+
+
+
+void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_rr(vcpu, r3, r2);
+}
+
+void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+}
+
+void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pmc(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pmd(vcpu, r3, r2);
+}
+
+void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	u64 r3, r2;
+
+	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
+	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
+	vcpu_set_pkr(vcpu, r3, r2);
+}
+
+
+
+void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_rr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_pkr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_dbr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_ibr(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_pmc(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+
+unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
+{
+	/* FIXME: This could get called as a result of a rsvd-reg fault */
+	if (reg > (ia64_get_cpuid(3) & 0xff))
+		return 0;
+	else
+		return ia64_get_cpuid(reg);
+}
+
+void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r3, r1;
+
+	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
+	r1 = vcpu_get_cpuid(vcpu, r3);
+	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
+}
+
+void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	VCPU(vcpu, tpr) = val;
+	vcpu->arch.irq_check = 1;
+}
+
+unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long r2;
+
+	r2 = vcpu_get_gr(vcpu, inst.M32.r2);
+	VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
+
+	switch (inst.M32.cr3) {
+	case 0:
+		vcpu_set_dcr(vcpu, r2);
+		break;
+	case 1:
+		vcpu_set_itm(vcpu, r2);
+		break;
+	case 66:
+		vcpu_set_tpr(vcpu, r2);
+		break;
+	case 67:
+		vcpu_set_eoi(vcpu, r2);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+
+unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long tgt = inst.M33.r1;
+	unsigned long val;
+
+	switch (inst.M33.cr3) {
+	case 65:
+		val = vcpu_get_ivr(vcpu);
+		vcpu_set_gr(vcpu, tgt, val, 0);
+		break;
+
+	case 67:
+		vcpu_set_gr(vcpu, tgt, 0L, 0);
+		break;
+	default:
+		val = VCPU(vcpu, vcr[inst.M33.cr3]);
+		vcpu_set_gr(vcpu, tgt, val, 0);
+		break;
+	}
+
+	return 0;
+}
+
+
+
+void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
+{
+
+	unsigned long mask;
+	struct kvm_pt_regs *regs;
+	struct ia64_psr old_psr, new_psr;
+
+	old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	regs = vcpu_regs(vcpu);
+	/* We only support guest as:
+	 *  vpsr.pk = 0
+	 *  vpsr.is = 0
+	 * Otherwise panic
+	 */
+	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
+		panic_vm(vcpu);
+
+	/*
+	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
+	 * Since these bits will become 0, after success execution of each
+	 * instruction, we will change set them to mIA64_PSR
+	 */
+	VCPU(vcpu, vpsr) = val
+		& (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
+			IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
+
+	if (!old_psr.i && (val & IA64_PSR_I)) {
+		/* vpsr.i 0->1 */
+		vcpu->arch.irq_check = 1;
+	}
+	new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	/*
+	 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
+	 * , except for the following bits:
+	 *  ic/i/dt/si/rt/mc/it/bn/vm
+	 */
+	mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
+		IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
+		IA64_PSR_VM;
+
+	regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
+
+	check_mm_mode_switch(vcpu, old_psr, new_psr);
+
+	return ;
+}
+
+unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
+{
+	struct ia64_psr vpsr;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+
+	if (!vpsr.ic)
+		VCPU(vcpu, ifs) = regs->cr_ifs;
+	regs->cr_ifs = IA64_IFS_V;
+	return (IA64_NO_FAULT);
+}
+
+
+
+/**************************************************************************
+  VCPU banked general register access routines
+ **************************************************************************/
+#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
+	do {     							\
+		__asm__ __volatile__ (					\
+				";;extr.u %0 = %3,%6,16;;\n"		\
+				"dep %1 = %0, %1, 0, 16;;\n"		\
+				"st8 [%4] = %1\n"			\
+				"extr.u %0 = %2, 16, 16;;\n"		\
+				"dep %3 = %0, %3, %6, 16;;\n"		\
+				"st8 [%5] = %3\n"			\
+				::"r"(i), "r"(*b1unat), "r"(*b0unat),	\
+				"r"(*runat), "r"(b1unat), "r"(runat),	\
+				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
+	} while (0)
+
+void vcpu_bsw0(struct kvm_vcpu *vcpu)
+{
+	unsigned long i;
+
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long *r = &regs->r16;
+	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+	unsigned long *runat = &regs->eml_unat;
+	unsigned long *b0unat = &VCPU(vcpu, vbnat);
+	unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+
+	if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
+		for (i = 0; i < 16; i++) {
+			*b1++ = *r;
+			*r++ = *b0++;
+		}
+		vcpu_bsw0_unat(i, b0unat, b1unat, runat,
+				VMM_PT_REGS_R16_SLOT);
+		VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
+	}
+}
+
+#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
+	do {             						\
+		__asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"	\
+				"dep %1 = %0, %1, 16, 16;;\n"		\
+				"st8 [%4] = %1\n"			\
+				"extr.u %0 = %2, 0, 16;;\n"		\
+				"dep %3 = %0, %3, %6, 16;;\n"		\
+				"st8 [%5] = %3\n"			\
+				::"r"(i), "r"(*b0unat), "r"(*b1unat),	\
+				"r"(*runat), "r"(b0unat), "r"(runat),	\
+				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
+	} while (0)
+
+void vcpu_bsw1(struct kvm_vcpu *vcpu)
+{
+	unsigned long i;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	unsigned long *r = &regs->r16;
+	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
+	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
+	unsigned long *runat = &regs->eml_unat;
+	unsigned long *b0unat = &VCPU(vcpu, vbnat);
+	unsigned long *b1unat = &VCPU(vcpu, vnat);
+
+	if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
+		for (i = 0; i < 16; i++) {
+			*b0++ = *r;
+			*r++ = *b1++;
+		}
+		vcpu_bsw1_unat(i, b0unat, b1unat, runat,
+				VMM_PT_REGS_R16_SLOT);
+		VCPU(vcpu, vpsr) |= IA64_PSR_BN;
+	}
+}
+
+
+
+
+void vcpu_rfi(struct kvm_vcpu *vcpu)
+{
+	unsigned long ifs, psr;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	psr = VCPU(vcpu, ipsr);
+	if (psr & IA64_PSR_BN)
+		vcpu_bsw1(vcpu);
+	else
+		vcpu_bsw0(vcpu);
+	vcpu_set_psr(vcpu, psr);
+	ifs = VCPU(vcpu, ifs);
+	if (ifs >> 63)
+		regs->cr_ifs = ifs;
+	regs->cr_iip = VCPU(vcpu, iip);
+}
+
+
+/*
+   VPSR can't keep track of below bits of guest PSR
+   This function gets guest PSR
+ */
+
+unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
+{
+	unsigned long mask;
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+
+	mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
+		IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
+	return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
+}
+
+void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long vpsr;
+	unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
+					| inst.M44.imm;
+
+	vpsr = vcpu_get_psr(vcpu);
+	vpsr &= (~imm24);
+	vcpu_set_psr(vcpu, vpsr);
+}
+
+void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long vpsr;
+	unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
+				| inst.M44.imm;
+
+	vpsr = vcpu_get_psr(vcpu);
+	vpsr |= imm24;
+	vcpu_set_psr(vcpu, vpsr);
+}
+
+/* Generate Mask
+ * Parameter:
+ *  bit -- starting bit
+ *  len -- how many bits
+ */
+#define MASK(bit,len)				   	\
+({							\
+		__u64	ret;				\
+							\
+		__asm __volatile("dep %0=-1, r0, %1, %2"\
+				: "=r" (ret):		\
+		  "M" (bit),				\
+		  "M" (len));				\
+		ret;					\
+})
+
+void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
+{
+	val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
+	vcpu_set_psr(vcpu, val);
+}
+
+void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long val;
+
+	val = vcpu_get_gr(vcpu, inst.M35.r2);
+	vcpu_set_psr_l(vcpu, val);
+}
+
+void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
+{
+	unsigned long val;
+
+	val = vcpu_get_psr(vcpu);
+	val = (val & MASK(0, 32)) | (val & MASK(35, 2));
+	vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
+}
+
+void vcpu_increment_iip(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+	if (ipsr->ri == 2) {
+		ipsr->ri = 0;
+		regs->cr_iip += 16;
+	} else
+		ipsr->ri++;
+}
+
+void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
+	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
+
+	if (ipsr->ri == 0) {
+		ipsr->ri = 2;
+		regs->cr_iip -= 16;
+	} else
+		ipsr->ri--;
+}
+
+/** Emulate a privileged operation.
+ *
+ *
+ * @param vcpu virtual cpu
+ * @cause the reason cause virtualization fault
+ * @opcode the instruction code which cause virtualization fault
+ */
+
+void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
+{
+	unsigned long status, cause, opcode ;
+	INST64 inst;
+
+	status = IA64_NO_FAULT;
+	cause = VMX(vcpu, cause);
+	opcode = VMX(vcpu, opcode);
+	inst.inst = opcode;
+	/*
+	 * Switch to actual virtual rid in rr0 and rr4,
+	 * which is required by some tlb related instructions.
+	 */
+	prepare_if_physical_mode(vcpu);
+
+	switch (cause) {
+	case EVENT_RSM:
+		kvm_rsm(vcpu, inst);
+		break;
+	case EVENT_SSM:
+		kvm_ssm(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PSR:
+		kvm_mov_to_psr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PSR:
+		kvm_mov_from_psr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_CR:
+		kvm_mov_from_cr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_CR:
+		kvm_mov_to_cr(vcpu, inst);
+		break;
+	case EVENT_BSW_0:
+		vcpu_bsw0(vcpu);
+		break;
+	case EVENT_BSW_1:
+		vcpu_bsw1(vcpu);
+		break;
+	case EVENT_COVER:
+		vcpu_cover(vcpu);
+		break;
+	case EVENT_RFI:
+		vcpu_rfi(vcpu);
+		break;
+	case EVENT_ITR_D:
+		kvm_itr_d(vcpu, inst);
+		break;
+	case EVENT_ITR_I:
+		kvm_itr_i(vcpu, inst);
+		break;
+	case EVENT_PTR_D:
+		kvm_ptr_d(vcpu, inst);
+		break;
+	case EVENT_PTR_I:
+		kvm_ptr_i(vcpu, inst);
+		break;
+	case EVENT_ITC_D:
+		kvm_itc_d(vcpu, inst);
+		break;
+	case EVENT_ITC_I:
+		kvm_itc_i(vcpu, inst);
+		break;
+	case EVENT_PTC_L:
+		kvm_ptc_l(vcpu, inst);
+		break;
+	case EVENT_PTC_G:
+		kvm_ptc_g(vcpu, inst);
+		break;
+	case EVENT_PTC_GA:
+		kvm_ptc_ga(vcpu, inst);
+		break;
+	case EVENT_PTC_E:
+		kvm_ptc_e(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_RR:
+		kvm_mov_to_rr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_RR:
+		kvm_mov_from_rr(vcpu, inst);
+		break;
+	case EVENT_THASH:
+		kvm_thash(vcpu, inst);
+		break;
+	case EVENT_TTAG:
+		kvm_ttag(vcpu, inst);
+		break;
+	case EVENT_TPA:
+		status = kvm_tpa(vcpu, inst);
+		break;
+	case EVENT_TAK:
+		kvm_tak(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_AR_IMM:
+		kvm_mov_to_ar_imm(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_AR:
+		kvm_mov_to_ar_reg(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_AR:
+		kvm_mov_from_ar_reg(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_DBR:
+		kvm_mov_to_dbr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_IBR:
+		kvm_mov_to_ibr(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PMC:
+		kvm_mov_to_pmc(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PMD:
+		kvm_mov_to_pmd(vcpu, inst);
+		break;
+	case EVENT_MOV_TO_PKR:
+		kvm_mov_to_pkr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_DBR:
+		kvm_mov_from_dbr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_IBR:
+		kvm_mov_from_ibr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PMC:
+		kvm_mov_from_pmc(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_PKR:
+		kvm_mov_from_pkr(vcpu, inst);
+		break;
+	case EVENT_MOV_FROM_CPUID:
+		kvm_mov_from_cpuid(vcpu, inst);
+		break;
+	case EVENT_VMSW:
+		status = IA64_FAULT;
+		break;
+	default:
+		break;
+	};
+	/*Assume all status is NO_FAULT ?*/
+	if (status == IA64_NO_FAULT && cause != EVENT_RFI)
+		vcpu_increment_iip(vcpu);
+
+	recover_if_physical_mode(vcpu);
+}
+
+void init_vcpu(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	vcpu->arch.mode_flags = GUEST_IN_PHY;
+	VMX(vcpu, vrr[0]) = 0x38;
+	VMX(vcpu, vrr[1]) = 0x38;
+	VMX(vcpu, vrr[2]) = 0x38;
+	VMX(vcpu, vrr[3]) = 0x38;
+	VMX(vcpu, vrr[4]) = 0x38;
+	VMX(vcpu, vrr[5]) = 0x38;
+	VMX(vcpu, vrr[6]) = 0x38;
+	VMX(vcpu, vrr[7]) = 0x38;
+	VCPU(vcpu, vpsr) = IA64_PSR_BN;
+	VCPU(vcpu, dcr) = 0;
+	/* pta.size must not be 0.  The minimum is 15 (32k) */
+	VCPU(vcpu, pta) = 15 << 2;
+	VCPU(vcpu, itv) = 0x10000;
+	VCPU(vcpu, itm) = 0;
+	VMX(vcpu, last_itc) = 0;
+
+	VCPU(vcpu, lid) = VCPU_LID(vcpu);
+	VCPU(vcpu, ivr) = 0;
+	VCPU(vcpu, tpr) = 0x10000;
+	VCPU(vcpu, eoi) = 0;
+	VCPU(vcpu, irr[0]) = 0;
+	VCPU(vcpu, irr[1]) = 0;
+	VCPU(vcpu, irr[2]) = 0;
+	VCPU(vcpu, irr[3]) = 0;
+	VCPU(vcpu, pmv) = 0x10000;
+	VCPU(vcpu, cmcv) = 0x10000;
+	VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
+	VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
+	update_vhpi(vcpu, NULL_VECTOR);
+	VLSAPIC_XTP(vcpu) = 0x80;	/* disabled */
+
+	for (i = 0; i < 4; i++)
+		VLSAPIC_INSVC(vcpu, i) = 0;
+}
+
+void kvm_init_all_rr(struct kvm_vcpu *vcpu)
+{
+	unsigned long psr;
+
+	local_irq_save(psr);
+
+	/* WARNING: not allow co-exist of both virtual mode and physical
+	 * mode in same region
+	 */
+
+	vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
+	vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
+
+	if (is_physical_mode(vcpu)) {
+		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
+			panic_vm(vcpu);
+
+		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
+		ia64_dv_serialize_data();
+		ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
+		ia64_dv_serialize_data();
+	} else {
+		ia64_set_rr((VRN0 << VRN_SHIFT),
+				vcpu->arch.metaphysical_saved_rr0);
+		ia64_dv_serialize_data();
+		ia64_set_rr((VRN4 << VRN_SHIFT),
+				vcpu->arch.metaphysical_saved_rr4);
+		ia64_dv_serialize_data();
+	}
+	ia64_set_rr((VRN1 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN1])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN2 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN2])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN3 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN3])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN5 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN5])));
+	ia64_dv_serialize_data();
+	ia64_set_rr((VRN7 << VRN_SHIFT),
+			vrrtomrr(VMX(vcpu, vrr[VRN7])));
+	ia64_dv_serialize_data();
+	ia64_srlz_d();
+	ia64_set_psr(psr);
+}
+
+int vmm_entry(void)
+{
+	struct kvm_vcpu *v;
+	v = current_vcpu;
+
+	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
+						0, 0, 0, 0, 0, 0);
+	kvm_init_vtlb(v);
+	kvm_init_vhpt(v);
+	init_vcpu(v);
+	kvm_init_all_rr(v);
+	vmm_reset_entry();
+
+	return 0;
+}
+
+void panic_vm(struct kvm_vcpu *v)
+{
+	struct exit_ctl_data *p = &v->arch.exit_data;
+
+	p->exit_reason = EXIT_REASON_VM_PANIC;
+	vmm_transition(v);
+	/*Never to return*/
+	while (1);
+}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
new file mode 100644
index 000000000000..b0fcfb62c49e
--- /dev/null
+++ b/arch/ia64/kvm/vcpu.h
@@ -0,0 +1,740 @@
+/*
+ *  vcpu.h: vcpu routines
+ *  	Copyright (c) 2005, Intel Corporation.
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *  	Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
+ *
+ * 	Copyright (c) 2007, Intel Corporation.
+ *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
+ *	Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+
+#ifndef __KVM_VCPU_H__
+#define __KVM_VCPU_H__
+
+#include <asm/types.h>
+#include <asm/fpu.h>
+#include <asm/processor.h>
+
+#ifndef __ASSEMBLY__
+#include "vti.h"
+
+#include <linux/kvm_host.h>
+#include <linux/spinlock.h>
+
+typedef unsigned long IA64_INST;
+
+typedef union U_IA64_BUNDLE {
+	unsigned long i64[2];
+	struct { unsigned long template:5, slot0:41, slot1a:18,
+		slot1b:23, slot2:41; };
+	/* NOTE: following doesn't work because bitfields can't cross natural
+	   size boundaries
+	   struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
+} IA64_BUNDLE;
+
+typedef union U_INST64_A5 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
+		imm9d:9, s:1, major:4; };
+} INST64_A5;
+
+typedef union U_INST64_B4 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
+		wh:2, d:1, un1:1, major:4; };
+} INST64_B4;
+
+typedef union U_INST64_B8 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
+} INST64_B8;
+
+typedef union U_INST64_B9 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
+} INST64_B9;
+
+typedef union U_INST64_I19 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
+} INST64_I19;
+
+typedef union U_INST64_I26 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I26;
+
+typedef union U_INST64_I27 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
+} INST64_I27;
+
+typedef union U_INST64_I28 { /* not privileged (mov from AR) */
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_I28;
+
+typedef union U_INST64_M28 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M28;
+
+typedef union U_INST64_M29 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M29;
+
+typedef union U_INST64_M30 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
+		x3:3, s:1, major:4; };
+} INST64_M30;
+
+typedef union U_INST64_M31 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M31;
+
+typedef union U_INST64_M32 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M32;
+
+typedef union U_INST64_M33 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M33;
+
+typedef union U_INST64_M35 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+
+} INST64_M35;
+
+typedef union U_INST64_M36 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
+} INST64_M36;
+
+typedef union U_INST64_M37 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
+		i:1, major:4; };
+} INST64_M37;
+
+typedef union U_INST64_M41 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
+} INST64_M41;
+
+typedef union U_INST64_M42 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M42;
+
+typedef union U_INST64_M43 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M43;
+
+typedef union U_INST64_M44 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
+} INST64_M44;
+
+typedef union U_INST64_M45 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
+} INST64_M45;
+
+typedef union U_INST64_M46 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
+		x3:3, un1:1, major:4; };
+} INST64_M46;
+
+typedef union U_INST64_M47 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
+} INST64_M47;
+
+typedef union U_INST64_M1{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M1;
+
+typedef union U_INST64_M2{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M2;
+
+typedef union U_INST64_M3{
+	IA64_INST inst;
+	struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M3;
+
+typedef union U_INST64_M4 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M4;
+
+typedef union U_INST64_M5 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M5;
+
+typedef union U_INST64_M6 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M6;
+
+typedef union U_INST64_M9 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M9;
+
+typedef union U_INST64_M10 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M10;
+
+typedef union U_INST64_M12 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
+		x6:6, m:1, major:4; };
+} INST64_M12;
+
+typedef union U_INST64_M15 {
+	IA64_INST inst;
+	struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
+		x6:6, s:1, major:4; };
+} INST64_M15;
+
+typedef union U_INST64 {
+	IA64_INST inst;
+	struct { unsigned long :37, major:4; } generic;
+	INST64_A5 A5;	/* used in build_hypercall_bundle only */
+	INST64_B4 B4;	/* used in build_hypercall_bundle only */
+	INST64_B8 B8;	/* rfi, bsw.[01] */
+	INST64_B9 B9;	/* break.b */
+	INST64_I19 I19;	/* used in build_hypercall_bundle only */
+	INST64_I26 I26;	/* mov register to ar (I unit) */
+	INST64_I27 I27;	/* mov immediate to ar (I unit) */
+	INST64_I28 I28;	/* mov from ar (I unit) */
+	INST64_M1  M1;	/* ld integer */
+	INST64_M2  M2;
+	INST64_M3  M3;
+	INST64_M4  M4;	/* st integer */
+	INST64_M5  M5;
+	INST64_M6  M6;	/* ldfd floating pointer 		*/
+	INST64_M9  M9;	/* stfd floating pointer		*/
+	INST64_M10 M10;	/* stfd floating pointer		*/
+	INST64_M12 M12;     /* ldfd pair floating pointer		*/
+	INST64_M15 M15;	/* lfetch + imm update			*/
+	INST64_M28 M28;	/* purge translation cache entry	*/
+	INST64_M29 M29;	/* mov register to ar (M unit)		*/
+	INST64_M30 M30;	/* mov immediate to ar (M unit)		*/
+	INST64_M31 M31;	/* mov from ar (M unit)			*/
+	INST64_M32 M32;	/* mov reg to cr			*/
+	INST64_M33 M33;	/* mov from cr				*/
+	INST64_M35 M35;	/* mov to psr				*/
+	INST64_M36 M36;	/* mov from psr				*/
+	INST64_M37 M37;	/* break.m				*/
+	INST64_M41 M41;	/* translation cache insert		*/
+	INST64_M42 M42;	/* mov to indirect reg/translation reg insert*/
+	INST64_M43 M43;	/* mov from indirect reg		*/
+	INST64_M44 M44;	/* set/reset system mask		*/
+	INST64_M45 M45;	/* translation purge			*/
+	INST64_M46 M46;	/* translation access (tpa,tak)		*/
+	INST64_M47 M47;	/* purge translation entry		*/
+} INST64;
+
+#define MASK_41 ((unsigned long)0x1ffffffffff)
+
+/* Virtual address memory attributes encoding */
+#define VA_MATTR_WB         0x0
+#define VA_MATTR_UC         0x4
+#define VA_MATTR_UCE        0x5
+#define VA_MATTR_WC         0x6
+#define VA_MATTR_NATPAGE    0x7
+
+#define PMASK(size)         (~((size) - 1))
+#define PSIZE(size)         (1UL<<(size))
+#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
+#define PAGEALIGN(va, ps)	CLEARLSB(va, ps)
+#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
+#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
+
+#define ARCH_PAGE_SHIFT   12
+
+#define INVALID_TI_TAG (1UL << 63)
+
+#define VTLB_PTE_P_BIT      0
+#define VTLB_PTE_IO_BIT     60
+#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
+#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
+
+#define vcpu_quick_region_check(_tr_regions,_ifa)		\
+	(_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
+
+#define vcpu_quick_region_set(_tr_regions,_ifa)             \
+	do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
+
+static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
+		u64 va, u64 rid)
+{
+	trp->page_flags = pte;
+	trp->itir = itir;
+	trp->vadr = va;
+	trp->rid = rid;
+}
+
+extern u64 kvm_lookup_mpa(u64 gpfn);
+extern u64 kvm_gpa_to_mpa(u64 gpa);
+
+/* Return I/O type if trye */
+#define __gpfn_is_io(gpfn)			\
+	({						\
+	 u64 pte, ret = 0;			\
+	 pte = kvm_lookup_mpa(gpfn);		\
+	 if (!(pte & GPFN_INV_MASK))		\
+	 ret = pte & GPFN_IO_MASK;	\
+	 ret;					\
+	 })
+
+#endif
+
+#define IA64_NO_FAULT	0
+#define IA64_FAULT	1
+
+#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
+
+#define SW_BAD  0   /* Bad mode transitition */
+#define SW_V2P  1   /* Physical emulatino is activated */
+#define SW_P2V  2   /* Exit physical mode emulation */
+#define SW_SELF 3   /* No mode transition */
+#define SW_NOP  4   /* Mode transition, but without action required */
+
+#define GUEST_IN_PHY    0x1
+#define GUEST_PHY_EMUL  0x2
+
+#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
+
+#define VRN_SHIFT	61
+#define VRN_MASK	0xe000000000000000
+#define VRN0		0x0UL
+#define VRN1		0x1UL
+#define VRN2		0x2UL
+#define VRN3		0x3UL
+#define VRN4		0x4UL
+#define VRN5		0x5UL
+#define VRN6		0x6UL
+#define VRN7		0x7UL
+
+#define IRQ_NO_MASKED         0
+#define IRQ_MASKED_BY_VTPR    1
+#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
+
+#define PTA_BASE_SHIFT      15
+
+#define IA64_PSR_VM_BIT     46
+#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
+
+/* Interruption Function State */
+#define IA64_IFS_V_BIT      63
+#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
+
+#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
+#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
+
+#ifndef __ASSEMBLY__
+
+#include <asm/gcc_intrin.h>
+
+#define is_physical_mode(v)		\
+	((v->arch.mode_flags) & GUEST_IN_PHY)
+
+#define is_virtual_mode(v)	\
+	(!is_physical_mode(v))
+
+#define MODE_IND(psr)	\
+	(((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
+
+#define _vmm_raw_spin_lock(x)						\
+	do {								\
+		__u32 *ia64_spinlock_ptr = (__u32 *) (x);		\
+		__u64 ia64_spinlock_val;				\
+		ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+		if (unlikely(ia64_spinlock_val)) {			\
+			do {						\
+				while (*ia64_spinlock_ptr)		\
+				ia64_barrier();				\
+				ia64_spinlock_val =			\
+				ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
+			} while (ia64_spinlock_val);			\
+		}							\
+	} while (0)
+
+#define _vmm_raw_spin_unlock(x)				\
+	do { barrier();				\
+		((spinlock_t *)x)->raw_lock.lock = 0; } \
+while (0)
+
+void vmm_spin_lock(spinlock_t *lock);
+void vmm_spin_unlock(spinlock_t *lock);
+enum {
+	I_TLB = 1,
+	D_TLB = 2
+};
+
+union kvm_va {
+	struct {
+		unsigned long off : 60;		/* intra-region offset */
+		unsigned long reg :  4;		/* region number */
+	} f;
+	unsigned long l;
+	void *p;
+};
+
+#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);		\
+						_v.f.reg = 0; _v.l; })
+#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);		\
+				_v.f.reg = -1; _v.p; })
+
+#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
+						_v.rid; })
+#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
+						_v.ps; })
+#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);	\
+						_v.ve; })
+
+enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
+enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
+
+#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
+#define VMX(_v, _x)  ((_v)->arch._x)
+
+#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
+#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
+
+static inline unsigned long itir_ps(unsigned long itir)
+{
+	return ((itir >> 2) & 0x3f);
+}
+
+
+/**************************************************************************
+  VCPU control register access routines
+ **************************************************************************/
+
+static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, itir));
+}
+
+static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, itir) = val;
+}
+
+static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, ifa));
+}
+
+static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ifa) = val;
+}
+
+static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, iva));
+}
+
+static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, pta));
+}
+
+static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, lid));
+}
+
+static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, tpr));
+}
+
+static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
+{
+	return (0UL);		/*reads of eoi always return 0 */
+}
+
+static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[0]));
+}
+
+static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[1]));
+}
+
+static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[2]));
+}
+
+static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
+{
+	return ((u64)VCPU(vcpu, irr[3]));
+}
+
+static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
+{
+	ia64_setreg(_IA64_REG_CR_DCR, val);
+}
+
+static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, isr) = val;
+}
+
+static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, lid) = val;
+}
+
+static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ipsr) = val;
+}
+
+static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iip) = val;
+}
+
+static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, ifs) = val;
+}
+
+static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iipa) = val;
+}
+
+static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
+{
+	VCPU(vcpu, iha) = val;
+}
+
+
+static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return vcpu->arch.vrr[reg>>61];
+}
+
+/**************************************************************************
+  VCPU debug breakpoint register access routines
+ **************************************************************************/
+
+static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	__ia64_set_dbr(reg, val);
+}
+
+static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	ia64_set_ibr(reg, val);
+}
+
+static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return ((u64)__ia64_get_dbr(reg));
+}
+
+static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
+{
+	return ((u64)ia64_get_ibr(reg));
+}
+
+/**************************************************************************
+  VCPU performance monitor register access routines
+ **************************************************************************/
+static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	/* NOTE: Writes to unimplemented PMC registers are discarded */
+	ia64_set_pmc(reg, val);
+}
+
+static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
+{
+	/* NOTE: Writes to unimplemented PMD registers are discarded */
+	ia64_set_pmd(reg, val);
+}
+
+static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
+{
+	/* NOTE: Reads from unimplemented PMC registers return zero */
+	return ((u64)ia64_get_pmc(reg));
+}
+
+static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
+{
+	/* NOTE: Reads from unimplemented PMD registers return zero */
+	return ((u64)ia64_get_pmd(reg));
+}
+
+static inline unsigned long vrrtomrr(unsigned long val)
+{
+	union ia64_rr rr;
+	rr.val = val;
+	rr.rid = (rr.rid << 4) | 0xe;
+	if (rr.ps > PAGE_SHIFT)
+		rr.ps = PAGE_SHIFT;
+	rr.ve = 1;
+	return rr.val;
+}
+
+
+static inline int highest_bits(int *dat)
+{
+	u32  bits, bitnum;
+	int i;
+
+	/* loop for all 256 bits */
+	for (i = 7; i >= 0 ; i--) {
+		bits = dat[i];
+		if (bits) {
+			bitnum = fls(bits);
+			return i * 32 + bitnum - 1;
+		}
+	}
+	return NULL_VECTOR;
+}
+
+/*
+ * The pending irq is higher than the inservice one.
+ *
+ */
+static inline int is_higher_irq(int pending, int inservice)
+{
+	return ((pending > inservice)
+			|| ((pending != NULL_VECTOR)
+				&& (inservice == NULL_VECTOR)));
+}
+
+static inline int is_higher_class(int pending, int mic)
+{
+	return ((pending >> 4) > mic);
+}
+
+/*
+ * Return 0-255 for pending irq.
+ *        NULL_VECTOR: when no pending.
+ */
+static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
+{
+	if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
+		return NMI_VECTOR;
+	if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+	return highest_bits((int *)&VCPU(vcpu, irr[0]));
+}
+
+static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
+{
+	if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
+		return NMI_VECTOR;
+	if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
+		return ExtINT_VECTOR;
+
+	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
+}
+
+extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+					struct ia64_fpreg *val);
+extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg,
+					struct ia64_fpreg *val);
+extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg);
+extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat);
+extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu);
+extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val);
+extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
+extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
+extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
+					u64 itir, u64 va, int type);
+extern struct thash_data *vhpt_lookup(u64 va);
+extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
+extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
+extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
+extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
+extern int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
+		u64 itir, u64 ifa, int type);
+extern void thash_purge_all(struct kvm_vcpu *v);
+extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
+						u64 va, int is_data);
+extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
+						u64 ps, int is_data);
+
+extern void vcpu_increment_iip(struct kvm_vcpu *v);
+extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
+extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
+extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
+extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
+extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
+extern void nested_dtlb(struct kvm_vcpu *vcpu);
+extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
+extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
+
+extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
+extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
+
+extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
+extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
+extern void vmm_transition(struct kvm_vcpu *vcpu);
+extern void vmm_trampoline(union context *from, union context *to);
+extern int vmm_entry(void);
+extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
+
+extern void vmm_reset_entry(void);
+void kvm_init_vtlb(struct kvm_vcpu *v);
+void kvm_init_vhpt(struct kvm_vcpu *v);
+void thash_init(struct thash_cb *hcb, u64 sz);
+
+void panic_vm(struct kvm_vcpu *v);
+
+extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
+		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
+#endif
+#endif	/* __VCPU_H__ */
diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
new file mode 100644
index 000000000000..2275bf4e681a
--- /dev/null
+++ b/arch/ia64/kvm/vmm.c
@@ -0,0 +1,66 @@
+/*
+ * vmm.c: vmm module interface with kvm module
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *
+ *  Xiantao Zhang (xiantao.zhang@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+
+#include<linux/module.h>
+#include<asm/fpswa.h>
+
+#include "vcpu.h"
+
+MODULE_AUTHOR("Intel");
+MODULE_LICENSE("GPL");
+
+extern char kvm_ia64_ivt;
+extern fpswa_interface_t *vmm_fpswa_interface;
+
+struct kvm_vmm_info vmm_info = {
+	.module	     = THIS_MODULE,
+	.vmm_entry   = vmm_entry,
+	.tramp_entry = vmm_trampoline,
+	.vmm_ivt     = (unsigned long)&kvm_ia64_ivt,
+};
+
+static int __init  kvm_vmm_init(void)
+{
+
+	vmm_fpswa_interface = fpswa_interface;
+
+	/*Register vmm data to kvm side*/
+	return kvm_init(&vmm_info, 1024, THIS_MODULE);
+}
+
+static void __exit kvm_vmm_exit(void)
+{
+	kvm_exit();
+	return ;
+}
+
+void vmm_spin_lock(spinlock_t *lock)
+{
+	_vmm_raw_spin_lock(lock);
+}
+
+void vmm_spin_unlock(spinlock_t *lock)
+{
+	_vmm_raw_spin_unlock(lock);
+}
+module_init(kvm_vmm_init)
+module_exit(kvm_vmm_exit)
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
new file mode 100644
index 000000000000..3ee5f481c06d
--- /dev/null
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -0,0 +1,1424 @@
+/*
+ * /ia64/kvm_ivt.S
+ *
+ * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
+ *      Stephane Eranian <eranian@hpl.hp.com>
+ *      David Mosberger <davidm@hpl.hp.com>
+ * Copyright (C) 2000, 2002-2003 Intel Co
+ *      Asit Mallick <asit.k.mallick@intel.com>
+ *      Suresh Siddha <suresh.b.siddha@intel.com>
+ *      Kenneth Chen <kenneth.w.chen@intel.com>
+ *      Fenghua Yu <fenghua.yu@intel.com>
+ *
+ *
+ * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
+ * for SMP
+ * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
+ * handler now uses virtual PT.
+ *
+ * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
+ *              Supporting Intel virtualization architecture
+ *
+ */
+
+/*
+ * This file defines the interruption vector table used by the CPU.
+ * It does not include one entry per possible cause of interruption.
+ *
+ * The first 20 entries of the table contain 64 bundles each while the
+ * remaining 48 entries contain only 16 bundles each.
+ *
+ * The 64 bundles are used to allow inlining the whole handler for
+ * critical
+ * interruptions like TLB misses.
+ *
+ *  For each entry, the comment is as follows:
+ *
+ *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
+ *              (12,51)
+ *  entry offset ----/     /         /                  /
+ *  /
+ *  entry number ---------/         /                  /
+ *  /
+ *  size of the entry -------------/                  /
+ *  /
+ *  vector name -------------------------------------/
+ *  /
+ *  interruptions triggering this vector
+ *  ----------------------/
+ *
+ * The table is 32KB in size and must be aligned on 32KB
+ * boundary.
+ * (The CPU ignores the 15 lower bits of the address)
+ *
+ * Table is based upon EAS2.6 (Oct 1999)
+ */
+
+
+#include <asm/asmmacro.h>
+#include <asm/cache.h>
+#include <asm/pgtable.h>
+
+#include "asm-offsets.h"
+#include "vcpu.h"
+#include "kvm_minstate.h"
+#include "vti.h"
+
+#if 1
+# define PSR_DEFAULT_BITS   psr.ac
+#else
+# define PSR_DEFAULT_BITS   0
+#endif
+
+
+#define KVM_FAULT(n)    \
+    kvm_fault_##n:;          \
+    mov r19=n;;          \
+    br.sptk.many kvm_fault_##n;         \
+    ;;                  \
+
+
+#define KVM_REFLECT(n)    \
+    mov r31=pr;           \
+    mov r19=n;       /* prepare to save predicates */ \
+    mov r29=cr.ipsr;      \
+    ;;      \
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
+(p7)br.sptk.many kvm_dispatch_reflection;        \
+    br.sptk.many kvm_panic;      \
+
+
+GLOBAL_ENTRY(kvm_panic)
+    br.sptk.many kvm_panic
+    ;;
+END(kvm_panic)
+
+
+
+
+
+    .section .text.ivt,"ax"
+
+    .align 32768    // align on 32KB boundary
+    .global kvm_ia64_ivt
+kvm_ia64_ivt:
+///////////////////////////////////////////////////////////////
+// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
+ENTRY(kvm_vhpt_miss)
+    KVM_FAULT(0)
+END(kvm_vhpt_miss)
+
+
+    .org kvm_ia64_ivt+0x400
+////////////////////////////////////////////////////////////////
+// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
+ENTRY(kvm_itlb_miss)
+    mov r31 = pr
+    mov r29=cr.ipsr;
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+    (p6) br.sptk kvm_alt_itlb_miss
+    mov r19 = 1
+    br.sptk kvm_itlb_miss_dispatch
+    KVM_FAULT(1);
+END(kvm_itlb_miss)
+
+    .org kvm_ia64_ivt+0x0800
+//////////////////////////////////////////////////////////////////
+// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
+ENTRY(kvm_dtlb_miss)
+    mov r31 = pr
+    mov r29=cr.ipsr;
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
+(p6)br.sptk kvm_alt_dtlb_miss
+    br.sptk kvm_dtlb_miss_dispatch
+END(kvm_dtlb_miss)
+
+     .org kvm_ia64_ivt+0x0c00
+////////////////////////////////////////////////////////////////////
+// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
+ENTRY(kvm_alt_itlb_miss)
+    mov r16=cr.ifa    // get address that caused the TLB miss
+    ;;
+    movl r17=PAGE_KERNEL
+    mov r24=cr.ipsr
+    movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+    ;;
+    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+    ;;
+    or r19=r17,r19      // insert PTE control bits into r19
+    ;;
+    movl r20=IA64_GRANULE_SHIFT<<2
+    ;;
+    mov cr.itir=r20
+    ;;
+    itc.i r19		// insert the TLB entry
+    mov pr=r31,-1
+    rfi
+END(kvm_alt_itlb_miss)
+
+    .org kvm_ia64_ivt+0x1000
+/////////////////////////////////////////////////////////////////////
+// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
+ENTRY(kvm_alt_dtlb_miss)
+    mov r16=cr.ifa		// get address that caused the TLB miss
+    ;;
+    movl r17=PAGE_KERNEL
+    movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
+    mov r24=cr.ipsr
+    ;;
+    and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
+    ;;
+    or r19=r19,r17	// insert PTE control bits into r19
+    ;;
+    movl r20=IA64_GRANULE_SHIFT<<2
+    ;;
+    mov cr.itir=r20
+    ;;
+    itc.d r19		// insert the TLB entry
+    mov pr=r31,-1
+    rfi
+END(kvm_alt_dtlb_miss)
+
+    .org kvm_ia64_ivt+0x1400
+//////////////////////////////////////////////////////////////////////
+// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
+ENTRY(kvm_nested_dtlb_miss)
+    KVM_FAULT(5)
+END(kvm_nested_dtlb_miss)
+
+    .org kvm_ia64_ivt+0x1800
+/////////////////////////////////////////////////////////////////////
+// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
+ENTRY(kvm_ikey_miss)
+    KVM_REFLECT(6)
+END(kvm_ikey_miss)
+
+    .org kvm_ia64_ivt+0x1c00
+/////////////////////////////////////////////////////////////////////
+// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
+ENTRY(kvm_dkey_miss)
+    KVM_REFLECT(7)
+END(kvm_dkey_miss)
+
+    .org kvm_ia64_ivt+0x2000
+////////////////////////////////////////////////////////////////////
+// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
+ENTRY(kvm_dirty_bit)
+    KVM_REFLECT(8)
+END(kvm_dirty_bit)
+
+    .org kvm_ia64_ivt+0x2400
+////////////////////////////////////////////////////////////////////
+// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
+ENTRY(kvm_iaccess_bit)
+    KVM_REFLECT(9)
+END(kvm_iaccess_bit)
+
+    .org kvm_ia64_ivt+0x2800
+///////////////////////////////////////////////////////////////////
+// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
+ENTRY(kvm_daccess_bit)
+    KVM_REFLECT(10)
+END(kvm_daccess_bit)
+
+    .org kvm_ia64_ivt+0x2c00
+/////////////////////////////////////////////////////////////////
+// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
+ENTRY(kvm_break_fault)
+    mov r31=pr
+    mov r19=11
+    mov r29=cr.ipsr
+    ;;
+    KVM_SAVE_MIN_WITH_COVER_R19
+    ;;
+    alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!)
+    mov out0=cr.ifa
+    mov out2=cr.isr     // FIXME: pity to make this slow access twice
+    mov out3=cr.iim     // FIXME: pity to make this slow access twice
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15)ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    adds out1=16,sp
+    br.call.sptk.many b6=kvm_ia64_handle_break
+    ;;
+END(kvm_break_fault)
+
+    .org kvm_ia64_ivt+0x3000
+/////////////////////////////////////////////////////////////////
+// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
+ENTRY(kvm_interrupt)
+    mov r31=pr		// prepare to save predicates
+    mov r19=12
+    mov r29=cr.ipsr
+    ;;
+    tbit.z p6,p7=r29,IA64_PSR_VM_BIT
+    tbit.z p0,p15=r29,IA64_PSR_I_BIT
+    ;;
+(p7) br.sptk kvm_dispatch_interrupt
+    ;;
+    mov r27=ar.rsc		/* M */
+    mov r20=r1			/* A */
+    mov r25=ar.unat		/* M */
+    mov r26=ar.pfs		/* I */
+    mov r28=cr.iip		/* M */
+    cover			/* B (or nothing) */
+    ;;
+    mov r1=sp
+    ;;
+    invala			/* M */
+    mov r30=cr.ifs
+    ;;
+    addl r1=-VMM_PT_REGS_SIZE,r1
+    ;;
+    adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
+    adds r16=PT(CR_IPSR),r1
+    ;;
+    lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
+    st8 [r16]=r29			/* save cr.ipsr */
+    ;;
+    lfetch.fault.excl.nt1 [r17]
+    mov r29=b0
+    ;;
+    adds r16=PT(R8),r1  	/* initialize first base pointer */
+    adds r17=PT(R9),r1  	/* initialize second base pointer */
+    mov r18=r0      		/* make sure r18 isn't NaT */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r8,16
+.mem.offset 8,0; st8.spill [r17]=r9,16
+        ;;
+.mem.offset 0,0; st8.spill [r16]=r10,24
+.mem.offset 8,0; st8.spill [r17]=r11,24
+        ;;
+    st8 [r16]=r28,16		/* save cr.iip */
+    st8 [r17]=r30,16		/* save cr.ifs */
+    mov r8=ar.fpsr		/* M */
+    mov r9=ar.csd
+    mov r10=ar.ssd
+    movl r11=FPSR_DEFAULT	/* L-unit */
+    ;;
+    st8 [r16]=r25,16		/* save ar.unat */
+    st8 [r17]=r26,16		/* save ar.pfs */
+    shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
+    ;;
+    st8 [r16]=r27,16		/* save ar.rsc */
+    adds r17=16,r17		/* skip over ar_rnat field */
+    ;;
+    st8 [r17]=r31,16		/* save predicates */
+    adds r16=16,r16		/* skip over ar_bspstore field */
+    ;;
+    st8 [r16]=r29,16		/* save b0 */
+    st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
+.mem.offset 8,0; st8.spill [r17]=r12,16
+    adds r12=-16,r1
+    /* switch to kernel memory stack (with 16 bytes of scratch) */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r13,16
+.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r15,16
+.mem.offset 8,0; st8.spill [r17]=r14,16
+    dep r14=-1,r0,60,4
+    ;;
+.mem.offset 0,0; st8.spill [r16]=r2,16
+.mem.offset 8,0; st8.spill [r17]=r3,16
+    adds r2=VMM_PT_REGS_R16_OFFSET,r1
+    adds r14 = VMM_VCPU_GP_OFFSET,r13
+    ;;
+    mov r8=ar.ccv
+    ld8 r14 = [r14]
+    ;;
+    mov r1=r14       /* establish kernel global pointer */
+    ;;                                          \
+    bsw.1
+    ;;
+    alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
+    mov out0=r13
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i
+    ;;
+    //(p15) ssm psr.i
+    adds r3=8,r2		// set up second base pointer for SAVE_REST
+    srlz.i			// ensure everybody knows psr.ic is back on
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r16,16
+.mem.offset 8,0; st8.spill [r3]=r17,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r18,16
+.mem.offset 8,0; st8.spill [r3]=r19,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r20,16
+.mem.offset 8,0; st8.spill [r3]=r21,16
+    mov r18=b6
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r22,16
+.mem.offset 8,0; st8.spill [r3]=r23,16
+    mov r19=b7
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r24,16
+.mem.offset 8,0; st8.spill [r3]=r25,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r26,16
+.mem.offset 8,0; st8.spill [r3]=r27,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r28,16
+.mem.offset 8,0; st8.spill [r3]=r29,16
+    ;;
+.mem.offset 0,0; st8.spill [r2]=r30,16
+.mem.offset 8,0; st8.spill [r3]=r31,32
+    ;;
+    mov ar.fpsr=r11       /* M-unit */
+    st8 [r2]=r8,8         /* ar.ccv */
+    adds r24=PT(B6)-PT(F7),r3
+    ;;
+    stf.spill [r2]=f6,32
+    stf.spill [r3]=f7,32
+    ;;
+    stf.spill [r2]=f8,32
+    stf.spill [r3]=f9,32
+    ;;
+    stf.spill [r2]=f10
+    stf.spill [r3]=f11
+    adds r25=PT(B7)-PT(F11),r3
+    ;;
+    st8 [r24]=r18,16       /* b6 */
+    st8 [r25]=r19,16       /* b7 */
+    ;;
+    st8 [r24]=r9           /* ar.csd */
+    st8 [r25]=r10          /* ar.ssd */
+    ;;
+    srlz.d		// make sure we see the effect of cr.ivr
+    addl r14=@gprel(ia64_leave_nested),gp
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=kvm_ia64_handle_irq
+    ;;
+END(kvm_interrupt)
+
+    .global kvm_dispatch_vexirq
+    .org kvm_ia64_ivt+0x3400
+//////////////////////////////////////////////////////////////////////
+// 0x3400 Entry 13 (size 64 bundles) Reserved
+ENTRY(kvm_virtual_exirq)
+    mov r31=pr
+    mov r19=13
+    mov r30 =r0
+    ;;
+kvm_dispatch_vexirq:
+    cmp.eq p6,p0 = 1,r30
+    ;;
+(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
+    ;;
+(p6)ld8 r1 = [r29]
+    ;;
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,1,0
+    mov out0=r13
+
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    KVM_SAVE_REST
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    mov rp=r14
+    br.call.sptk.many b6=kvm_vexirq
+END(kvm_virtual_exirq)
+
+    .org kvm_ia64_ivt+0x3800
+/////////////////////////////////////////////////////////////////////
+// 0x3800 Entry 14 (size 64 bundles) Reserved
+    KVM_FAULT(14)
+    // this code segment is from 2.6.16.13
+
+
+    .org kvm_ia64_ivt+0x3c00
+///////////////////////////////////////////////////////////////////////
+// 0x3c00 Entry 15 (size 64 bundles) Reserved
+    KVM_FAULT(15)
+
+
+    .org kvm_ia64_ivt+0x4000
+///////////////////////////////////////////////////////////////////////
+// 0x4000 Entry 16 (size 64 bundles) Reserved
+    KVM_FAULT(16)
+
+    .org kvm_ia64_ivt+0x4400
+//////////////////////////////////////////////////////////////////////
+// 0x4400 Entry 17 (size 64 bundles) Reserved
+    KVM_FAULT(17)
+
+    .org kvm_ia64_ivt+0x4800
+//////////////////////////////////////////////////////////////////////
+// 0x4800 Entry 18 (size 64 bundles) Reserved
+    KVM_FAULT(18)
+
+    .org kvm_ia64_ivt+0x4c00
+//////////////////////////////////////////////////////////////////////
+// 0x4c00 Entry 19 (size 64 bundles) Reserved
+    KVM_FAULT(19)
+
+    .org kvm_ia64_ivt+0x5000
+//////////////////////////////////////////////////////////////////////
+// 0x5000 Entry 20 (size 16 bundles) Page Not Present
+ENTRY(kvm_page_not_present)
+    KVM_REFLECT(20)
+END(kvm_page_not_present)
+
+    .org kvm_ia64_ivt+0x5100
+///////////////////////////////////////////////////////////////////////
+// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
+ENTRY(kvm_key_permission)
+    KVM_REFLECT(21)
+END(kvm_key_permission)
+
+    .org kvm_ia64_ivt+0x5200
+//////////////////////////////////////////////////////////////////////
+// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
+ENTRY(kvm_iaccess_rights)
+    KVM_REFLECT(22)
+END(kvm_iaccess_rights)
+
+    .org kvm_ia64_ivt+0x5300
+//////////////////////////////////////////////////////////////////////
+// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
+ENTRY(kvm_daccess_rights)
+    KVM_REFLECT(23)
+END(kvm_daccess_rights)
+
+    .org kvm_ia64_ivt+0x5400
+/////////////////////////////////////////////////////////////////////
+// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
+ENTRY(kvm_general_exception)
+   KVM_REFLECT(24)
+   KVM_FAULT(24)
+END(kvm_general_exception)
+
+    .org kvm_ia64_ivt+0x5500
+//////////////////////////////////////////////////////////////////////
+// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
+ENTRY(kvm_disabled_fp_reg)
+    KVM_REFLECT(25)
+END(kvm_disabled_fp_reg)
+
+    .org kvm_ia64_ivt+0x5600
+////////////////////////////////////////////////////////////////////
+// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
+ENTRY(kvm_nat_consumption)
+    KVM_REFLECT(26)
+END(kvm_nat_consumption)
+
+    .org kvm_ia64_ivt+0x5700
+/////////////////////////////////////////////////////////////////////
+// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
+ENTRY(kvm_speculation_vector)
+    KVM_REFLECT(27)
+END(kvm_speculation_vector)
+
+    .org kvm_ia64_ivt+0x5800
+/////////////////////////////////////////////////////////////////////
+// 0x5800 Entry 28 (size 16 bundles) Reserved
+    KVM_FAULT(28)
+
+    .org kvm_ia64_ivt+0x5900
+///////////////////////////////////////////////////////////////////
+// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
+ENTRY(kvm_debug_vector)
+    KVM_FAULT(29)
+END(kvm_debug_vector)
+
+    .org kvm_ia64_ivt+0x5a00
+///////////////////////////////////////////////////////////////
+// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
+ENTRY(kvm_unaligned_access)
+    KVM_REFLECT(30)
+END(kvm_unaligned_access)
+
+    .org kvm_ia64_ivt+0x5b00
+//////////////////////////////////////////////////////////////////////
+// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
+ENTRY(kvm_unsupported_data_reference)
+    KVM_REFLECT(31)
+END(kvm_unsupported_data_reference)
+
+    .org kvm_ia64_ivt+0x5c00
+////////////////////////////////////////////////////////////////////
+// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
+ENTRY(kvm_floating_point_fault)
+    KVM_REFLECT(32)
+END(kvm_floating_point_fault)
+
+    .org kvm_ia64_ivt+0x5d00
+/////////////////////////////////////////////////////////////////////
+// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
+ENTRY(kvm_floating_point_trap)
+    KVM_REFLECT(33)
+END(kvm_floating_point_trap)
+
+    .org kvm_ia64_ivt+0x5e00
+//////////////////////////////////////////////////////////////////////
+// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
+ENTRY(kvm_lower_privilege_trap)
+    KVM_REFLECT(34)
+END(kvm_lower_privilege_trap)
+
+    .org kvm_ia64_ivt+0x5f00
+//////////////////////////////////////////////////////////////////////
+// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
+ENTRY(kvm_taken_branch_trap)
+    KVM_REFLECT(35)
+END(kvm_taken_branch_trap)
+
+    .org kvm_ia64_ivt+0x6000
+////////////////////////////////////////////////////////////////////
+// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
+ENTRY(kvm_single_step_trap)
+    KVM_REFLECT(36)
+END(kvm_single_step_trap)
+    .global kvm_virtualization_fault_back
+    .org kvm_ia64_ivt+0x6100
+/////////////////////////////////////////////////////////////////////
+// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
+ENTRY(kvm_virtualization_fault)
+    mov r31=pr
+    adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+    ;;
+    st8 [r16] = r1
+    adds r17 = VMM_VCPU_GP_OFFSET, r21
+    ;;
+    ld8 r1 = [r17]
+    cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
+    cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
+    cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
+    cmp.eq p9,p0=EVENT_RSM,r24
+    cmp.eq p10,p0=EVENT_SSM,r24
+    cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
+    cmp.eq p12,p0=EVENT_THASH,r24
+    (p6) br.dptk.many kvm_asm_mov_from_ar
+    (p7) br.dptk.many kvm_asm_mov_from_rr
+    (p8) br.dptk.many kvm_asm_mov_to_rr
+    (p9) br.dptk.many kvm_asm_rsm
+    (p10) br.dptk.many kvm_asm_ssm
+    (p11) br.dptk.many kvm_asm_mov_to_psr
+    (p12) br.dptk.many kvm_asm_thash
+    ;;
+kvm_virtualization_fault_back:
+    adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
+    ;;
+    ld8 r1 = [r16]
+    ;;
+    mov r19=37
+    adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+    adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+    ;;
+    st8 [r16] = r24
+    st8 [r17] = r25
+    ;;
+    cmp.ne p6,p0=EVENT_RFI, r24
+    (p6) br.sptk kvm_dispatch_virtualization_fault
+    ;;
+    adds r18=VMM_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r18=[r18]
+    ;;
+    adds r18=VMM_VPD_VIFS_OFFSET,r18
+    ;;
+    ld8 r18=[r18]
+    ;;
+    tbit.z p6,p0=r18,63
+    (p6) br.sptk kvm_dispatch_virtualization_fault
+    ;;
+    //if vifs.v=1 desert current register frame
+    alloc r18=ar.pfs,0,0,0,0
+    br.sptk kvm_dispatch_virtualization_fault
+END(kvm_virtualization_fault)
+
+    .org kvm_ia64_ivt+0x6200
+//////////////////////////////////////////////////////////////
+// 0x6200 Entry 38 (size 16 bundles) Reserved
+    KVM_FAULT(38)
+
+    .org kvm_ia64_ivt+0x6300
+/////////////////////////////////////////////////////////////////
+// 0x6300 Entry 39 (size 16 bundles) Reserved
+    KVM_FAULT(39)
+
+    .org kvm_ia64_ivt+0x6400
+/////////////////////////////////////////////////////////////////
+// 0x6400 Entry 40 (size 16 bundles) Reserved
+    KVM_FAULT(40)
+
+    .org kvm_ia64_ivt+0x6500
+//////////////////////////////////////////////////////////////////
+// 0x6500 Entry 41 (size 16 bundles) Reserved
+    KVM_FAULT(41)
+
+    .org kvm_ia64_ivt+0x6600
+//////////////////////////////////////////////////////////////////
+// 0x6600 Entry 42 (size 16 bundles) Reserved
+    KVM_FAULT(42)
+
+    .org kvm_ia64_ivt+0x6700
+//////////////////////////////////////////////////////////////////
+// 0x6700 Entry 43 (size 16 bundles) Reserved
+    KVM_FAULT(43)
+
+    .org kvm_ia64_ivt+0x6800
+//////////////////////////////////////////////////////////////////
+// 0x6800 Entry 44 (size 16 bundles) Reserved
+    KVM_FAULT(44)
+
+    .org kvm_ia64_ivt+0x6900
+///////////////////////////////////////////////////////////////////
+// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
+//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
+ENTRY(kvm_ia32_exception)
+    KVM_FAULT(45)
+END(kvm_ia32_exception)
+
+    .org kvm_ia64_ivt+0x6a00
+////////////////////////////////////////////////////////////////////
+// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
+ENTRY(kvm_ia32_intercept)
+    KVM_FAULT(47)
+END(kvm_ia32_intercept)
+
+    .org kvm_ia64_ivt+0x6c00
+/////////////////////////////////////////////////////////////////////
+// 0x6c00 Entry 48 (size 16 bundles) Reserved
+    KVM_FAULT(48)
+
+    .org kvm_ia64_ivt+0x6d00
+//////////////////////////////////////////////////////////////////////
+// 0x6d00 Entry 49 (size 16 bundles) Reserved
+    KVM_FAULT(49)
+
+    .org kvm_ia64_ivt+0x6e00
+//////////////////////////////////////////////////////////////////////
+// 0x6e00 Entry 50 (size 16 bundles) Reserved
+    KVM_FAULT(50)
+
+    .org kvm_ia64_ivt+0x6f00
+/////////////////////////////////////////////////////////////////////
+// 0x6f00 Entry 51 (size 16 bundles) Reserved
+    KVM_FAULT(52)
+
+    .org kvm_ia64_ivt+0x7100
+////////////////////////////////////////////////////////////////////
+// 0x7100 Entry 53 (size 16 bundles) Reserved
+    KVM_FAULT(53)
+
+    .org kvm_ia64_ivt+0x7200
+/////////////////////////////////////////////////////////////////////
+// 0x7200 Entry 54 (size 16 bundles) Reserved
+    KVM_FAULT(54)
+
+    .org kvm_ia64_ivt+0x7300
+////////////////////////////////////////////////////////////////////
+// 0x7300 Entry 55 (size 16 bundles) Reserved
+    KVM_FAULT(55)
+
+    .org kvm_ia64_ivt+0x7400
+////////////////////////////////////////////////////////////////////
+// 0x7400 Entry 56 (size 16 bundles) Reserved
+    KVM_FAULT(56)
+
+    .org kvm_ia64_ivt+0x7500
+/////////////////////////////////////////////////////////////////////
+// 0x7500 Entry 57 (size 16 bundles) Reserved
+    KVM_FAULT(57)
+
+    .org kvm_ia64_ivt+0x7600
+/////////////////////////////////////////////////////////////////////
+// 0x7600 Entry 58 (size 16 bundles) Reserved
+    KVM_FAULT(58)
+
+    .org kvm_ia64_ivt+0x7700
+////////////////////////////////////////////////////////////////////
+// 0x7700 Entry 59 (size 16 bundles) Reserved
+    KVM_FAULT(59)
+
+    .org kvm_ia64_ivt+0x7800
+////////////////////////////////////////////////////////////////////
+// 0x7800 Entry 60 (size 16 bundles) Reserved
+    KVM_FAULT(60)
+
+    .org kvm_ia64_ivt+0x7900
+/////////////////////////////////////////////////////////////////////
+// 0x7900 Entry 61 (size 16 bundles) Reserved
+    KVM_FAULT(61)
+
+    .org kvm_ia64_ivt+0x7a00
+/////////////////////////////////////////////////////////////////////
+// 0x7a00 Entry 62 (size 16 bundles) Reserved
+    KVM_FAULT(62)
+
+    .org kvm_ia64_ivt+0x7b00
+/////////////////////////////////////////////////////////////////////
+// 0x7b00 Entry 63 (size 16 bundles) Reserved
+    KVM_FAULT(63)
+
+    .org kvm_ia64_ivt+0x7c00
+////////////////////////////////////////////////////////////////////
+// 0x7c00 Entry 64 (size 16 bundles) Reserved
+    KVM_FAULT(64)
+
+    .org kvm_ia64_ivt+0x7d00
+/////////////////////////////////////////////////////////////////////
+// 0x7d00 Entry 65 (size 16 bundles) Reserved
+    KVM_FAULT(65)
+
+    .org kvm_ia64_ivt+0x7e00
+/////////////////////////////////////////////////////////////////////
+// 0x7e00 Entry 66 (size 16 bundles) Reserved
+    KVM_FAULT(66)
+
+    .org kvm_ia64_ivt+0x7f00
+////////////////////////////////////////////////////////////////////
+// 0x7f00 Entry 67 (size 16 bundles) Reserved
+    KVM_FAULT(67)
+
+    .org kvm_ia64_ivt+0x8000
+// There is no particular reason for this code to be here, other than that
+// there happens to be space here that would go unused otherwise.  If this
+// fault ever gets "unreserved", simply moved the following code to a more
+// suitable spot...
+
+
+ENTRY(kvm_dtlb_miss_dispatch)
+    mov r19 = 2
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,3,0
+    mov out0=cr.ifa
+    mov out1=r15
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+    ;;
+    KVM_SAVE_REST
+    KVM_SAVE_EXTRA
+    mov rp=r14
+    ;;
+    adds out2=16,r12
+    br.call.sptk.many b6=kvm_page_fault
+END(kvm_dtlb_miss_dispatch)
+
+ENTRY(kvm_itlb_miss_dispatch)
+
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,3,0
+    mov out0=cr.ifa
+    mov out1=r15
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    adds out2=16,r12
+    br.call.sptk.many b6=kvm_page_fault
+END(kvm_itlb_miss_dispatch)
+
+ENTRY(kvm_dispatch_reflection)
+    /*
+     * Input:
+     *  psr.ic: off
+     *  r19:    intr type (offset into ivt, see ia64_int.h)
+     *  r31:    contains saved predicates (pr)
+     */
+    KVM_SAVE_MIN_WITH_COVER_R19
+    alloc r14=ar.pfs,0,0,5,0
+    mov out0=cr.ifa
+    mov out1=cr.isr
+    mov out2=cr.iim
+    mov out3=r15
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    adds out4=16,r12
+    br.call.sptk.many b6=reflect_interruption
+END(kvm_dispatch_reflection)
+
+ENTRY(kvm_dispatch_virtualization_fault)
+    adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
+    adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
+    ;;
+    st8 [r16] = r24
+    st8 [r17] = r25
+    ;;
+    KVM_SAVE_MIN_WITH_COVER_R19
+    ;;
+    alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
+    mov out0=r13        //vcpu
+    adds r3=8,r2                // set up second base pointer
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i                  // guarantee that interruption collection is on
+    ;;
+    //(p15) ssm psr.i               // restore psr.i
+    addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
+    ;;
+    KVM_SAVE_REST
+    KVM_SAVE_EXTRA
+    mov rp=r14
+    ;;
+    adds out1=16,sp         //regs
+    br.call.sptk.many b6=kvm_emulate
+END(kvm_dispatch_virtualization_fault)
+
+
+ENTRY(kvm_dispatch_interrupt)
+    KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
+    ;;
+    alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
+    //mov out0=cr.ivr		// pass cr.ivr as first arg
+    adds r3=8,r2		// set up second base pointer for SAVE_REST
+    ;;
+    ssm psr.ic
+    ;;
+    srlz.i
+    ;;
+    //(p15) ssm psr.i
+    addl r14=@gprel(ia64_leave_hypervisor),gp
+    ;;
+    KVM_SAVE_REST
+    mov rp=r14
+    ;;
+    mov out0=r13		// pass pointer to pt_regs as second arg
+    br.call.sptk.many b6=kvm_ia64_handle_irq
+END(kvm_dispatch_interrupt)
+
+
+
+
+GLOBAL_ENTRY(ia64_leave_nested)
+	rsm psr.i
+	;;
+	adds r21=PT(PR)+16,r12
+	;;
+	lfetch [r21],PT(CR_IPSR)-PT(PR)
+	adds r2=PT(B6)+16,r12
+	adds r3=PT(R16)+16,r12
+	;;
+	lfetch [r21]
+	ld8 r28=[r2],8		// load b6
+	adds r29=PT(R24)+16,r12
+
+	ld8.fill r16=[r3]
+	adds r3=PT(AR_CSD)-PT(R16),r3
+	adds r30=PT(AR_CCV)+16,r12
+	;;
+	ld8.fill r24=[r29]
+	ld8 r15=[r30]		// load ar.ccv
+	;;
+	ld8 r29=[r2],16		// load b7
+	ld8 r30=[r3],16		// load ar.csd
+	;;
+	ld8 r31=[r2],16		// load ar.ssd
+	ld8.fill r8=[r3],16
+	;;
+	ld8.fill r9=[r2],16
+	ld8.fill r10=[r3],PT(R17)-PT(R10)
+	;;
+	ld8.fill r11=[r2],PT(R18)-PT(R11)
+	ld8.fill r17=[r3],16
+	;;
+	ld8.fill r18=[r2],16
+	ld8.fill r19=[r3],16
+	;;
+	ld8.fill r20=[r2],16
+	ld8.fill r21=[r3],16
+	mov ar.csd=r30
+	mov ar.ssd=r31
+	;;
+	rsm psr.i | psr.ic
+	// initiate turning off of interrupt and interruption collection
+	invala			// invalidate ALAT
+	;;
+	srlz.i
+	;;
+	ld8.fill r22=[r2],24
+	ld8.fill r23=[r3],24
+	mov b6=r28
+	;;
+	ld8.fill r25=[r2],16
+	ld8.fill r26=[r3],16
+	mov b7=r29
+	;;
+	ld8.fill r27=[r2],16
+	ld8.fill r28=[r3],16
+	;;
+	ld8.fill r29=[r2],16
+	ld8.fill r30=[r3],24
+	;;
+	ld8.fill r31=[r2],PT(F9)-PT(R31)
+	adds r3=PT(F10)-PT(F6),r3
+	;;
+	ldf.fill f9=[r2],PT(F6)-PT(F9)
+	ldf.fill f10=[r3],PT(F8)-PT(F10)
+	;;
+	ldf.fill f6=[r2],PT(F7)-PT(F6)
+	;;
+	ldf.fill f7=[r2],PT(F11)-PT(F7)
+	ldf.fill f8=[r3],32
+	;;
+	srlz.i			// ensure interruption collection is off
+	mov ar.ccv=r15
+	;;
+	bsw.0	// switch back to bank 0 (no stop bit required beforehand...)
+	;;
+	ldf.fill f11=[r2]
+//	mov r18=r13
+//    mov r21=r13
+	adds r16=PT(CR_IPSR)+16,r12
+	adds r17=PT(CR_IIP)+16,r12
+	;;
+	ld8 r29=[r16],16	// load cr.ipsr
+	ld8 r28=[r17],16	// load cr.iip
+	;;
+	ld8 r30=[r16],16	// load cr.ifs
+	ld8 r25=[r17],16	// load ar.unat
+	;;
+	ld8 r26=[r16],16	// load ar.pfs
+	ld8 r27=[r17],16	// load ar.rsc
+	cmp.eq p9,p0=r0,r0
+	// set p9 to indicate that we should restore cr.ifs
+	;;
+	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
+	ld8 r23=[r17],16// load ar.bspstore (may be garbage)
+	;;
+	ld8 r31=[r16],16	// load predicates
+	ld8 r22=[r17],16	// load b0
+	;;
+	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
+	ld8.fill r1=[r17],16	// load r1
+	;;
+	ld8.fill r12=[r16],16
+	ld8.fill r13=[r17],16
+	;;
+	ld8 r20=[r16],16	// ar.fpsr
+	ld8.fill r15=[r17],16
+	;;
+	ld8.fill r14=[r16],16
+	ld8.fill r2=[r17]
+	;;
+	ld8.fill r3=[r16]
+	;;
+	mov r16=ar.bsp		// get existing backing store pointer
+	;;
+	mov b0=r22
+	mov ar.pfs=r26
+	mov cr.ifs=r30
+	mov cr.ipsr=r29
+	mov ar.fpsr=r20
+	mov cr.iip=r28
+	;;
+	mov ar.rsc=r27
+	mov ar.unat=r25
+	mov pr=r31,-1
+	rfi
+END(ia64_leave_nested)
+
+
+
+GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
+    /*
+     * work.need_resched etc. mustn't get changed
+     *by this CPU before it returns to
+    ;;
+     * user- or fsys-mode, hence we disable interrupts early on:
+     */
+    adds r2 = PT(R4)+16,r12
+    adds r3 = PT(R5)+16,r12
+    adds r8 = PT(EML_UNAT)+16,r12
+    ;;
+    ld8 r8 = [r8]
+    ;;
+    mov ar.unat=r8
+    ;;
+    ld8.fill r4=[r2],16    //load r4
+    ld8.fill r5=[r3],16    //load r5
+    ;;
+    ld8.fill r6=[r2]    //load r6
+    ld8.fill r7=[r3]    //load r7
+    ;;
+END(ia64_leave_hypervisor_prepare)
+//fall through
+GLOBAL_ENTRY(ia64_leave_hypervisor)
+    rsm psr.i
+    ;;
+    br.call.sptk.many b0=leave_hypervisor_tail
+    ;;
+    adds r20=PT(PR)+16,r12
+    adds r8=PT(EML_UNAT)+16,r12
+    ;;
+    ld8 r8=[r8]
+    ;;
+    mov ar.unat=r8
+    ;;
+    lfetch [r20],PT(CR_IPSR)-PT(PR)
+    adds r2 = PT(B6)+16,r12
+    adds r3 = PT(B7)+16,r12
+    ;;
+    lfetch [r20]
+    ;;
+    ld8 r24=[r2],16        /* B6 */
+    ld8 r25=[r3],16        /* B7 */
+    ;;
+    ld8 r26=[r2],16        /* ar_csd */
+    ld8 r27=[r3],16        /* ar_ssd */
+    mov b6 = r24
+    ;;
+    ld8.fill r8=[r2],16
+    ld8.fill r9=[r3],16
+    mov b7 = r25
+    ;;
+    mov ar.csd = r26
+    mov ar.ssd = r27
+    ;;
+    ld8.fill r10=[r2],PT(R15)-PT(R10)
+    ld8.fill r11=[r3],PT(R14)-PT(R11)
+    ;;
+    ld8.fill r15=[r2],PT(R16)-PT(R15)
+    ld8.fill r14=[r3],PT(R17)-PT(R14)
+    ;;
+    ld8.fill r16=[r2],16
+    ld8.fill r17=[r3],16
+    ;;
+    ld8.fill r18=[r2],16
+    ld8.fill r19=[r3],16
+    ;;
+    ld8.fill r20=[r2],16
+    ld8.fill r21=[r3],16
+    ;;
+    ld8.fill r22=[r2],16
+    ld8.fill r23=[r3],16
+    ;;
+    ld8.fill r24=[r2],16
+    ld8.fill r25=[r3],16
+    ;;
+    ld8.fill r26=[r2],16
+    ld8.fill r27=[r3],16
+    ;;
+    ld8.fill r28=[r2],16
+    ld8.fill r29=[r3],16
+    ;;
+    ld8.fill r30=[r2],PT(F6)-PT(R30)
+    ld8.fill r31=[r3],PT(F7)-PT(R31)
+    ;;
+    rsm psr.i | psr.ic
+    // initiate turning off of interrupt and interruption collection
+    invala          // invalidate ALAT
+    ;;
+    srlz.i          // ensure interruption collection is off
+    ;;
+    bsw.0
+    ;;
+    adds r16 = PT(CR_IPSR)+16,r12
+    adds r17 = PT(CR_IIP)+16,r12
+    mov r21=r13		// get current
+    ;;
+    ld8 r31=[r16],16    // load cr.ipsr
+    ld8 r30=[r17],16    // load cr.iip
+    ;;
+    ld8 r29=[r16],16    // load cr.ifs
+    ld8 r28=[r17],16    // load ar.unat
+    ;;
+    ld8 r27=[r16],16    // load ar.pfs
+    ld8 r26=[r17],16    // load ar.rsc
+    ;;
+    ld8 r25=[r16],16    // load ar.rnat
+    ld8 r24=[r17],16    // load ar.bspstore
+    ;;
+    ld8 r23=[r16],16    // load predicates
+    ld8 r22=[r17],16    // load b0
+    ;;
+    ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
+    ld8.fill r1=[r17],16    //load r1
+    ;;
+    ld8.fill r12=[r16],16    //load r12
+    ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
+    ;;
+    ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
+    ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
+    ;;
+    ld8.fill r3=[r16]	//load r3
+    ld8 r18=[r17]	//load ar_ccv
+    ;;
+    mov ar.fpsr=r19
+    mov ar.ccv=r18
+    shr.u r18=r20,16
+    ;;
+kvm_rbs_switch:
+    mov r19=96
+
+kvm_dont_preserve_current_frame:
+/*
+    * To prevent leaking bits between the hypervisor and guest domain,
+    * we must clear the stacked registers in the "invalid" partition here.
+    * 5 registers/cycle on McKinley).
+    */
+#   define pRecurse	p6
+#   define pReturn	p7
+#   define Nregs	14
+
+    alloc loc0=ar.pfs,2,Nregs-2,2,0
+    shr.u loc1=r18,9		// RNaTslots <= floor(dirtySize / (64*8))
+    sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
+    ;;
+    mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
+    shladd in0=loc1,3,r19
+    mov in1=0
+    ;;
+    TEXT_ALIGN(32)
+kvm_rse_clear_invalid:
+    alloc loc0=ar.pfs,2,Nregs-2,2,0
+    cmp.lt pRecurse,p0=Nregs*8,in0
+    // if more than Nregs regs left to clear, (re)curse
+    add out0=-Nregs*8,in0
+    add out1=1,in1		// increment recursion count
+    mov loc1=0
+    mov loc2=0
+    ;;
+    mov loc3=0
+    mov loc4=0
+    mov loc5=0
+    mov loc6=0
+    mov loc7=0
+(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
+    ;;
+    mov loc8=0
+    mov loc9=0
+    cmp.ne pReturn,p0=r0,in1
+    // if recursion count != 0, we need to do a br.ret
+    mov loc10=0
+    mov loc11=0
+(pReturn) br.ret.dptk.many b0
+
+#	undef pRecurse
+#	undef pReturn
+
+// loadrs has already been shifted
+    alloc r16=ar.pfs,0,0,0,0    // drop current register frame
+    ;;
+    loadrs
+    ;;
+    mov ar.bspstore=r24
+    ;;
+    mov ar.unat=r28
+    mov ar.rnat=r25
+    mov ar.rsc=r26
+    ;;
+    mov cr.ipsr=r31
+    mov cr.iip=r30
+    mov cr.ifs=r29
+    mov ar.pfs=r27
+    adds r18=VMM_VPD_BASE_OFFSET,r21
+    ;;
+    ld8 r18=[r18]   //vpd
+    adds r17=VMM_VCPU_ISR_OFFSET,r21
+    ;;
+    ld8 r17=[r17]
+    adds r19=VMM_VPD_VPSR_OFFSET,r18
+    ;;
+    ld8 r19=[r19]        //vpsr
+    adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21
+    ;;
+    ld8 r20=[r20]
+    ;;
+//vsa_sync_write_start
+    mov r25=r18
+    adds r16= VMM_VCPU_GP_OFFSET,r21
+    ;;
+    ld8 r16= [r16] // Put gp in r24
+    movl r24=@gprel(ia64_vmm_entry)  // calculate return address
+    ;;
+    add  r24=r24,r16
+    ;;
+    add r16=PAL_VPS_SYNC_WRITE,r20
+    ;;
+    mov b0=r16
+    br.cond.sptk b0         // call the service
+    ;;
+END(ia64_leave_hypervisor)
+// fall through
+GLOBAL_ENTRY(ia64_vmm_entry)
+/*
+ *  must be at bank 0
+ *  parameter:
+ *  r17:cr.isr
+ *  r18:vpd
+ *  r19:vpsr
+ *  r20:__vsa_base
+ *  r22:b0
+ *  r23:predicate
+ */
+    mov r24=r22
+    mov r25=r18
+    tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
+    ;;
+    (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+    (p1) br.sptk.many ia64_vmm_entry_out
+    ;;
+    tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT		//p1=cr.isr.ir
+    ;;
+    (p1) add r29=PAL_VPS_RESUME_NORMAL,r20
+    (p2) add r29=PAL_VPS_RESUME_HANDLER,r20
+    (p2) ld8 r26=[r25]
+    ;;
+ia64_vmm_entry_out:
+    mov pr=r23,-2
+    mov b0=r29
+    ;;
+    br.cond.sptk b0             // call pal service
+END(ia64_vmm_entry)
+
+
+
+/*
+ * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
+ *                  u64 arg3, u64 arg4, u64 arg5,
+ *                  u64 arg6, u64 arg7);
+ *
+ * XXX: The currently defined services use only 4 args at the max. The
+ *  rest are not consumed.
+ */
+GLOBAL_ENTRY(ia64_call_vsa)
+    .regstk 4,4,0,0
+
+rpsave  =   loc0
+pfssave =   loc1
+psrsave =   loc2
+entry   =   loc3
+hostret =   r24
+
+    alloc   pfssave=ar.pfs,4,4,0,0
+    mov rpsave=rp
+    adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
+    ;;
+    ld8 entry=[entry]
+1:  mov hostret=ip
+    mov r25=in1         // copy arguments
+    mov r26=in2
+    mov r27=in3
+    mov psrsave=psr
+    ;;
+    tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
+    tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
+    ;;
+    add hostret=2f-1b,hostret   // calculate return address
+    add entry=entry,in0
+    ;;
+    rsm psr.i | psr.ic
+    ;;
+    srlz.i
+    mov b6=entry
+    br.cond.sptk b6         // call the service
+2:
+    // Architectural sequence for enabling interrupts if necessary
+(p7)    ssm psr.ic
+    ;;
+(p7)    srlz.i
+    ;;
+//(p6)    ssm psr.i
+    ;;
+    mov rp=rpsave
+    mov ar.pfs=pfssave
+    mov r8=r31
+    ;;
+    srlz.d
+    br.ret.sptk rp
+
+END(ia64_call_vsa)
+
+#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
+
+GLOBAL_ENTRY(vmm_reset_entry)
+    //set up ipsr, iip, vpd.vpsr, dcr
+    // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
+    // For DCR: all bits 0
+    adds r14=-VMM_PT_REGS_SIZE, r12
+    ;;
+    movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
+    movl r10=0x8000000000000000
+    adds r16=PT(CR_IIP), r14
+    adds r20=PT(R1), r14
+    ;;
+    rsm psr.ic | psr.i
+    ;;
+    srlz.i
+    ;;
+    bsw.0
+    ;;
+    mov r21 =r13
+    ;;
+    bsw.1
+    ;;
+    mov ar.rsc = 0
+    ;;
+    flushrs
+    ;;
+    mov ar.bspstore = 0
+    // clear BSPSTORE
+    ;;
+    mov cr.ipsr=r6
+    mov cr.ifs=r10
+    ld8 r4 = [r16] // Set init iip for first run.
+    ld8 r1 = [r20]
+    ;;
+    mov cr.iip=r4
+    ;;
+    adds r16=VMM_VPD_BASE_OFFSET,r13
+    adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13
+    ;;
+    ld8 r18=[r16]
+    ld8 r20=[r20]
+    ;;
+    adds r19=VMM_VPD_VPSR_OFFSET,r18
+    ;;
+    ld8 r19=[r19]
+    mov r17=r0
+    mov r22=r0
+    mov r23=r0
+    br.cond.sptk ia64_vmm_entry
+    br.ret.sptk  b0
+END(vmm_reset_entry)
diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
new file mode 100644
index 000000000000..f6c5617e16af
--- /dev/null
+++ b/arch/ia64/kvm/vti.h
@@ -0,0 +1,290 @@
+/*
+ * vti.h: prototype for generial vt related interface
+ *   	Copyright (c) 2004, Intel Corporation.
+ *
+ *	Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
+ *	Fred Yang (fred.yang@intel.com)
+ * 	Kun Tian (Kevin Tian) (kevin.tian@intel.com)
+ *
+ *  	Copyright (c) 2007, Intel Corporation.
+ *  	Zhang xiantao <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+#ifndef _KVM_VT_I_H
+#define _KVM_VT_I_H
+
+#ifndef __ASSEMBLY__
+#include <asm/page.h>
+
+#include <linux/kvm_host.h>
+
+/* define itr.i and itr.d  in ia64_itr function */
+#define	ITR	0x01
+#define	DTR	0x02
+#define	IaDTR	0x03
+
+#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
+#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
+
+#define RR6 (6UL<<61)
+#define RR7 (7UL<<61)
+
+
+/* config_options in pal_vp_init_env */
+#define	VP_INITIALIZE	1UL
+#define	VP_FR_PMC	1UL<<1
+#define	VP_OPCODE	1UL<<8
+#define	VP_CAUSE	1UL<<9
+#define VP_FW_ACC   	1UL<<63
+
+/* init vp env with initializing vm_buffer */
+#define	VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
+	VP_OPCODE | VP_CAUSE | VP_FW_ACC)
+/* init vp env without initializing vm_buffer */
+#define	VP_INIT_ENV  VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
+
+#define		PAL_VP_CREATE   265
+/* Stacked Virt. Initializes a new VPD for the operation of
+ * a new virtual processor in the virtual environment.
+ */
+#define		PAL_VP_ENV_INFO 266
+/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
+#define		PAL_VP_EXIT_ENV 267
+/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
+#define		PAL_VP_INIT_ENV 268
+/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
+#define		PAL_VP_REGISTER 269
+/*Stacked Virt. Register a different host IVT for the virtual processor.*/
+#define		PAL_VP_RESUME   270
+/* Renamed from PAL_VP_RESUME */
+#define		PAL_VP_RESTORE  270
+/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
+#define		PAL_VP_SUSPEND  271
+/* Renamed from PAL_VP_SUSPEND */
+#define		PAL_VP_SAVE	271
+/* Stacked Virt. Suspends operation for the specified virtual processor on
+ * the logical processor.
+ */
+#define		PAL_VP_TERMINATE 272
+/* Stacked Virt. Terminates operation for the specified virtual processor.*/
+
+union vac {
+	unsigned long value;
+	struct {
+		int a_int:1;
+		int a_from_int_cr:1;
+		int a_to_int_cr:1;
+		int a_from_psr:1;
+		int a_from_cpuid:1;
+		int a_cover:1;
+		int a_bsw:1;
+		long reserved:57;
+	};
+};
+
+union vdc {
+	unsigned long value;
+	struct {
+		int d_vmsw:1;
+		int d_extint:1;
+		int d_ibr_dbr:1;
+		int d_pmc:1;
+		int d_to_pmd:1;
+		int d_itm:1;
+		long reserved:58;
+	};
+};
+
+struct vpd {
+	union vac   vac;
+	union vdc   vdc;
+	unsigned long  virt_env_vaddr;
+	unsigned long  reserved1[29];
+	unsigned long  vhpi;
+	unsigned long  reserved2[95];
+	unsigned long  vgr[16];
+	unsigned long  vbgr[16];
+	unsigned long  vnat;
+	unsigned long  vbnat;
+	unsigned long  vcpuid[5];
+	unsigned long  reserved3[11];
+	unsigned long  vpsr;
+	unsigned long  vpr;
+	unsigned long  reserved4[76];
+	union {
+		unsigned long  vcr[128];
+		struct {
+			unsigned long dcr;
+			unsigned long itm;
+			unsigned long iva;
+			unsigned long rsv1[5];
+			unsigned long pta;
+			unsigned long rsv2[7];
+			unsigned long ipsr;
+			unsigned long isr;
+			unsigned long rsv3;
+			unsigned long iip;
+			unsigned long ifa;
+			unsigned long itir;
+			unsigned long iipa;
+			unsigned long ifs;
+			unsigned long iim;
+			unsigned long iha;
+			unsigned long rsv4[38];
+			unsigned long lid;
+			unsigned long ivr;
+			unsigned long tpr;
+			unsigned long eoi;
+			unsigned long irr[4];
+			unsigned long itv;
+			unsigned long pmv;
+			unsigned long cmcv;
+			unsigned long rsv5[5];
+			unsigned long lrr0;
+			unsigned long lrr1;
+			unsigned long rsv6[46];
+		};
+	};
+	unsigned long  reserved5[128];
+	unsigned long  reserved6[3456];
+	unsigned long  vmm_avail[128];
+	unsigned long  reserved7[4096];
+};
+
+#define PAL_PROC_VM_BIT		(1UL << 40)
+#define PAL_PROC_VMSW_BIT	(1UL << 54)
+
+static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
+		u64 *vp_env_info)
+{
+	struct ia64_pal_retval iprv;
+	PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
+	*buffer_size = iprv.v0;
+	*vp_env_info = iprv.v1;
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_exit_env(u64 iva)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
+			u64 vbase_addr, u64 *vsa_base)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
+			vbase_addr);
+	*vsa_base = iprv.v0;
+
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
+
+	return iprv.status;
+}
+
+static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
+{
+	struct ia64_pal_retval iprv;
+
+	PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
+
+	return iprv.status;
+}
+
+#endif
+
+/*VPD field offset*/
+#define VPD_VAC_START_OFFSET		0
+#define VPD_VDC_START_OFFSET		8
+#define VPD_VHPI_START_OFFSET		256
+#define VPD_VGR_START_OFFSET		1024
+#define VPD_VBGR_START_OFFSET		1152
+#define VPD_VNAT_START_OFFSET		1280
+#define VPD_VBNAT_START_OFFSET		1288
+#define VPD_VCPUID_START_OFFSET		1296
+#define VPD_VPSR_START_OFFSET		1424
+#define VPD_VPR_START_OFFSET		1432
+#define VPD_VRSE_CFLE_START_OFFSET	1440
+#define VPD_VCR_START_OFFSET		2048
+#define VPD_VTPR_START_OFFSET		2576
+#define VPD_VRR_START_OFFSET		3072
+#define VPD_VMM_VAIL_START_OFFSET	31744
+
+/*Virtualization faults*/
+
+#define EVENT_MOV_TO_AR			 1
+#define EVENT_MOV_TO_AR_IMM		 2
+#define EVENT_MOV_FROM_AR		 3
+#define EVENT_MOV_TO_CR			 4
+#define EVENT_MOV_FROM_CR		 5
+#define EVENT_MOV_TO_PSR		 6
+#define EVENT_MOV_FROM_PSR		 7
+#define EVENT_ITC_D			 8
+#define EVENT_ITC_I			 9
+#define EVENT_MOV_TO_RR			 10
+#define EVENT_MOV_TO_DBR		 11
+#define EVENT_MOV_TO_IBR		 12
+#define EVENT_MOV_TO_PKR		 13
+#define EVENT_MOV_TO_PMC		 14
+#define EVENT_MOV_TO_PMD		 15
+#define EVENT_ITR_D			 16
+#define EVENT_ITR_I			 17
+#define EVENT_MOV_FROM_RR		 18
+#define EVENT_MOV_FROM_DBR		 19
+#define EVENT_MOV_FROM_IBR		 20
+#define EVENT_MOV_FROM_PKR		 21
+#define EVENT_MOV_FROM_PMC		 22
+#define EVENT_MOV_FROM_CPUID		 23
+#define EVENT_SSM			 24
+#define EVENT_RSM			 25
+#define EVENT_PTC_L			 26
+#define EVENT_PTC_G			 27
+#define EVENT_PTC_GA			 28
+#define EVENT_PTR_D			 29
+#define EVENT_PTR_I			 30
+#define EVENT_THASH			 31
+#define EVENT_TTAG			 32
+#define EVENT_TPA			 33
+#define EVENT_TAK			 34
+#define EVENT_PTC_E			 35
+#define EVENT_COVER			 36
+#define EVENT_RFI			 37
+#define EVENT_BSW_0			 38
+#define EVENT_BSW_1			 39
+#define EVENT_VMSW			 40
+
+/**PAL virtual services offsets */
+#define PAL_VPS_RESUME_NORMAL           0x0000
+#define PAL_VPS_RESUME_HANDLER          0x0400
+#define PAL_VPS_SYNC_READ               0x0800
+#define PAL_VPS_SYNC_WRITE              0x0c00
+#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
+#define PAL_VPS_THASH                   0x1400
+#define PAL_VPS_TTAG                    0x1800
+#define PAL_VPS_RESTORE                 0x1c00
+#define PAL_VPS_SAVE                    0x2000
+
+#endif/* _VT_I_H*/
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
new file mode 100644
index 000000000000..def4576d22b1
--- /dev/null
+++ b/arch/ia64/kvm/vtlb.c
@@ -0,0 +1,636 @@
+/*
+ * vtlb.c: guest virtual tlb handling module.
+ * Copyright (c) 2004, Intel Corporation.
+ *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
+ *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *
+ * Copyright (c) 2007, Intel Corporation.
+ *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
+ *  Xiantao Zhang <xiantao.zhang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include "vcpu.h"
+
+#include <linux/rwsem.h>
+
+#include <asm/tlb.h>
+
+/*
+ * Check to see if the address rid:va is translated by the TLB
+ */
+
+static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
+{
+	return ((trp->p) && (trp->rid == rid)
+				&& ((va-trp->vadr) < PSIZE(trp->ps)));
+}
+
+/*
+ * Only for GUEST TR format.
+ */
+static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
+{
+	u64 sa1, ea1;
+
+	if (!trp->p || trp->rid != rid)
+		return 0;
+
+	sa1 = trp->vadr;
+	ea1 = sa1 + PSIZE(trp->ps) - 1;
+	eva -= 1;
+	if ((sva > ea1) || (sa1 > eva))
+		return 0;
+	else
+		return 1;
+
+}
+
+void machine_tlb_purge(u64 va, u64 ps)
+{
+	ia64_ptcl(va, ps << 2);
+}
+
+void local_flush_tlb_all(void)
+{
+	int i, j;
+	unsigned long flags, count0, count1;
+	unsigned long stride0, stride1, addr;
+
+	addr    = current_vcpu->arch.ptce_base;
+	count0  = current_vcpu->arch.ptce_count[0];
+	count1  = current_vcpu->arch.ptce_count[1];
+	stride0 = current_vcpu->arch.ptce_stride[0];
+	stride1 = current_vcpu->arch.ptce_stride[1];
+
+	local_irq_save(flags);
+	for (i = 0; i < count0; ++i) {
+		for (j = 0; j < count1; ++j) {
+			ia64_ptce(addr);
+			addr += stride1;
+		}
+		addr += stride0;
+	}
+	local_irq_restore(flags);
+	ia64_srlz_i();          /* srlz.i implies srlz.d */
+}
+
+int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
+{
+	union ia64_rr    vrr;
+	union ia64_pta   vpta;
+	struct  ia64_psr   vpsr;
+
+	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
+	vrr.val = vcpu_get_rr(vcpu, vadr);
+	vpta.val = vcpu_get_pta(vcpu);
+
+	if (vrr.ve & vpta.ve) {
+		switch (ref) {
+		case DATA_REF:
+		case NA_REF:
+			return vpsr.dt;
+		case INST_REF:
+			return vpsr.dt && vpsr.it && vpsr.ic;
+		case RSE_REF:
+			return vpsr.dt && vpsr.rt;
+
+		}
+	}
+	return 0;
+}
+
+struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
+{
+	u64 index, pfn, rid, pfn_bits;
+
+	pfn_bits = vpta.size - 5 - 8;
+	pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
+	rid = _REGION_ID(vrr);
+	index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
+	*tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
+
+	return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
+				(index << 5));
+}
+
+struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
+{
+
+	struct thash_data *trp;
+	int  i;
+	u64 rid;
+
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;;
+	if (type == D_TLB) {
+		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+						i < NDTRS; i++, trp++) {
+				if (__is_tr_translated(trp, rid, va))
+					return trp;
+			}
+		}
+	} else {
+		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+					i < NITRS; i++, trp++) {
+				if (__is_tr_translated(trp, rid, va))
+					return trp;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
+{
+	union ia64_rr rr;
+	struct thash_data *head;
+	unsigned long ps, gpaddr;
+
+	ps = itir_ps(itir);
+
+	gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
+		(ifa & ((1UL << ps) - 1));
+
+	rr.val = ia64_get_rr(ifa);
+	head = (struct thash_data *)ia64_thash(ifa);
+	head->etag = INVALID_TI_TAG;
+	ia64_mf();
+	head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
+	head->itir = rr.ps << 2;
+	head->etag = ia64_ttag(ifa);
+	head->gpaddr = gpaddr;
+}
+
+void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
+{
+	u64 i, dirty_pages = 1;
+	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
+	spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
+	void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE)
+						+ KVM_MEM_DIRTY_LOG_OFS;
+	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
+
+	vmm_spin_lock(lock);
+	for (i = 0; i < dirty_pages; i++) {
+		/* avoid RMW */
+		if (!test_bit(base_gfn + i, dirty_bitmap))
+			set_bit(base_gfn + i , dirty_bitmap);
+	}
+	vmm_spin_unlock(lock);
+}
+
+void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
+{
+	u64 phy_pte, psr;
+	union ia64_rr mrr;
+
+	mrr.val = ia64_get_rr(va);
+	phy_pte = translate_phy_pte(&pte, itir, va);
+
+	if (itir_ps(itir) >= mrr.ps) {
+		vhpt_insert(phy_pte, itir, va, pte);
+	} else {
+		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
+		psr = ia64_clear_ic();
+		ia64_itc(type, va, phy_pte, itir_ps(itir));
+		ia64_set_psr(psr);
+	}
+
+	if (!(pte&VTLB_PTE_IO))
+		mark_pages_dirty(v, pte, itir_ps(itir));
+}
+
+/*
+ *   vhpt lookup
+ */
+struct thash_data *vhpt_lookup(u64 va)
+{
+	struct thash_data *head;
+	u64 tag;
+
+	head = (struct thash_data *)ia64_thash(va);
+	tag = ia64_ttag(va);
+	if (head->etag == tag)
+		return head;
+	return NULL;
+}
+
+u64 guest_vhpt_lookup(u64 iha, u64 *pte)
+{
+	u64 ret;
+	struct thash_data *data;
+
+	data = __vtr_lookup(current_vcpu, iha, D_TLB);
+	if (data != NULL)
+		thash_vhpt_insert(current_vcpu, data->page_flags,
+			data->itir, iha, D_TLB);
+
+	asm volatile ("rsm psr.ic|psr.i;;"
+			"srlz.d;;"
+			"ld8.s r9=[%1];;"
+			"tnat.nz p6,p7=r9;;"
+			"(p6) mov %0=1;"
+			"(p6) mov r9=r0;"
+			"(p7) extr.u r9=r9,0,53;;"
+			"(p7) mov %0=r0;"
+			"(p7) st8 [%2]=r9;;"
+			"ssm psr.ic;;"
+			"srlz.d;;"
+			/* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
+			: "=r"(ret) : "r"(iha), "r"(pte):"memory");
+
+	return ret;
+}
+
+/*
+ *  purge software guest tlb
+ */
+
+static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	struct thash_data *cur;
+	u64 start, curadr, size, psbits, tag, rr_ps, num;
+	union ia64_rr vrr;
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	vrr.val = vcpu_get_rr(v, va);
+	psbits = VMX(v, psbits[(va >> 61)]);
+	start = va & ~((1UL << ps) - 1);
+	while (psbits) {
+		curadr = start;
+		rr_ps = __ffs(psbits);
+		psbits &= ~(1UL << rr_ps);
+		num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
+		size = PSIZE(rr_ps);
+		vrr.ps = rr_ps;
+		while (num) {
+			cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
+			if (cur->etag == tag && cur->ps == rr_ps)
+				cur->etag = INVALID_TI_TAG;
+			curadr += size;
+			num--;
+		}
+	}
+}
+
+
+/*
+ *  purge VHPT and machine TLB
+ */
+static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	struct thash_data *cur;
+	u64 start, size, tag, num;
+	union ia64_rr rr;
+
+	start = va & ~((1UL << ps) - 1);
+	rr.val = ia64_get_rr(va);
+	size = PSIZE(rr.ps);
+	num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
+	while (num) {
+		cur = (struct thash_data *)ia64_thash(start);
+		tag = ia64_ttag(start);
+		if (cur->etag == tag)
+			cur->etag = INVALID_TI_TAG;
+		start += size;
+		num--;
+	}
+	machine_tlb_purge(va, ps);
+}
+
+/*
+ * Insert an entry into hash TLB or VHPT.
+ * NOTES:
+ *  1: When inserting VHPT to thash, "va" is a must covered
+ *  address by the inserted machine VHPT entry.
+ *  2: The format of entry is always in TLB.
+ *  3: The caller need to make sure the new entry will not overlap
+ *     with any existed entry.
+ */
+void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
+{
+	struct thash_data *head;
+	union ia64_rr vrr;
+	u64 tag;
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	vrr.val = vcpu_get_rr(v, va);
+	vrr.ps = itir_ps(itir);
+	VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
+	head = vsa_thash(hcb->pta, va, vrr.val, &tag);
+	head->page_flags = pte;
+	head->itir = itir;
+	head->etag = tag;
+}
+
+int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
+{
+	struct thash_data  *trp;
+	int  i;
+	u64 end, rid;
+
+	rid = vcpu_get_rr(vcpu, va);
+	rid = rid & RR_RID_MASK;
+	end = va + PSIZE(ps);
+	if (type == D_TLB) {
+		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
+					i < NDTRS; i++, trp++) {
+				if (__is_tr_overlap(trp, rid, va, end))
+					return i;
+			}
+		}
+	} else {
+		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
+			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
+					i < NITRS; i++, trp++) {
+				if (__is_tr_overlap(trp, rid, va, end))
+					return i;
+			}
+		}
+	}
+	return -1;
+}
+
+/*
+ * Purge entries in VTLB and VHPT
+ */
+void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	if (vcpu_quick_region_check(v->arch.tc_regions, va))
+		vtlb_purge(v, va, ps);
+	vhpt_purge(v, va, ps);
+}
+
+void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
+{
+	u64 old_va = va;
+	va = REGION_OFFSET(va);
+	if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
+		vtlb_purge(v, va, ps);
+	vhpt_purge(v, va, ps);
+}
+
+u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
+{
+	u64 ps, ps_mask, paddr, maddr;
+	union pte_flags phy_pte;
+
+	ps = itir_ps(itir);
+	ps_mask = ~((1UL << ps) - 1);
+	phy_pte.val = *pte;
+	paddr = *pte;
+	paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
+	maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT);
+	if (maddr & GPFN_IO_MASK) {
+		*pte |= VTLB_PTE_IO;
+		return -1;
+	}
+	maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
+					(paddr & ~PAGE_MASK);
+	phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
+	return phy_pte.val;
+}
+
+/*
+ * Purge overlap TCs and then insert the new entry to emulate itc ops.
+ *    Notes: Only TC entry can purge and insert.
+ *    1 indicates this is MMIO
+ */
+int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
+						u64 ifa, int type)
+{
+	u64 ps;
+	u64 phy_pte;
+	union ia64_rr vrr, mrr;
+	int ret = 0;
+
+	ps = itir_ps(itir);
+	vrr.val = vcpu_get_rr(v, ifa);
+	mrr.val = ia64_get_rr(ifa);
+
+	phy_pte = translate_phy_pte(&pte, itir, ifa);
+
+	/* Ensure WB attribute if pte is related to a normal mem page,
+	 * which is required by vga acceleration since qemu maps shared
+	 * vram buffer with WB.
+	 */
+	if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) {
+		pte &= ~_PAGE_MA_MASK;
+		phy_pte &= ~_PAGE_MA_MASK;
+	}
+
+	if (pte & VTLB_PTE_IO)
+		ret = 1;
+
+	vtlb_purge(v, ifa, ps);
+	vhpt_purge(v, ifa, ps);
+
+	if (ps == mrr.ps) {
+		if (!(pte&VTLB_PTE_IO)) {
+			vhpt_insert(phy_pte, itir, ifa, pte);
+		} else {
+			vtlb_insert(v, pte, itir, ifa);
+			vcpu_quick_region_set(VMX(v, tc_regions), ifa);
+		}
+	} else if (ps > mrr.ps) {
+		vtlb_insert(v, pte, itir, ifa);
+		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
+		if (!(pte&VTLB_PTE_IO))
+			vhpt_insert(phy_pte, itir, ifa, pte);
+	} else {
+		u64 psr;
+		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
+		psr = ia64_clear_ic();
+		ia64_itc(type, ifa, phy_pte, ps);
+		ia64_set_psr(psr);
+	}
+	if (!(pte&VTLB_PTE_IO))
+		mark_pages_dirty(v, pte, ps);
+
+	return ret;
+}
+
+/*
+ * Purge all TCs or VHPT entries including those in Hash table.
+ *
+ */
+
+void thash_purge_all(struct kvm_vcpu *v)
+{
+	int i;
+	struct thash_data *head;
+	struct thash_cb  *vtlb, *vhpt;
+	vtlb = &v->arch.vtlb;
+	vhpt = &v->arch.vhpt;
+
+	for (i = 0; i < 8; i++)
+		VMX(v, psbits[i]) = 0;
+
+	head = vtlb->hash;
+	for (i = 0; i < vtlb->num; i++) {
+		head->page_flags = 0;
+		head->etag = INVALID_TI_TAG;
+		head->itir = 0;
+		head->next = 0;
+		head++;
+	};
+
+	head = vhpt->hash;
+	for (i = 0; i < vhpt->num; i++) {
+		head->page_flags = 0;
+		head->etag = INVALID_TI_TAG;
+		head->itir = 0;
+		head->next = 0;
+		head++;
+	};
+
+	local_flush_tlb_all();
+}
+
+
+/*
+ * Lookup the hash table and its collision chain to find an entry
+ * covering this address rid:va or the entry.
+ *
+ * INPUT:
+ *  in: TLB format for both VHPT & TLB.
+ */
+
+struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
+{
+	struct thash_data  *cch;
+	u64    psbits, ps, tag;
+	union ia64_rr vrr;
+
+	struct thash_cb *hcb = &v->arch.vtlb;
+
+	cch = __vtr_lookup(v, va, is_data);;
+	if (cch)
+		return cch;
+
+	if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
+		return NULL;
+
+	psbits = VMX(v, psbits[(va >> 61)]);
+	vrr.val = vcpu_get_rr(v, va);
+	while (psbits) {
+		ps = __ffs(psbits);
+		psbits &= ~(1UL << ps);
+		vrr.ps = ps;
+		cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
+		if (cch->etag == tag && cch->ps == ps)
+			return cch;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Initialize internal control data before service.
+ */
+void thash_init(struct thash_cb *hcb, u64 sz)
+{
+	int i;
+	struct thash_data *head;
+
+	hcb->pta.val = (unsigned long)hcb->hash;
+	hcb->pta.vf = 1;
+	hcb->pta.ve = 1;
+	hcb->pta.size = sz;
+	head = hcb->hash;
+	for (i = 0; i < hcb->num; i++) {
+		head->page_flags = 0;
+		head->itir = 0;
+		head->etag = INVALID_TI_TAG;
+		head->next = 0;
+		head++;
+	}
+}
+
+u64 kvm_lookup_mpa(u64 gpfn)
+{
+	u64 *base = (u64 *) KVM_P2M_BASE;
+	return *(base + gpfn);
+}
+
+u64 kvm_gpa_to_mpa(u64 gpa)
+{
+	u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
+	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
+}
+
+
+/*
+ * Fetch guest bundle code.
+ * INPUT:
+ *  gip: guest ip
+ *  pbundle: used to return fetched bundle.
+ */
+int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
+{
+	u64     gpip = 0;   /* guest physical IP*/
+	u64     *vpa;
+	struct thash_data    *tlb;
+	u64     maddr;
+
+	if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
+		/* I-side physical mode */
+		gpip = gip;
+	} else {
+		tlb = vtlb_lookup(vcpu, gip, I_TLB);
+		if (tlb)
+			gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
+				(gip & (PSIZE(tlb->ps) - 1));
+	}
+	if (gpip) {
+		maddr = kvm_gpa_to_mpa(gpip);
+	} else {
+		tlb = vhpt_lookup(gip);
+		if (tlb == NULL) {
+			ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
+			return IA64_FAULT;
+		}
+		maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
+					| (gip & (PSIZE(tlb->ps) - 1));
+	}
+	vpa = (u64 *)__kvm_va(maddr);
+
+	pbundle->i64[0] = *vpa++;
+	pbundle->i64[1] = *vpa;
+
+	return IA64_NO_FAULT;
+}
+
+
+void kvm_init_vhpt(struct kvm_vcpu *v)
+{
+	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
+	thash_init(&v->arch.vhpt, VHPT_SHIFT);
+	ia64_set_pta(v->arch.vhpt.pta.val);
+	/*Enable VHPT here?*/
+}
+
+void kvm_init_vtlb(struct kvm_vcpu *v)
+{
+	v->arch.vtlb.num = VTLB_NUM_ENTRIES;
+	thash_init(&v->arch.vtlb, VTLB_SHIFT);
+}
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 5c1de53c8c1c..fc6c6636ffda 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -682,15 +682,6 @@ mem_init (void)
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-void online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	init_page_count(page);
-	__free_page(page);
-	totalram_pages++;
-	num_physpages++;
-}
-
 int arch_add_memory(int nid, u64 start, u64 size)
 {
 	pg_data_t *pgdat;
diff --git a/arch/m68k/kernel/ints.c b/arch/m68k/kernel/ints.c
index 2b412454cb41..ded7dd2f67b2 100644
--- a/arch/m68k/kernel/ints.c
+++ b/arch/m68k/kernel/ints.c
@@ -186,7 +186,7 @@ int setup_irq(unsigned int irq, struct irq_node *node)
 
 	if (irq >= NR_IRQS || !(contr = irq_controller[irq])) {
 		printk("%s: Incorrect IRQ %d from %s\n",
-		       __FUNCTION__, irq, node->devname);
+		       __func__, irq, node->devname);
 		return -ENXIO;
 	}
 
@@ -249,7 +249,7 @@ void free_irq(unsigned int irq, void *dev_id)
 	unsigned long flags;
 
 	if (irq >= NR_IRQS || !(contr = irq_controller[irq])) {
-		printk("%s: Incorrect IRQ %d\n", __FUNCTION__, irq);
+		printk("%s: Incorrect IRQ %d\n", __func__, irq);
 		return;
 	}
 
@@ -267,7 +267,7 @@ void free_irq(unsigned int irq, void *dev_id)
 		node->handler = NULL;
 	} else
 		printk("%s: Removing probably wrong IRQ %d\n",
-		       __FUNCTION__, irq);
+		       __func__, irq);
 
 	if (!irq_list[irq]) {
 		if (contr->shutdown)
@@ -288,7 +288,7 @@ void enable_irq(unsigned int irq)
 
 	if (irq >= NR_IRQS || !(contr = irq_controller[irq])) {
 		printk("%s: Incorrect IRQ %d\n",
-		       __FUNCTION__, irq);
+		       __func__, irq);
 		return;
 	}
 
@@ -312,7 +312,7 @@ void disable_irq(unsigned int irq)
 
 	if (irq >= NR_IRQS || !(contr = irq_controller[irq])) {
 		printk("%s: Incorrect IRQ %d\n",
-		       __FUNCTION__, irq);
+		       __func__, irq);
 		return;
 	}
 
diff --git a/arch/m68k/mac/oss.c b/arch/m68k/mac/oss.c
index 50603d3dce84..3c943d2ec570 100644
--- a/arch/m68k/mac/oss.c
+++ b/arch/m68k/mac/oss.c
@@ -190,7 +190,7 @@ void oss_irq_enable(int irq) {
 			break;
 #ifdef DEBUG_IRQUSE
 		default:
-			printk("%s unknown irq %d\n",__FUNCTION__, irq);
+			printk("%s unknown irq %d\n", __func__, irq);
 			break;
 #endif
 	}
@@ -230,7 +230,7 @@ void oss_irq_disable(int irq) {
 			break;
 #ifdef DEBUG_IRQUSE
 		default:
-			printk("%s unknown irq %d\n", __FUNCTION__, irq);
+			printk("%s unknown irq %d\n", __func__, irq);
 			break;
 #endif
 	}
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index f42caa79e4e8..a2bb01f59642 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -79,7 +79,6 @@ void show_mem(void)
 
 	printk("\nMem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	for_each_online_pgdat(pgdat) {
 		for (i = 0; i < pgdat->node_spanned_pages; i++) {
 			struct page *page = pgdat->node_mem_map + i;
diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c
index 46161cef08b9..9f0e3d59bf92 100644
--- a/arch/m68k/q40/q40ints.c
+++ b/arch/m68k/q40/q40ints.c
@@ -47,7 +47,7 @@ static int q40_irq_startup(unsigned int irq)
 	switch (irq) {
 	case 1: case 2: case 8: case 9:
 	case 11: case 12: case 13:
-		printk("%s: ISA IRQ %d not implemented by HW\n", __FUNCTION__, irq);
+		printk("%s: ISA IRQ %d not implemented by HW\n", __func__, irq);
 		return -ENXIO;
 	}
 	return 0;
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 8724ed3298d3..e5a7c5d96364 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -81,7 +81,9 @@ config MIPS_COBALT
 config MACH_DECSTATION
 	bool "DECstations"
 	select BOOT_ELF32
+	select CEVT_DS1287
 	select CEVT_R4K
+	select CSRC_IOASIC
 	select CSRC_R4K
 	select CPU_DADDI_WORKAROUNDS if 64BIT
 	select CPU_R4000_WORKAROUNDS if 64BIT
@@ -221,6 +223,7 @@ config MIPS_MALTA
 	select DMA_NONCOHERENT
 	select GENERIC_ISA_DMA
 	select IRQ_CPU
+	select IRQ_GIC
 	select HW_HAS_PCI
 	select I8253
 	select I8259
@@ -309,12 +312,12 @@ config MACH_VR41XX
 	select GENERIC_HARDIRQS_NO__DO_IRQ
 
 config PNX8550_JBS
-	bool "Philips PNX8550 based JBS board"
+	bool "NXP PNX8550 based JBS board"
 	select PNX8550
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 
 config PNX8550_STB810
-	bool "Philips PNX8550 based STB810 board"
+	bool "NXP PNX8550 based STB810 board"
 	select PNX8550
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 
@@ -612,6 +615,7 @@ config TOSHIBA_JMR3927
 	select SYS_SUPPORTS_LITTLE_ENDIAN
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select GENERIC_HARDIRQS_NO__DO_IRQ
+	select GPIO_TXX9
 
 config TOSHIBA_RBTX4927
 	bool "Toshiba RBTX49[23]7 board"
@@ -653,7 +657,7 @@ config TOSHIBA_RBTX4938
 	select SYS_SUPPORTS_BIG_ENDIAN
 	select SYS_SUPPORTS_KGDB
 	select GENERIC_HARDIRQS_NO__DO_IRQ
-	select GENERIC_GPIO
+	select GPIO_TXX9
 	help
 	  This Toshiba board is based on the TX4938 processor. Say Y here to
 	  support this machine type
@@ -767,6 +771,9 @@ config BOOT_RAW
 config CEVT_BCM1480
 	bool
 
+config CEVT_DS1287
+	bool
+
 config CEVT_GT641XX
 	bool
 
@@ -782,12 +789,20 @@ config CEVT_TXX9
 config CSRC_BCM1480
 	bool
 
+config CSRC_IOASIC
+	bool
+
 config CSRC_R4K
 	bool
 
 config CSRC_SB1250
 	bool
 
+config GPIO_TXX9
+	select GENERIC_GPIO
+	select HAVE_GPIO_LIB
+	bool
+
 config CFE
 	bool
 
@@ -840,6 +855,9 @@ config MIPS_NILE4
 config MIPS_DISABLE_OBSOLETE_IDE
 	bool
 
+config SYNC_R4K
+	bool
+
 config NO_IOPORT
 	def_bool n
 
@@ -909,6 +927,9 @@ config IRQ_TXX9
 config IRQ_GT641XX
 	bool
 
+config IRQ_GIC
+	bool
+
 config MIPS_BOARDS_GEN
 	bool
 
@@ -1811,6 +1832,17 @@ config NR_CPUS
 	  performance should round up your number of processors to the next
 	  power of two.
 
+config MIPS_CMP
+	bool "MIPS CMP framework support"
+	depends on SMP
+	select SYNC_R4K
+	select SYS_SUPPORTS_SCHED_SMT
+	select WEAK_ORDERING
+	default n
+	help
+	  This is a placeholder option for the GCMP work. It will need to
+	  be handled differently...
+
 source "kernel/time/Kconfig"
 
 #
diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug
index fd7124c1b75a..f18cf92650e3 100644
--- a/arch/mips/Kconfig.debug
+++ b/arch/mips/Kconfig.debug
@@ -73,14 +73,4 @@ config RUNTIME_DEBUG
 	  include/asm-mips/debug.h for debuging macros.
 	  If unsure, say N.
 
-config MIPS_UNCACHED
-	bool "Run uncached"
-	depends on DEBUG_KERNEL && !SMP && !SGI_IP27
-	help
-	  If you say Y here there kernel will disable all CPU caches.  This will
-	  reduce the system's performance dramatically but can help finding
-	  otherwise hard to track bugs.  It can also useful if you're doing
-	  hardware debugging with a logic analyzer and need to see all traffic
-	  on the bus.
-
 endmenu
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 1c62381f5c23..69648d01acc0 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -410,21 +410,21 @@ load-$(CONFIG_CASIO_E55)	+= 0xffffffff80004000
 load-$(CONFIG_TANBAC_TB022X)	+= 0xffffffff80000000
 
 #
-# Common Philips PNX8550
+# Common NXP PNX8550
 #
-core-$(CONFIG_SOC_PNX8550)	+= arch/mips/philips/pnx8550/common/
+core-$(CONFIG_SOC_PNX8550)	+= arch/mips/nxp/pnx8550/common/
 cflags-$(CONFIG_SOC_PNX8550)	+= -Iinclude/asm-mips/mach-pnx8550
 
 #
-# Philips PNX8550 JBS board
+# NXP PNX8550 JBS board
 #
-libs-$(CONFIG_PNX8550_JBS)	+= arch/mips/philips/pnx8550/jbs/
+libs-$(CONFIG_PNX8550_JBS)	+= arch/mips/nxp/pnx8550/jbs/
 #cflags-$(CONFIG_PNX8550_JBS)	+= -Iinclude/asm-mips/mach-pnx8550
 load-$(CONFIG_PNX8550_JBS)	+= 0xffffffff80060000
 
-# Philips PNX8550 STB810 board
+# NXP PNX8550 STB810 board
 #
-libs-$(CONFIG_PNX8550_STB810)	+= arch/mips/philips/pnx8550/stb810/
+libs-$(CONFIG_PNX8550_STB810)	+= arch/mips/nxp/pnx8550/stb810/
 load-$(CONFIG_PNX8550_STB810)	+= 0xffffffff80060000
 
 # NEC EMMA2RH boards
diff --git a/arch/mips/au1000/common/cputable.c b/arch/mips/au1000/common/cputable.c
index 5c0d35d6e22a..8c93a05d7382 100644
--- a/arch/mips/au1000/common/cputable.c
+++ b/arch/mips/au1000/common/cputable.c
@@ -11,10 +11,7 @@
  *  as published by the Free Software Foundation; either version
  *  2 of the License, or (at your option) any later version.
  */
-#include <linux/string.h>
-#include <linux/sched.h>
-#include <linux/threads.h>
-#include <linux/init.h>
+
 #include <asm/mach-au1x00/au1000.h>
 
 struct cpu_spec* cur_cpu_spec[NR_CPUS];
diff --git a/arch/mips/au1000/common/dbdma.c b/arch/mips/au1000/common/dbdma.c
index 57f17b41098d..53377dfc0640 100644
--- a/arch/mips/au1000/common/dbdma.c
+++ b/arch/mips/au1000/common/dbdma.c
@@ -31,18 +31,12 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1xxx_dbdma.h>
-#include <asm/system.h>
-
 
 #if defined(CONFIG_SOC_AU1550) || defined(CONFIG_SOC_AU1200)
 
diff --git a/arch/mips/au1000/common/dbg_io.c b/arch/mips/au1000/common/dbg_io.c
index 79e0b0a51ace..eae1bb2ca26e 100644
--- a/arch/mips/au1000/common/dbg_io.c
+++ b/arch/mips/au1000/common/dbg_io.c
@@ -1,5 +1,4 @@
 
-#include <asm/io.h>
 #include <asm/mach-au1x00/au1000.h>
 
 #ifdef CONFIG_KGDB
@@ -55,8 +54,7 @@ typedef         unsigned int  uint32;
 #define UART16550_READ(y)    (au_readl(DEBUG_BASE + y) & 0xff)
 #define UART16550_WRITE(y, z) (au_writel(z&0xff, DEBUG_BASE + y))
 
-extern unsigned long get_au1x00_uart_baud_base(void);
-extern unsigned long cal_r4koff(void);
+extern unsigned long calc_clock(void);
 
 void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
 {
@@ -64,7 +62,7 @@ void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
 	if (UART16550_READ(UART_MOD_CNTRL) != 0x3) {
 		UART16550_WRITE(UART_MOD_CNTRL, 3);
 	}
-	cal_r4koff();
+	calc_clock();
 
 	/* disable interrupts */
 	UART16550_WRITE(UART_IER, 0);
diff --git a/arch/mips/au1000/common/dma.c b/arch/mips/au1000/common/dma.c
index c78260d4e837..95f69ea146e9 100644
--- a/arch/mips/au1000/common/dma.c
+++ b/arch/mips/au1000/common/dma.c
@@ -33,12 +33,9 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
-#include <linux/sched.h>
 #include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/delay.h>
 #include <linux/interrupt.h>
-#include <asm/system.h>
+
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-au1x00/au1000_dma.h>
 
diff --git a/arch/mips/au1000/common/gpio.c b/arch/mips/au1000/common/gpio.c
index 0b658f1db4ce..525452589971 100644
--- a/arch/mips/au1000/common/gpio.c
+++ b/arch/mips/au1000/common/gpio.c
@@ -27,13 +27,8 @@
  * 	others have a second one : GPIO2
  */
 
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/types.h>
 #include <linux/module.h>
 
-#include <asm/addrspace.h>
-
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/gpio.h>
 
diff --git a/arch/mips/au1000/common/irq.c b/arch/mips/au1000/common/irq.c
index 3c7714f057ac..f0626992fd75 100644
--- a/arch/mips/au1000/common/irq.c
+++ b/arch/mips/au1000/common/irq.c
@@ -1,7 +1,6 @@
 /*
- * Copyright 2001 MontaVista Software Inc.
- * Author: MontaVista Software, Inc.
- *		ppopov@mvista.com or source@mvista.com
+ * Copyright 2001, 2007-2008 MontaVista Software Inc.
+ * Author: MontaVista Software, Inc. <source@mvista.com>
  *
  * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org)
  *
@@ -27,7 +26,6 @@
  */
 #include <linux/bitops.h>
 #include <linux/init.h>
-#include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 
@@ -591,7 +589,7 @@ void __init arch_init_irq(void)
 		imp++;
 	}
 
-	set_c0_status(ALLINTS);
+	set_c0_status(IE_IRQ0 | IE_IRQ1 | IE_IRQ2 | IE_IRQ3 | IE_IRQ4);
 
 	/* Board specific IRQ initialization.
 	*/
diff --git a/arch/mips/au1000/common/pci.c b/arch/mips/au1000/common/pci.c
index ce771487567d..7e966b31e3e1 100644
--- a/arch/mips/au1000/common/pci.c
+++ b/arch/mips/au1000/common/pci.c
@@ -30,7 +30,7 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/types.h>
+
 #include <linux/pci.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/arch/mips/au1000/common/platform.c b/arch/mips/au1000/common/platform.c
index 39d681265297..31d2a2270878 100644
--- a/arch/mips/au1000/common/platform.c
+++ b/arch/mips/au1000/common/platform.c
@@ -3,18 +3,65 @@
  *
  * Copyright 2004, Matt Porter <mporter@kernel.crashing.org>
  *
+ * (C) Copyright Embedded Alley Solutions, Inc 2005
+ * Author: Pantelis Antoniou <pantelis@embeddedalley.com>
+ *
  * This file is licensed under the terms of the GNU General Public
  * License version 2.  This program is licensed "as is" without any
  * warranty of any kind, whether express or implied.
  */
-#include <linux/device.h>
+
 #include <linux/platform_device.h>
-#include <linux/kernel.h>
+#include <linux/serial_8250.h>
 #include <linux/init.h>
-#include <linux/resource.h>
 
 #include <asm/mach-au1x00/au1xxx.h>
 
+#define PORT(_base, _irq)				\
+	{						\
+		.iobase		= _base,		\
+		.membase	= (void __iomem *)_base,\
+		.mapbase	= CPHYSADDR(_base),	\
+		.irq		= _irq,			\
+		.regshift	= 2,			\
+		.iotype		= UPIO_AU,		\
+		.flags		= UPF_SKIP_TEST 	\
+	}
+
+static struct plat_serial8250_port au1x00_uart_data[] = {
+#if defined(CONFIG_SERIAL_8250_AU1X00)
+#if defined(CONFIG_SOC_AU1000)
+	PORT(UART0_ADDR, AU1000_UART0_INT),
+	PORT(UART1_ADDR, AU1000_UART1_INT),
+	PORT(UART2_ADDR, AU1000_UART2_INT),
+	PORT(UART3_ADDR, AU1000_UART3_INT),
+#elif defined(CONFIG_SOC_AU1500)
+	PORT(UART0_ADDR, AU1500_UART0_INT),
+	PORT(UART3_ADDR, AU1500_UART3_INT),
+#elif defined(CONFIG_SOC_AU1100)
+	PORT(UART0_ADDR, AU1100_UART0_INT),
+	PORT(UART1_ADDR, AU1100_UART1_INT),
+	PORT(UART3_ADDR, AU1100_UART3_INT),
+#elif defined(CONFIG_SOC_AU1550)
+	PORT(UART0_ADDR, AU1550_UART0_INT),
+	PORT(UART1_ADDR, AU1550_UART1_INT),
+	PORT(UART3_ADDR, AU1550_UART3_INT),
+#elif defined(CONFIG_SOC_AU1200)
+	PORT(UART0_ADDR, AU1200_UART0_INT),
+	PORT(UART1_ADDR, AU1200_UART1_INT),
+#endif
+#endif	/* CONFIG_SERIAL_8250_AU1X00 */
+	{ },
+};
+
+static struct platform_device au1xx0_uart_device = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_AU1X00,
+	.dev			= {
+		.platform_data	= au1x00_uart_data,
+	},
+};
+
 /* OHCI (USB full speed host controller) */
 static struct resource au1xxx_usb_ohci_resources[] = {
 	[0] = {
@@ -186,19 +233,6 @@ static struct resource au1200_lcd_resources[] = {
 	}
 };
 
-static struct resource au1200_ide0_resources[] = {
-	[0] = {
-		.start		= AU1XXX_ATA_PHYS_ADDR,
-		.end 		= AU1XXX_ATA_PHYS_ADDR + AU1XXX_ATA_PHYS_LEN - 1,
-		.flags		= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start		= AU1XXX_ATA_INT,
-		.end		= AU1XXX_ATA_INT,
-		.flags		= IORESOURCE_IRQ,
-	}
-};
-
 static u64 au1200_lcd_dmamask = ~(u32)0;
 
 static struct platform_device au1200_lcd_device = {
@@ -212,20 +246,6 @@ static struct platform_device au1200_lcd_device = {
 	.resource       = au1200_lcd_resources,
 };
 
-
-static u64 ide0_dmamask = ~(u32)0;
-
-static struct platform_device au1200_ide0_device = {
-	.name		= "au1200-ide",
-	.id		= 0,
-	.dev = {
-		.dma_mask 		= &ide0_dmamask,
-		.coherent_dma_mask	= 0xffffffff,
-	},
-	.num_resources = ARRAY_SIZE(au1200_ide0_resources),
-	.resource	= au1200_ide0_resources,
-};
-
 static u64 au1xxx_mmc_dmamask =  ~(u32)0;
 
 static struct platform_device au1xxx_mmc_device = {
@@ -245,31 +265,6 @@ static struct platform_device au1x00_pcmcia_device = {
 	.id 		= 0,
 };
 
-#ifdef CONFIG_MIPS_DB1200
-
-static struct resource smc91x_resources[] = {
-	[0] = {
-		.name	= "smc91x-regs",
-		.start	= AU1XXX_SMC91111_PHYS_ADDR,
-		.end	= AU1XXX_SMC91111_PHYS_ADDR + 0xfffff,
-		.flags	= IORESOURCE_MEM,
-	},
-	[1] = {
-		.start	= AU1XXX_SMC91111_IRQ,
-		.end	= AU1XXX_SMC91111_IRQ,
-		.flags	= IORESOURCE_IRQ,
-	},
-};
-
-static struct platform_device smc91x_device = {
-	.name		= "smc91x",
-	.id		= -1,
-	.num_resources	= ARRAY_SIZE(smc91x_resources),
-	.resource	= smc91x_resources,
-};
-
-#endif
-
 /* All Alchemy demoboards with I2C have this #define in their headers */
 #ifdef SMBUS_PSC_BASE
 static struct resource pbdb_smbus_resources[] = {
@@ -289,6 +284,7 @@ static struct platform_device pbdb_smbus_device = {
 #endif
 
 static struct platform_device *au1xxx_platform_devices[] __initdata = {
+	&au1xx0_uart_device,
 	&au1xxx_usb_ohci_device,
 	&au1x00_pcmcia_device,
 #ifdef CONFIG_FB_AU1100
@@ -299,12 +295,8 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = {
 	&au1xxx_usb_gdt_device,
 	&au1xxx_usb_otg_device,
 	&au1200_lcd_device,
-	&au1200_ide0_device,
 	&au1xxx_mmc_device,
 #endif
-#ifdef CONFIG_MIPS_DB1200
-	&smc91x_device,
-#endif
 #ifdef SMBUS_PSC_BASE
 	&pbdb_smbus_device,
 #endif
@@ -312,6 +304,13 @@ static struct platform_device *au1xxx_platform_devices[] __initdata = {
 
 int __init au1xxx_platform_init(void)
 {
+	unsigned int uartclk = get_au1x00_uart_baud_base() * 16;
+	int i;
+
+	/* Fill up uartclk. */
+	for (i = 0; au1x00_uart_data[i].flags ; i++)
+		au1x00_uart_data[i].uartclk = uartclk;
+
 	return platform_add_devices(au1xxx_platform_devices, ARRAY_SIZE(au1xxx_platform_devices));
 }
 
diff --git a/arch/mips/au1000/common/power.c b/arch/mips/au1000/common/power.c
index 54047d69b820..a8cd2c1b9e1b 100644
--- a/arch/mips/au1000/common/power.c
+++ b/arch/mips/au1000/common/power.c
@@ -29,17 +29,14 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
 #include <linux/pm.h>
 #include <linux/pm_legacy.h>
-#include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/jiffies.h>
 
-#include <asm/string.h>
 #include <asm/uaccess.h>
-#include <asm/io.h>
-#include <asm/system.h>
 #include <asm/cacheflush.h>
 #include <asm/mach-au1x00/au1000.h>
 
@@ -47,17 +44,13 @@
 
 #define DEBUG 1
 #ifdef DEBUG
-#  define DPRINTK(fmt, args...)	printk("%s: " fmt, __FUNCTION__ , ## args)
+#  define DPRINTK(fmt, args...)	printk("%s: " fmt, __func__, ## args)
 #else
 #  define DPRINTK(fmt, args...)
 #endif
 
 static void au1000_calibrate_delay(void);
 
-extern void set_au1x00_speed(unsigned int new_freq);
-extern unsigned int get_au1x00_speed(void);
-extern unsigned long get_au1x00_uart_baud_base(void);
-extern void set_au1x00_uart_baud_base(unsigned long new_baud_base);
 extern unsigned long save_local_and_disable(int controller);
 extern void restore_local_and_enable(int controller, unsigned long mask);
 extern void local_enable_irq(unsigned int irq_nr);
@@ -258,7 +251,6 @@ int au_sleep(void)
 static int pm_do_sleep(ctl_table * ctl, int write, struct file *file,
 		       void __user *buffer, size_t * len, loff_t *ppos)
 {
-	int retval = 0;
 #ifdef SLEEP_TEST_TIMEOUT
 #define TMPBUFLEN2 16
 	char buf[TMPBUFLEN2], *p;
@@ -278,36 +270,12 @@ static int pm_do_sleep(ctl_table * ctl, int write, struct file *file,
 		p = buf;
 		sleep_ticks = simple_strtoul(p, &p, 0);
 #endif
-		retval = pm_send_all(PM_SUSPEND, (void *) 2);
-
-		if (retval)
-			return retval;
 
 		au_sleep();
-		retval = pm_send_all(PM_RESUME, (void *) 0);
-	}
-	return retval;
-}
-
-static int pm_do_suspend(ctl_table * ctl, int write, struct file *file,
-			 void __user *buffer, size_t * len, loff_t *ppos)
-{
-	int retval = 0;
-
-	if (!write) {
-		*len = 0;
-	} else {
-		retval = pm_send_all(PM_SUSPEND, (void *) 2);
-		if (retval)
-			return retval;
-		suspend_mode = 1;
-
-		retval = pm_send_all(PM_RESUME, (void *) 0);
 	}
-	return retval;
+	return 0;
 }
 
-
 static int pm_do_freq(ctl_table * ctl, int write, struct file *file,
 		      void __user *buffer, size_t * len, loff_t *ppos)
 {
@@ -421,14 +389,6 @@ static int pm_do_freq(ctl_table * ctl, int write, struct file *file,
 
 static struct ctl_table pm_table[] = {
 	{
-		.ctl_name 	= CTL_UNNUMBERED,
-		.procname	= "suspend",
-		.data		= NULL,
-		.maxlen		= 0,
-		.mode		= 0600,
-		.proc_handler	= &pm_do_suspend
-	},
-	{
 		.ctl_name	= CTL_UNNUMBERED,
 		.procname	= "sleep",
 		.data		= NULL,
diff --git a/arch/mips/au1000/common/prom.c b/arch/mips/au1000/common/prom.c
index 90d70695aa60..f10af829e4ec 100644
--- a/arch/mips/au1000/common/prom.c
+++ b/arch/mips/au1000/common/prom.c
@@ -33,8 +33,8 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/module.h>
-#include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/string.h>
 
diff --git a/arch/mips/au1000/common/puts.c b/arch/mips/au1000/common/puts.c
index 2705829cd466..e34c67e89293 100644
--- a/arch/mips/au1000/common/puts.c
+++ b/arch/mips/au1000/common/puts.c
@@ -28,7 +28,6 @@
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/types.h>
 #include <asm/mach-au1x00/au1000.h>
 
 #define SERIAL_BASE   UART_BASE
diff --git a/arch/mips/au1000/common/reset.c b/arch/mips/au1000/common/reset.c
index b8638d293cf9..60cec537c745 100644
--- a/arch/mips/au1000/common/reset.c
+++ b/arch/mips/au1000/common/reset.c
@@ -27,13 +27,7 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <asm/io.h>
-#include <asm/pgtable.h>
-#include <asm/processor.h>
-#include <asm/reboot.h>
-#include <asm/system.h>
+
 #include <asm/mach-au1x00/au1000.h>
 
 extern int au_sleep(void);
diff --git a/arch/mips/au1000/common/setup.c b/arch/mips/au1000/common/setup.c
index 9e4ab80caab6..0e86f7a6b4a7 100644
--- a/arch/mips/au1000/common/setup.c
+++ b/arch/mips/au1000/common/setup.c
@@ -25,21 +25,14 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/sched.h>
 #include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/pm.h>
 
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
 #include <asm/mipsregs.h>
 #include <asm/reboot.h>
-#include <asm/pgtable.h>
 #include <asm/time.h>
 
 #include <au1000.h>
@@ -49,8 +42,6 @@ extern void __init board_setup(void);
 extern void au1000_restart(char *);
 extern void au1000_halt(void);
 extern void au1000_power_off(void);
-extern void au1x_time_init(void);
-extern void au1x_timer_setup(struct irqaction *irq);
 extern void set_cpuspec(void);
 
 void __init plat_mem_setup(void)
diff --git a/arch/mips/au1000/common/sleeper.S b/arch/mips/au1000/common/sleeper.S
index 683d9da84b66..4b3cf021a454 100644
--- a/arch/mips/au1000/common/sleeper.S
+++ b/arch/mips/au1000/common/sleeper.S
@@ -9,9 +9,9 @@
  * Free Software Foundation;  either version 2 of the  License, or (at your
  * option) any later version.
  */
+
 #include <asm/asm.h>
 #include <asm/mipsregs.h>
-#include <asm/addrspace.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
 
diff --git a/arch/mips/au1000/common/time.c b/arch/mips/au1000/common/time.c
index e122bbc6cd88..bdb6d73b26fb 100644
--- a/arch/mips/au1000/common/time.c
+++ b/arch/mips/au1000/common/time.c
@@ -1,6 +1,6 @@
 /*
  *
- * Copyright (C) 2001 MontaVista Software, ppopov@mvista.com
+ * Copyright (C) 2001, 2006, 2008 MontaVista Software, <source@mvista.com>
  * Copied and modified Carsten Langgaard's time.c
  *
  * Carsten Langgaard, carstenl@mips.com
@@ -34,23 +34,13 @@
 
 #include <linux/types.h>
 #include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sched.h>
 #include <linux/spinlock.h>
-#include <linux/hardirq.h>
 
-#include <asm/compiler.h>
 #include <asm/mipsregs.h>
 #include <asm/time.h>
-#include <asm/div64.h>
 #include <asm/mach-au1x00/au1000.h>
 
-#include <linux/mc146818rtc.h>
-#include <linux/timex.h>
-
-static unsigned long r4k_offset; /* Amount to increment compare reg each time */
-static unsigned long r4k_cur;    /* What counter should be at next timer irq */
-int	no_au1xxx_32khz;
+static int no_au1xxx_32khz;
 extern int allow_au1k_wait; /* default off for CP0 Counter */
 
 #ifdef CONFIG_PM
@@ -184,7 +174,7 @@ wakeup_counter0_set(int ticks)
  * "wait" is enabled, and we need to detect if the 32KHz isn't present
  * but requested......got it? :-)		-- Dan
  */
-unsigned long cal_r4koff(void)
+unsigned long calc_clock(void)
 {
 	unsigned long cpu_speed;
 	unsigned long flags;
@@ -229,19 +219,13 @@ unsigned long cal_r4koff(void)
 	// Equation: Baudrate = CPU / (SD * 2 * CLKDIV * 16)
 	set_au1x00_uart_baud_base(cpu_speed / (2 * ((int)(au_readl(SYS_POWERCTRL)&0x03) + 2) * 16));
 	spin_unlock_irqrestore(&time_lock, flags);
-	return (cpu_speed / HZ);
+	return cpu_speed;
 }
 
 void __init plat_time_init(void)
 {
-	unsigned int est_freq;
-
-	printk("calculating r4koff... ");
-	r4k_offset = cal_r4koff();
-	printk("%08lx(%d)\n", r4k_offset, (int) r4k_offset);
+	unsigned int est_freq = calc_clock();
 
-	//est_freq = 2*r4k_offset*HZ;
-	est_freq = r4k_offset*HZ;
 	est_freq += 5000;    /* round */
 	est_freq -= est_freq%10000;
 	printk("CPU frequency %d.%02d MHz\n", est_freq/1000000,
@@ -249,9 +233,6 @@ void __init plat_time_init(void)
  	set_au1x00_speed(est_freq);
  	set_au1x00_lcd_clock(); // program the LCD clock
 
-	r4k_cur = (read_c0_count() + r4k_offset);
-	write_c0_compare(r4k_cur);
-
 #ifdef CONFIG_PM
 	/*
 	 * setup counter 0, since it keeps ticking after a
@@ -265,12 +246,8 @@ void __init plat_time_init(void)
 	 * Check to ensure we really have a 32KHz oscillator before
 	 * we do this.
 	 */
-	if (no_au1xxx_32khz) {
+	if (no_au1xxx_32khz)
 		printk("WARNING: no 32KHz clock found.\n");
-
-		/* Ensure we get CPO_COUNTER interrupts.  */
-		set_c0_status(IE_IRQ5);
-	}
 	else {
 		while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S);
 		au_writel(0, SYS_TOYWRITE);
diff --git a/arch/mips/au1000/db1x00/board_setup.c b/arch/mips/au1000/db1x00/board_setup.c
index 99eafeada518..b7dcbad5c586 100644
--- a/arch/mips/au1000/db1x00/board_setup.c
+++ b/arch/mips/au1000/db1x00/board_setup.c
@@ -27,20 +27,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
-#include <linux/mc146818rtc.h>
-#include <linux/delay.h>
-
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
+
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-db1x00/db1x00.h>
 
diff --git a/arch/mips/au1000/db1x00/init.c b/arch/mips/au1000/db1x00/init.c
index e822c123eab8..d3b967caf70c 100644
--- a/arch/mips/au1000/db1x00/init.c
+++ b/arch/mips/au1000/db1x00/init.c
@@ -28,13 +28,8 @@
  */
 
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
 #include <linux/kernel.h>
 
-#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
 #include <prom.h>
diff --git a/arch/mips/au1000/db1x00/irqmap.c b/arch/mips/au1000/db1x00/irqmap.c
index 09cea03411b0..eaa50c7b6341 100644
--- a/arch/mips/au1000/db1x00/irqmap.c
+++ b/arch/mips/au1000/db1x00/irqmap.c
@@ -25,26 +25,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/errno.h>
+
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
 
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
 #include <asm/mach-au1x00/au1000.h>
 
 #ifdef CONFIG_MIPS_DB1500
diff --git a/arch/mips/au1000/mtx-1/board_setup.c b/arch/mips/au1000/mtx-1/board_setup.c
index 310d5dff89fc..5736354829c6 100644
--- a/arch/mips/au1000/mtx-1/board_setup.c
+++ b/arch/mips/au1000/mtx-1/board_setup.c
@@ -28,19 +28,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
-#include <linux/delay.h>
 
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
 #include <asm/mach-au1x00/au1000.h>
 
 extern int (*board_pci_idsel)(unsigned int devsel, int assert);
diff --git a/arch/mips/au1000/mtx-1/init.c b/arch/mips/au1000/mtx-1/init.c
index e700fd312a24..c015cbce1cca 100644
--- a/arch/mips/au1000/mtx-1/init.c
+++ b/arch/mips/au1000/mtx-1/init.c
@@ -28,14 +28,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/string.h>
+
 #include <linux/kernel.h>
-#include <linux/sched.h>
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/bootmem.h>
 
-#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
 #include <prom.h>
diff --git a/arch/mips/au1000/mtx-1/irqmap.c b/arch/mips/au1000/mtx-1/irqmap.c
index 49c612aeddcf..78d70c42c9db 100644
--- a/arch/mips/au1000/mtx-1/irqmap.c
+++ b/arch/mips/au1000/mtx-1/irqmap.c
@@ -25,26 +25,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/errno.h>
+
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
 
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
 #include <asm/mach-au1x00/au1000.h>
 
 char irq_tab_alchemy[][5] __initdata = {
diff --git a/arch/mips/au1000/mtx-1/platform.c b/arch/mips/au1000/mtx-1/platform.c
index ce8637b3afa9..a7edbf0829ac 100644
--- a/arch/mips/au1000/mtx-1/platform.c
+++ b/arch/mips/au1000/mtx-1/platform.c
@@ -19,7 +19,6 @@
  */
 
 #include <linux/init.h>
-#include <linux/types.h>
 #include <linux/platform_device.h>
 #include <linux/leds.h>
 #include <linux/gpio_keys.h>
diff --git a/arch/mips/au1000/pb1000/board_setup.c b/arch/mips/au1000/pb1000/board_setup.c
index 5198c4f98b43..33f15acc1b17 100644
--- a/arch/mips/au1000/pb1000/board_setup.c
+++ b/arch/mips/au1000/pb1000/board_setup.c
@@ -23,19 +23,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
 #include <linux/delay.h>
 
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-pb1x00/pb1000.h>
 
diff --git a/arch/mips/au1000/pb1000/init.c b/arch/mips/au1000/pb1000/init.c
index 2515b9fb24af..549447df71d6 100644
--- a/arch/mips/au1000/pb1000/init.c
+++ b/arch/mips/au1000/pb1000/init.c
@@ -26,14 +26,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
 #include <linux/kernel.h>
 
-#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
 #include <prom.h>
diff --git a/arch/mips/au1000/pb1000/irqmap.c b/arch/mips/au1000/pb1000/irqmap.c
index 88e354508204..b3d56b0af321 100644
--- a/arch/mips/au1000/pb1000/irqmap.c
+++ b/arch/mips/au1000/pb1000/irqmap.c
@@ -25,26 +25,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/errno.h>
+
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
 #include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
 
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
 #include <asm/mach-au1x00/au1000.h>
 
 struct au1xxx_irqmap __initdata au1xxx_irq_map[] = {
diff --git a/arch/mips/au1000/pb1100/board_setup.c b/arch/mips/au1000/pb1100/board_setup.c
index 42874a6b31d1..656164c8e9ca 100644
--- a/arch/mips/au1000/pb1100/board_setup.c
+++ b/arch/mips/au1000/pb1100/board_setup.c
@@ -23,19 +23,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
 #include <linux/delay.h>
 
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-pb1x00/pb1100.h>
 
diff --git a/arch/mips/au1000/pb1100/init.c b/arch/mips/au1000/pb1100/init.c
index 490c3801c275..c91344648ed3 100644
--- a/arch/mips/au1000/pb1100/init.c
+++ b/arch/mips/au1000/pb1100/init.c
@@ -27,14 +27,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
 #include <linux/kernel.h>
 
-#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
 #include <prom.h>
diff --git a/arch/mips/au1000/pb1100/irqmap.c b/arch/mips/au1000/pb1100/irqmap.c
index 880456bf8c11..b5021e3d477f 100644
--- a/arch/mips/au1000/pb1100/irqmap.c
+++ b/arch/mips/au1000/pb1100/irqmap.c
@@ -25,26 +25,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/errno.h>
+
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
 
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
 #include <asm/mach-au1x00/au1000.h>
 
 struct au1xxx_irqmap __initdata au1xxx_irq_map[] = {
diff --git a/arch/mips/au1000/pb1200/Makefile b/arch/mips/au1000/pb1200/Makefile
index 970b1b1d5cda..4fe02ea65a60 100644
--- a/arch/mips/au1000/pb1200/Makefile
+++ b/arch/mips/au1000/pb1200/Makefile
@@ -3,5 +3,6 @@
 #
 
 lib-y := init.o board_setup.o irqmap.o
+obj-y += platform.o
 
 EXTRA_CFLAGS += -Werror
diff --git a/arch/mips/au1000/pb1200/board_setup.c b/arch/mips/au1000/pb1200/board_setup.c
index b98bebfa87c6..4493a792cc4c 100644
--- a/arch/mips/au1000/pb1200/board_setup.c
+++ b/arch/mips/au1000/pb1200/board_setup.c
@@ -23,27 +23,11 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
 #include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
-#include <linux/mc146818rtc.h>
-#include <linux/delay.h>
-
-#if defined(CONFIG_BLK_DEV_IDE_AU1XXX)
-#include <linux/ide.h>
-#endif
-
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
 
 #include <au1000.h>
-#include <au1xxx_dbdma.h>
 #include <prom.h>
 
 #ifdef CONFIG_MIPS_PB1200
@@ -52,8 +36,6 @@
 
 #ifdef CONFIG_MIPS_DB1200
 #include <asm/mach-db1x00/db1200.h>
-#define PB1200_ETH_INT DB1200_ETH_INT
-#define PB1200_IDE_INT DB1200_IDE_INT
 #endif
 
 extern void _board_init_irq(void);
diff --git a/arch/mips/au1000/pb1200/init.c b/arch/mips/au1000/pb1200/init.c
index 069ed45f04f2..72af5500660b 100644
--- a/arch/mips/au1000/pb1200/init.c
+++ b/arch/mips/au1000/pb1200/init.c
@@ -27,14 +27,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
 #include <linux/kernel.h>
 
-#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
 #include <prom.h>
diff --git a/arch/mips/au1000/pb1200/irqmap.c b/arch/mips/au1000/pb1200/irqmap.c
index 8fcd0df86f93..e61eb8e0b76b 100644
--- a/arch/mips/au1000/pb1200/irqmap.c
+++ b/arch/mips/au1000/pb1200/irqmap.c
@@ -22,26 +22,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/errno.h>
+
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
 #include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
-
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
+
 #include <asm/mach-au1x00/au1000.h>
 
 #ifdef CONFIG_MIPS_PB1200
diff --git a/arch/mips/au1000/pb1200/platform.c b/arch/mips/au1000/pb1200/platform.c
new file mode 100644
index 000000000000..5930110b9b6d
--- /dev/null
+++ b/arch/mips/au1000/pb1200/platform.c
@@ -0,0 +1,84 @@
+/*
+ * Pb1200/DBAu1200 board platform device registration
+ *
+ * Copyright (C) 2008 MontaVista Software Inc. <source@mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#include <asm/mach-au1x00/au1xxx.h>
+
+static struct resource ide_resources[] = {
+	[0] = {
+		.start	= IDE_PHYS_ADDR,
+		.end 	= IDE_PHYS_ADDR + IDE_PHYS_LEN - 1,
+		.flags	= IORESOURCE_MEM
+	},
+	[1] = {
+		.start	= IDE_INT,
+		.end	= IDE_INT,
+		.flags	= IORESOURCE_IRQ
+	}
+};
+
+static u64 ide_dmamask = ~(u32)0;
+
+static struct platform_device ide_device = {
+	.name		= "au1200-ide",
+	.id		= 0,
+	.dev = {
+		.dma_mask 		= &ide_dmamask,
+		.coherent_dma_mask	= 0xffffffff,
+	},
+	.num_resources	= ARRAY_SIZE(ide_resources),
+	.resource	= ide_resources
+};
+
+static struct resource smc91c111_resources[] = {
+	[0] = {
+		.name	= "smc91x-regs",
+		.start	= SMC91C111_PHYS_ADDR,
+		.end	= SMC91C111_PHYS_ADDR + 0xf,
+		.flags	= IORESOURCE_MEM
+	},
+	[1] = {
+		.start	= SMC91C111_INT,
+		.end	= SMC91C111_INT,
+		.flags	= IORESOURCE_IRQ
+	},
+};
+
+static struct platform_device smc91c111_device = {
+	.name		= "smc91x",
+	.id		= -1,
+	.num_resources	= ARRAY_SIZE(smc91c111_resources),
+	.resource	= smc91c111_resources
+};
+
+static struct platform_device *board_platform_devices[] __initdata = {
+	&ide_device,
+	&smc91c111_device
+};
+
+static int __init board_register_devices(void)
+{
+	return platform_add_devices(board_platform_devices,
+				    ARRAY_SIZE(board_platform_devices));
+}
+
+arch_initcall(board_register_devices);
diff --git a/arch/mips/au1000/pb1500/board_setup.c b/arch/mips/au1000/pb1500/board_setup.c
index 5446836869d6..24c652e8ec4b 100644
--- a/arch/mips/au1000/pb1500/board_setup.c
+++ b/arch/mips/au1000/pb1500/board_setup.c
@@ -23,19 +23,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
 #include <linux/delay.h>
 
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-pb1x00/pb1500.h>
 
diff --git a/arch/mips/au1000/pb1500/init.c b/arch/mips/au1000/pb1500/init.c
index db558c967048..488507c07db9 100644
--- a/arch/mips/au1000/pb1500/init.c
+++ b/arch/mips/au1000/pb1500/init.c
@@ -27,14 +27,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
 #include <linux/kernel.h>
 
-#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
 #include <prom.h>
diff --git a/arch/mips/au1000/pb1500/irqmap.c b/arch/mips/au1000/pb1500/irqmap.c
index 810f695e24bb..4817ab44d07f 100644
--- a/arch/mips/au1000/pb1500/irqmap.c
+++ b/arch/mips/au1000/pb1500/irqmap.c
@@ -25,26 +25,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/errno.h>
+
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
 
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
 #include <asm/mach-au1x00/au1000.h>
 
 char irq_tab_alchemy[][5] __initdata = {
diff --git a/arch/mips/au1000/pb1550/board_setup.c b/arch/mips/au1000/pb1550/board_setup.c
index e3cfb0d73180..45d60872b565 100644
--- a/arch/mips/au1000/pb1550/board_setup.c
+++ b/arch/mips/au1000/pb1550/board_setup.c
@@ -27,20 +27,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
-#include <linux/mc146818rtc.h>
-#include <linux/delay.h>
 
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
 #include <asm/mach-au1x00/au1000.h>
 #include <asm/mach-pb1x00/pb1550.h>
 
diff --git a/arch/mips/au1000/pb1550/init.c b/arch/mips/au1000/pb1550/init.c
index b716363ea564..f6b2fc587980 100644
--- a/arch/mips/au1000/pb1550/init.c
+++ b/arch/mips/au1000/pb1550/init.c
@@ -27,14 +27,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
 #include <linux/kernel.h>
 
-#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
 #include <prom.h>
diff --git a/arch/mips/au1000/pb1550/irqmap.c b/arch/mips/au1000/pb1550/irqmap.c
index 56becab28e5d..e1dac37af08a 100644
--- a/arch/mips/au1000/pb1550/irqmap.c
+++ b/arch/mips/au1000/pb1550/irqmap.c
@@ -25,26 +25,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/errno.h>
+
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
 
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
 #include <asm/mach-au1x00/au1000.h>
 
 char irq_tab_alchemy[][5] __initdata = {
diff --git a/arch/mips/au1000/xxs1500/board_setup.c b/arch/mips/au1000/xxs1500/board_setup.c
index b2e413e597a8..79d1798621bf 100644
--- a/arch/mips/au1000/xxs1500/board_setup.c
+++ b/arch/mips/au1000/xxs1500/board_setup.c
@@ -23,19 +23,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/mm.h>
-#include <linux/console.h>
 #include <linux/delay.h>
 
-#include <asm/cpu.h>
-#include <asm/bootinfo.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/reboot.h>
-#include <asm/pgtable.h>
 #include <asm/mach-au1x00/au1000.h>
 
 void board_reset(void)
diff --git a/arch/mips/au1000/xxs1500/init.c b/arch/mips/au1000/xxs1500/init.c
index 7e6878c1b0a5..24fc6e132dc0 100644
--- a/arch/mips/au1000/xxs1500/init.c
+++ b/arch/mips/au1000/xxs1500/init.c
@@ -26,14 +26,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
+
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-#include <linux/string.h>
 #include <linux/kernel.h>
 
-#include <asm/addrspace.h>
 #include <asm/bootinfo.h>
 
 #include <prom.h>
diff --git a/arch/mips/au1000/xxs1500/irqmap.c b/arch/mips/au1000/xxs1500/irqmap.c
index a343da134334..dd6e3d1eb4d4 100644
--- a/arch/mips/au1000/xxs1500/irqmap.c
+++ b/arch/mips/au1000/xxs1500/irqmap.c
@@ -25,26 +25,9 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/errno.h>
+
 #include <linux/init.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
-#include <linux/module.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/interrupt.h>
-#include <linux/ioport.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/delay.h>
-#include <linux/bitops.h>
 
-#include <asm/bootinfo.h>
-#include <asm/io.h>
-#include <asm/mipsregs.h>
-#include <asm/system.h>
 #include <asm/mach-au1x00/au1000.h>
 
 struct au1xxx_irqmap __initdata au1xxx_irq_map[] = {
diff --git a/arch/mips/configs/mipssim_defconfig b/arch/mips/configs/mipssim_defconfig
index 6db0bdaefb27..4f6bce99d5cf 100644
--- a/arch/mips/configs/mipssim_defconfig
+++ b/arch/mips/configs/mipssim_defconfig
@@ -641,7 +641,6 @@ CONFIG_CROSSCOMPILE=y
 CONFIG_CMDLINE="nfsroot=192.168.192.169:/u1/mipsel,timeo=20 ip=dhcp"
 # CONFIG_DEBUG_STACK_USAGE is not set
 # CONFIG_RUNTIME_DEBUG is not set
-# CONFIG_MIPS_UNCACHED is not set
 
 #
 # Security options
diff --git a/arch/mips/configs/pnx8550-jbs_defconfig b/arch/mips/configs/pnx8550-jbs_defconfig
index 518a60892b78..780c7fc24b82 100644
--- a/arch/mips/configs/pnx8550-jbs_defconfig
+++ b/arch/mips/configs/pnx8550-jbs_defconfig
@@ -1223,7 +1223,6 @@ CONFIG_CMDLINE="console=ttyS1,38400n8 kgdb=ttyS0 root=/dev/nfs ip=bootp"
 # CONFIG_KGDB is not set
 CONFIG_SYS_SUPPORTS_KGDB=y
 # CONFIG_RUNTIME_DEBUG is not set
-# CONFIG_MIPS_UNCACHED is not set
 
 #
 # Security options
diff --git a/arch/mips/configs/pnx8550-stb810_defconfig b/arch/mips/configs/pnx8550-stb810_defconfig
index 68351eb81bc8..267f21ed1d0f 100644
--- a/arch/mips/configs/pnx8550-stb810_defconfig
+++ b/arch/mips/configs/pnx8550-stb810_defconfig
@@ -1213,7 +1213,6 @@ CONFIG_CMDLINE="console=ttyS1,38400n8 kgdb=ttyS0 root=/dev/nfs ip=bootp"
 # CONFIG_KGDB is not set
 CONFIG_SYS_SUPPORTS_KGDB=y
 # CONFIG_RUNTIME_DEBUG is not set
-# CONFIG_MIPS_UNCACHED is not set
 
 #
 # Security options
diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c
index 60349062595a..3965fda94a89 100644
--- a/arch/mips/dec/time.c
+++ b/arch/mips/dec/time.c
@@ -9,30 +9,15 @@
  *
  */
 #include <linux/bcd.h>
-#include <linux/errno.h>
 #include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
 #include <linux/mc146818rtc.h>
-#include <linux/mm.h>
-#include <linux/module.h>
 #include <linux/param.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/time.h>
-#include <linux/types.h>
-
-#include <asm/bootinfo.h>
-#include <asm/cpu.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/mipsregs.h>
-#include <asm/sections.h>
-#include <asm/time.h>
 
+#include <asm/cpu-features.h>
+#include <asm/ds1287.h>
+#include <asm/time.h>
 #include <asm/dec/interrupts.h>
 #include <asm/dec/ioasic.h>
-#include <asm/dec/ioasic_addrs.h>
 #include <asm/dec/machtype.h>
 
 unsigned long read_persistent_clock(void)
@@ -139,42 +124,32 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 	return retval;
 }
 
-static int dec_timer_state(void)
+void __init plat_time_init(void)
 {
-	return (CMOS_READ(RTC_REG_C) & RTC_PF) != 0;
-}
+	u32 start, end;
+	int i = HZ / 10;
 
-static void dec_timer_ack(void)
-{
-	CMOS_READ(RTC_REG_C);			/* Ack the RTC interrupt.  */
-}
-
-static cycle_t dec_ioasic_hpt_read(void)
-{
-	/*
-	 * The free-running counter is 32-bit which is good for about
-	 * 2 minutes, 50 seconds at possible count rates of up to 25MHz.
-	 */
-	return ioasic_read(IO_REG_FCTR);
-}
+	/* Set up the rate of periodic DS1287 interrupts. */
+	ds1287_set_base_clock(HZ);
 
+	if (cpu_has_counter) {
+		while (!ds1287_timer_state())
+			;
 
-void __init plat_time_init(void)
-{
-	mips_timer_ack = dec_timer_ack;
+		start = read_c0_count();
 
-	if (!cpu_has_counter && IOASIC)
-		/* For pre-R4k systems we use the I/O ASIC's counter.  */
-		clocksource_mips.read = dec_ioasic_hpt_read;
+		while (i--)
+			while (!ds1287_timer_state())
+				;
 
-	/* Set up the rate of periodic DS1287 interrupts.  */
-	CMOS_WRITE(RTC_REF_CLCK_32KHZ | (16 - __ffs(HZ)), RTC_REG_A);
-}
+		end = read_c0_count();
 
-void __init plat_timer_setup(struct irqaction *irq)
-{
-	setup_irq(dec_interrupt[DEC_IRQ_RTC], irq);
+		mips_hpt_frequency = (end - start) * 10;
+		printk(KERN_INFO "MIPS counter frequency %dHz\n",
+			mips_hpt_frequency);
+	} else if (IOASIC)
+		/* For pre-R4k systems we use the I/O ASIC's counter.  */
+		dec_ioasic_clocksource_init();
 
-	/* Enable periodic DS1287 interrupts.  */
-	CMOS_WRITE(CMOS_READ(RTC_REG_B) | RTC_PIE, RTC_REG_B);
+	ds1287_clockevent_init(dec_interrupt[DEC_IRQ_RTC]);
 }
diff --git a/arch/mips/jmr3927/rbhma3100/setup.c b/arch/mips/jmr3927/rbhma3100/setup.c
index c886d804d303..f39c444e42d4 100644
--- a/arch/mips/jmr3927/rbhma3100/setup.c
+++ b/arch/mips/jmr3927/rbhma3100/setup.c
@@ -36,11 +36,13 @@
 #include <linux/pm.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
+#include <linux/gpio.h>
 #ifdef CONFIG_SERIAL_TXX9
 #include <linux/serial_core.h>
 #endif
 
 #include <asm/txx9tmr.h>
+#include <asm/txx9pio.h>
 #include <asm/reboot.h>
 #include <asm/jmr3927/jmr3927.h>
 #include <asm/mipsregs.h>
@@ -340,9 +342,12 @@ static void __init tx3927_setup(void)
 
 	/* PIO */
 	/* PIO[15:12] connected to LEDs */
-	tx3927_pioptr->dir = 0x0000f000;
-	tx3927_pioptr->maskcpu = 0;
-	tx3927_pioptr->maskext = 0;
+	__raw_writel(0x0000f000, &tx3927_pioptr->dir);
+	__raw_writel(0, &tx3927_pioptr->maskcpu);
+	__raw_writel(0, &tx3927_pioptr->maskext);
+	txx9_gpio_init(TX3927_PIO_REG, 0, 16);
+	gpio_request(11, "dipsw1");
+	gpio_request(10, "dipsw2");
 	{
 		unsigned int conf;
 
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index 6fcdb6fda2e2..45545be3eb86 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -10,12 +10,15 @@ obj-y		+= cpu-probe.o branch.o entry.o genex.o irq.o process.o \
 
 obj-$(CONFIG_CEVT_BCM1480)	+= cevt-bcm1480.o
 obj-$(CONFIG_CEVT_R4K)		+= cevt-r4k.o
+obj-$(CONFIG_CEVT_DS1287)	+= cevt-ds1287.o
 obj-$(CONFIG_CEVT_GT641XX)	+= cevt-gt641xx.o
 obj-$(CONFIG_CEVT_SB1250)	+= cevt-sb1250.o
 obj-$(CONFIG_CEVT_TXX9)		+= cevt-txx9.o
 obj-$(CONFIG_CSRC_BCM1480)	+= csrc-bcm1480.o
+obj-$(CONFIG_CSRC_IOASIC)	+= csrc-ioasic.o
 obj-$(CONFIG_CSRC_R4K)		+= csrc-r4k.o
 obj-$(CONFIG_CSRC_SB1250)	+= csrc-sb1250.o
+obj-$(CONFIG_SYNC_R4K)		+= sync-r4k.o
 
 binfmt_irix-objs	:= irixelf.o irixinv.o irixioctl.o irixsig.o	\
 			   irix5sys.o sysirix.o
@@ -50,6 +53,8 @@ obj-$(CONFIG_MIPS_MT)		+= mips-mt.o
 obj-$(CONFIG_MIPS_MT_FPAFF)	+= mips-mt-fpaff.o
 obj-$(CONFIG_MIPS_MT_SMTC)	+= smtc.o smtc-asm.o smtc-proc.o
 obj-$(CONFIG_MIPS_MT_SMP)	+= smp-mt.o
+obj-$(CONFIG_MIPS_CMP)		+= smp-cmp.o
+obj-$(CONFIG_CPU_MIPSR2)	+= spram.o
 
 obj-$(CONFIG_MIPS_APSP_KSPD)	+= kspd.o
 obj-$(CONFIG_MIPS_VPE_LOADER)	+= vpe.o
@@ -62,6 +67,7 @@ obj-$(CONFIG_IRQ_CPU_RM9K)	+= irq-rm9000.o
 obj-$(CONFIG_MIPS_BOARDS_GEN)	+= irq-msc01.o
 obj-$(CONFIG_IRQ_TXX9)		+= irq_txx9.o
 obj-$(CONFIG_IRQ_GT641XX)	+= irq-gt641xx.o
+obj-$(CONFIG_IRQ_GIC)		+= irq-gic.o
 
 obj-$(CONFIG_32BIT)		+= scall32-o32.o
 obj-$(CONFIG_64BIT)		+= scall64-64.o
@@ -77,6 +83,8 @@ obj-$(CONFIG_64BIT)		+= cpu-bugs64.o
 
 obj-$(CONFIG_I8253)		+= i8253.o
 
+obj-$(CONFIG_GPIO_TXX9)		+= gpio_txx9.o
+
 obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index ca136298acdc..5bf03b3c4150 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -17,252 +17,252 @@
 #include <asm/ptrace.h>
 #include <asm/processor.h>
 
-#define text(t) __asm__("\n@@@" t)
+#define text(t) __asm__("\n->#" t)
 #define _offset(type, member) (&(((type *)NULL)->member))
 #define offset(string, ptr, member) \
-	__asm__("\n@@@" string "%0" : : "i" (_offset(ptr, member)))
+	__asm__("\n->" string " %0" : : "i" (_offset(ptr, member)))
 #define constant(string, member) \
-	__asm__("\n@@@" string "%X0" : : "ri" (member))
+	__asm__("\n->" string " %0" : : "ri" (member))
 #define size(string, size) \
-	__asm__("\n@@@" string "%0" : : "i" (sizeof(size)))
+	__asm__("\n->" string " %0" : : "i" (sizeof(size)))
 #define linefeed text("")
 
 void output_ptreg_defines(void)
 {
-	text("/* MIPS pt_regs offsets. */");
-	offset("#define PT_R0     ", struct pt_regs, regs[0]);
-	offset("#define PT_R1     ", struct pt_regs, regs[1]);
-	offset("#define PT_R2     ", struct pt_regs, regs[2]);
-	offset("#define PT_R3     ", struct pt_regs, regs[3]);
-	offset("#define PT_R4     ", struct pt_regs, regs[4]);
-	offset("#define PT_R5     ", struct pt_regs, regs[5]);
-	offset("#define PT_R6     ", struct pt_regs, regs[6]);
-	offset("#define PT_R7     ", struct pt_regs, regs[7]);
-	offset("#define PT_R8     ", struct pt_regs, regs[8]);
-	offset("#define PT_R9     ", struct pt_regs, regs[9]);
-	offset("#define PT_R10    ", struct pt_regs, regs[10]);
-	offset("#define PT_R11    ", struct pt_regs, regs[11]);
-	offset("#define PT_R12    ", struct pt_regs, regs[12]);
-	offset("#define PT_R13    ", struct pt_regs, regs[13]);
-	offset("#define PT_R14    ", struct pt_regs, regs[14]);
-	offset("#define PT_R15    ", struct pt_regs, regs[15]);
-	offset("#define PT_R16    ", struct pt_regs, regs[16]);
-	offset("#define PT_R17    ", struct pt_regs, regs[17]);
-	offset("#define PT_R18    ", struct pt_regs, regs[18]);
-	offset("#define PT_R19    ", struct pt_regs, regs[19]);
-	offset("#define PT_R20    ", struct pt_regs, regs[20]);
-	offset("#define PT_R21    ", struct pt_regs, regs[21]);
-	offset("#define PT_R22    ", struct pt_regs, regs[22]);
-	offset("#define PT_R23    ", struct pt_regs, regs[23]);
-	offset("#define PT_R24    ", struct pt_regs, regs[24]);
-	offset("#define PT_R25    ", struct pt_regs, regs[25]);
-	offset("#define PT_R26    ", struct pt_regs, regs[26]);
-	offset("#define PT_R27    ", struct pt_regs, regs[27]);
-	offset("#define PT_R28    ", struct pt_regs, regs[28]);
-	offset("#define PT_R29    ", struct pt_regs, regs[29]);
-	offset("#define PT_R30    ", struct pt_regs, regs[30]);
-	offset("#define PT_R31    ", struct pt_regs, regs[31]);
-	offset("#define PT_LO     ", struct pt_regs, lo);
-	offset("#define PT_HI     ", struct pt_regs, hi);
+	text("MIPS pt_regs offsets.");
+	offset("PT_R0", struct pt_regs, regs[0]);
+	offset("PT_R1", struct pt_regs, regs[1]);
+	offset("PT_R2", struct pt_regs, regs[2]);
+	offset("PT_R3", struct pt_regs, regs[3]);
+	offset("PT_R4", struct pt_regs, regs[4]);
+	offset("PT_R5", struct pt_regs, regs[5]);
+	offset("PT_R6", struct pt_regs, regs[6]);
+	offset("PT_R7", struct pt_regs, regs[7]);
+	offset("PT_R8", struct pt_regs, regs[8]);
+	offset("PT_R9", struct pt_regs, regs[9]);
+	offset("PT_R10", struct pt_regs, regs[10]);
+	offset("PT_R11", struct pt_regs, regs[11]);
+	offset("PT_R12", struct pt_regs, regs[12]);
+	offset("PT_R13", struct pt_regs, regs[13]);
+	offset("PT_R14", struct pt_regs, regs[14]);
+	offset("PT_R15", struct pt_regs, regs[15]);
+	offset("PT_R16", struct pt_regs, regs[16]);
+	offset("PT_R17", struct pt_regs, regs[17]);
+	offset("PT_R18", struct pt_regs, regs[18]);
+	offset("PT_R19", struct pt_regs, regs[19]);
+	offset("PT_R20", struct pt_regs, regs[20]);
+	offset("PT_R21", struct pt_regs, regs[21]);
+	offset("PT_R22", struct pt_regs, regs[22]);
+	offset("PT_R23", struct pt_regs, regs[23]);
+	offset("PT_R24", struct pt_regs, regs[24]);
+	offset("PT_R25", struct pt_regs, regs[25]);
+	offset("PT_R26", struct pt_regs, regs[26]);
+	offset("PT_R27", struct pt_regs, regs[27]);
+	offset("PT_R28", struct pt_regs, regs[28]);
+	offset("PT_R29", struct pt_regs, regs[29]);
+	offset("PT_R30", struct pt_regs, regs[30]);
+	offset("PT_R31", struct pt_regs, regs[31]);
+	offset("PT_LO", struct pt_regs, lo);
+	offset("PT_HI", struct pt_regs, hi);
 #ifdef CONFIG_CPU_HAS_SMARTMIPS
-	offset("#define PT_ACX    ", struct pt_regs, acx);
+	offset("PT_ACX", struct pt_regs, acx);
 #endif
-	offset("#define PT_EPC    ", struct pt_regs, cp0_epc);
-	offset("#define PT_BVADDR ", struct pt_regs, cp0_badvaddr);
-	offset("#define PT_STATUS ", struct pt_regs, cp0_status);
-	offset("#define PT_CAUSE  ", struct pt_regs, cp0_cause);
+	offset("PT_EPC", struct pt_regs, cp0_epc);
+	offset("PT_BVADDR", struct pt_regs, cp0_badvaddr);
+	offset("PT_STATUS", struct pt_regs, cp0_status);
+	offset("PT_CAUSE", struct pt_regs, cp0_cause);
 #ifdef CONFIG_MIPS_MT_SMTC
-	offset("#define PT_TCSTATUS  ", struct pt_regs, cp0_tcstatus);
+	offset("PT_TCSTATUS", struct pt_regs, cp0_tcstatus);
 #endif /* CONFIG_MIPS_MT_SMTC */
-	size("#define PT_SIZE   ", struct pt_regs);
+	size("PT_SIZE", struct pt_regs);
 	linefeed;
 }
 
 void output_task_defines(void)
 {
-	text("/* MIPS task_struct offsets. */");
-	offset("#define TASK_STATE         ", struct task_struct, state);
-	offset("#define TASK_THREAD_INFO   ", struct task_struct, stack);
-	offset("#define TASK_FLAGS         ", struct task_struct, flags);
-	offset("#define TASK_MM            ", struct task_struct, mm);
-	offset("#define TASK_PID           ", struct task_struct, pid);
-	size(  "#define TASK_STRUCT_SIZE   ", struct task_struct);
+	text("MIPS task_struct offsets.");
+	offset("TASK_STATE", struct task_struct, state);
+	offset("TASK_THREAD_INFO", struct task_struct, stack);
+	offset("TASK_FLAGS", struct task_struct, flags);
+	offset("TASK_MM", struct task_struct, mm);
+	offset("TASK_PID", struct task_struct, pid);
+	size(  "TASK_STRUCT_SIZE", struct task_struct);
 	linefeed;
 }
 
 void output_thread_info_defines(void)
 {
-	text("/* MIPS thread_info offsets. */");
-	offset("#define TI_TASK            ", struct thread_info, task);
-	offset("#define TI_EXEC_DOMAIN     ", struct thread_info, exec_domain);
-	offset("#define TI_FLAGS           ", struct thread_info, flags);
-	offset("#define TI_TP_VALUE	   ", struct thread_info, tp_value);
-	offset("#define TI_CPU             ", struct thread_info, cpu);
-	offset("#define TI_PRE_COUNT       ", struct thread_info, preempt_count);
-	offset("#define TI_ADDR_LIMIT      ", struct thread_info, addr_limit);
-	offset("#define TI_RESTART_BLOCK   ", struct thread_info, restart_block);
-	offset("#define TI_REGS            ", struct thread_info, regs);
-	constant("#define _THREAD_SIZE       ", THREAD_SIZE);
-	constant("#define _THREAD_MASK       ", THREAD_MASK);
+	text("MIPS thread_info offsets.");
+	offset("TI_TASK", struct thread_info, task);
+	offset("TI_EXEC_DOMAIN", struct thread_info, exec_domain);
+	offset("TI_FLAGS", struct thread_info, flags);
+	offset("TI_TP_VALUE", struct thread_info, tp_value);
+	offset("TI_CPU", struct thread_info, cpu);
+	offset("TI_PRE_COUNT", struct thread_info, preempt_count);
+	offset("TI_ADDR_LIMIT", struct thread_info, addr_limit);
+	offset("TI_RESTART_BLOCK", struct thread_info, restart_block);
+	offset("TI_REGS", struct thread_info, regs);
+	constant("_THREAD_SIZE", THREAD_SIZE);
+	constant("_THREAD_MASK", THREAD_MASK);
 	linefeed;
 }
 
 void output_thread_defines(void)
 {
-	text("/* MIPS specific thread_struct offsets. */");
-	offset("#define THREAD_REG16   ", struct task_struct, thread.reg16);
-	offset("#define THREAD_REG17   ", struct task_struct, thread.reg17);
-	offset("#define THREAD_REG18   ", struct task_struct, thread.reg18);
-	offset("#define THREAD_REG19   ", struct task_struct, thread.reg19);
-	offset("#define THREAD_REG20   ", struct task_struct, thread.reg20);
-	offset("#define THREAD_REG21   ", struct task_struct, thread.reg21);
-	offset("#define THREAD_REG22   ", struct task_struct, thread.reg22);
-	offset("#define THREAD_REG23   ", struct task_struct, thread.reg23);
-	offset("#define THREAD_REG29   ", struct task_struct, thread.reg29);
-	offset("#define THREAD_REG30   ", struct task_struct, thread.reg30);
-	offset("#define THREAD_REG31   ", struct task_struct, thread.reg31);
-	offset("#define THREAD_STATUS  ", struct task_struct,
+	text("MIPS specific thread_struct offsets.");
+	offset("THREAD_REG16", struct task_struct, thread.reg16);
+	offset("THREAD_REG17", struct task_struct, thread.reg17);
+	offset("THREAD_REG18", struct task_struct, thread.reg18);
+	offset("THREAD_REG19", struct task_struct, thread.reg19);
+	offset("THREAD_REG20", struct task_struct, thread.reg20);
+	offset("THREAD_REG21", struct task_struct, thread.reg21);
+	offset("THREAD_REG22", struct task_struct, thread.reg22);
+	offset("THREAD_REG23", struct task_struct, thread.reg23);
+	offset("THREAD_REG29", struct task_struct, thread.reg29);
+	offset("THREAD_REG30", struct task_struct, thread.reg30);
+	offset("THREAD_REG31", struct task_struct, thread.reg31);
+	offset("THREAD_STATUS", struct task_struct,
 	       thread.cp0_status);
-	offset("#define THREAD_FPU     ", struct task_struct, thread.fpu);
+	offset("THREAD_FPU", struct task_struct, thread.fpu);
 
-	offset("#define THREAD_BVADDR  ", struct task_struct, \
+	offset("THREAD_BVADDR", struct task_struct, \
 	       thread.cp0_badvaddr);
-	offset("#define THREAD_BUADDR  ", struct task_struct, \
+	offset("THREAD_BUADDR", struct task_struct, \
 	       thread.cp0_baduaddr);
-	offset("#define THREAD_ECODE   ", struct task_struct, \
+	offset("THREAD_ECODE", struct task_struct, \
 	       thread.error_code);
-	offset("#define THREAD_TRAPNO  ", struct task_struct, thread.trap_no);
-	offset("#define THREAD_TRAMP   ", struct task_struct, \
+	offset("THREAD_TRAPNO", struct task_struct, thread.trap_no);
+	offset("THREAD_TRAMP", struct task_struct, \
 	       thread.irix_trampoline);
-	offset("#define THREAD_OLDCTX  ", struct task_struct, \
+	offset("THREAD_OLDCTX", struct task_struct, \
 	       thread.irix_oldctx);
 	linefeed;
 }
 
 void output_thread_fpu_defines(void)
 {
-	offset("#define THREAD_FPR0    ",
+	offset("THREAD_FPR0",
 	       struct task_struct, thread.fpu.fpr[0]);
-	offset("#define THREAD_FPR1    ",
+	offset("THREAD_FPR1",
 	       struct task_struct, thread.fpu.fpr[1]);
-	offset("#define THREAD_FPR2    ",
+	offset("THREAD_FPR2",
 	       struct task_struct, thread.fpu.fpr[2]);
-	offset("#define THREAD_FPR3    ",
+	offset("THREAD_FPR3",
 	       struct task_struct, thread.fpu.fpr[3]);
-	offset("#define THREAD_FPR4    ",
+	offset("THREAD_FPR4",
 	       struct task_struct, thread.fpu.fpr[4]);
-	offset("#define THREAD_FPR5    ",
+	offset("THREAD_FPR5",
 	       struct task_struct, thread.fpu.fpr[5]);
-	offset("#define THREAD_FPR6    ",
+	offset("THREAD_FPR6",
 	       struct task_struct, thread.fpu.fpr[6]);
-	offset("#define THREAD_FPR7    ",
+	offset("THREAD_FPR7",
 	       struct task_struct, thread.fpu.fpr[7]);
-	offset("#define THREAD_FPR8    ",
+	offset("THREAD_FPR8",
 	       struct task_struct, thread.fpu.fpr[8]);
-	offset("#define THREAD_FPR9    ",
+	offset("THREAD_FPR9",
 	       struct task_struct, thread.fpu.fpr[9]);
-	offset("#define THREAD_FPR10   ",
+	offset("THREAD_FPR10",
 	       struct task_struct, thread.fpu.fpr[10]);
-	offset("#define THREAD_FPR11   ",
+	offset("THREAD_FPR11",
 	       struct task_struct, thread.fpu.fpr[11]);
-	offset("#define THREAD_FPR12   ",
+	offset("THREAD_FPR12",
 	       struct task_struct, thread.fpu.fpr[12]);
-	offset("#define THREAD_FPR13   ",
+	offset("THREAD_FPR13",
 	       struct task_struct, thread.fpu.fpr[13]);
-	offset("#define THREAD_FPR14   ",
+	offset("THREAD_FPR14",
 	       struct task_struct, thread.fpu.fpr[14]);
-	offset("#define THREAD_FPR15   ",
+	offset("THREAD_FPR15",
 	       struct task_struct, thread.fpu.fpr[15]);
-	offset("#define THREAD_FPR16   ",
+	offset("THREAD_FPR16",
 	       struct task_struct, thread.fpu.fpr[16]);
-	offset("#define THREAD_FPR17   ",
+	offset("THREAD_FPR17",
 	       struct task_struct, thread.fpu.fpr[17]);
-	offset("#define THREAD_FPR18   ",
+	offset("THREAD_FPR18",
 	       struct task_struct, thread.fpu.fpr[18]);
-	offset("#define THREAD_FPR19   ",
+	offset("THREAD_FPR19",
 	       struct task_struct, thread.fpu.fpr[19]);
-	offset("#define THREAD_FPR20   ",
+	offset("THREAD_FPR20",
 	       struct task_struct, thread.fpu.fpr[20]);
-	offset("#define THREAD_FPR21   ",
+	offset("THREAD_FPR21",
 	       struct task_struct, thread.fpu.fpr[21]);
-	offset("#define THREAD_FPR22   ",
+	offset("THREAD_FPR22",
 	       struct task_struct, thread.fpu.fpr[22]);
-	offset("#define THREAD_FPR23   ",
+	offset("THREAD_FPR23",
 	       struct task_struct, thread.fpu.fpr[23]);
-	offset("#define THREAD_FPR24   ",
+	offset("THREAD_FPR24",
 	       struct task_struct, thread.fpu.fpr[24]);
-	offset("#define THREAD_FPR25   ",
+	offset("THREAD_FPR25",
 	       struct task_struct, thread.fpu.fpr[25]);
-	offset("#define THREAD_FPR26   ",
+	offset("THREAD_FPR26",
 	       struct task_struct, thread.fpu.fpr[26]);
-	offset("#define THREAD_FPR27   ",
+	offset("THREAD_FPR27",
 	       struct task_struct, thread.fpu.fpr[27]);
-	offset("#define THREAD_FPR28   ",
+	offset("THREAD_FPR28",
 	       struct task_struct, thread.fpu.fpr[28]);
-	offset("#define THREAD_FPR29   ",
+	offset("THREAD_FPR29",
 	       struct task_struct, thread.fpu.fpr[29]);
-	offset("#define THREAD_FPR30   ",
+	offset("THREAD_FPR30",
 	       struct task_struct, thread.fpu.fpr[30]);
-	offset("#define THREAD_FPR31   ",
+	offset("THREAD_FPR31",
 	       struct task_struct, thread.fpu.fpr[31]);
 
-	offset("#define THREAD_FCR31   ",
+	offset("THREAD_FCR31",
 	       struct task_struct, thread.fpu.fcr31);
 	linefeed;
 }
 
 void output_mm_defines(void)
 {
-	text("/* Size of struct page  */");
-	size("#define STRUCT_PAGE_SIZE   ", struct page);
+	text("Size of struct page");
+	size("STRUCT_PAGE_SIZE", struct page);
 	linefeed;
-	text("/* Linux mm_struct offsets. */");
-	offset("#define MM_USERS      ", struct mm_struct, mm_users);
-	offset("#define MM_PGD        ", struct mm_struct, pgd);
-	offset("#define MM_CONTEXT    ", struct mm_struct, context);
+	text("Linux mm_struct offsets.");
+	offset("MM_USERS", struct mm_struct, mm_users);
+	offset("MM_PGD", struct mm_struct, pgd);
+	offset("MM_CONTEXT", struct mm_struct, context);
 	linefeed;
-	constant("#define _PAGE_SIZE     ", PAGE_SIZE);
-	constant("#define _PAGE_SHIFT    ", PAGE_SHIFT);
+	constant("_PAGE_SIZE", PAGE_SIZE);
+	constant("_PAGE_SHIFT", PAGE_SHIFT);
 	linefeed;
-	constant("#define _PGD_T_SIZE    ", sizeof(pgd_t));
-	constant("#define _PMD_T_SIZE    ", sizeof(pmd_t));
-	constant("#define _PTE_T_SIZE    ", sizeof(pte_t));
+	constant("_PGD_T_SIZE", sizeof(pgd_t));
+	constant("_PMD_T_SIZE", sizeof(pmd_t));
+	constant("_PTE_T_SIZE", sizeof(pte_t));
 	linefeed;
-	constant("#define _PGD_T_LOG2    ", PGD_T_LOG2);
-	constant("#define _PMD_T_LOG2    ", PMD_T_LOG2);
-	constant("#define _PTE_T_LOG2    ", PTE_T_LOG2);
+	constant("_PGD_T_LOG2", PGD_T_LOG2);
+	constant("_PMD_T_LOG2", PMD_T_LOG2);
+	constant("_PTE_T_LOG2", PTE_T_LOG2);
 	linefeed;
-	constant("#define _PGD_ORDER     ", PGD_ORDER);
-	constant("#define _PMD_ORDER     ", PMD_ORDER);
-	constant("#define _PTE_ORDER     ", PTE_ORDER);
+	constant("_PGD_ORDER", PGD_ORDER);
+	constant("_PMD_ORDER", PMD_ORDER);
+	constant("_PTE_ORDER", PTE_ORDER);
 	linefeed;
-	constant("#define _PMD_SHIFT     ", PMD_SHIFT);
-	constant("#define _PGDIR_SHIFT   ", PGDIR_SHIFT);
+	constant("_PMD_SHIFT", PMD_SHIFT);
+	constant("_PGDIR_SHIFT", PGDIR_SHIFT);
 	linefeed;
-	constant("#define _PTRS_PER_PGD  ", PTRS_PER_PGD);
-	constant("#define _PTRS_PER_PMD  ", PTRS_PER_PMD);
-	constant("#define _PTRS_PER_PTE  ", PTRS_PER_PTE);
+	constant("_PTRS_PER_PGD", PTRS_PER_PGD);
+	constant("_PTRS_PER_PMD", PTRS_PER_PMD);
+	constant("_PTRS_PER_PTE", PTRS_PER_PTE);
 	linefeed;
 }
 
 #ifdef CONFIG_32BIT
 void output_sc_defines(void)
 {
-	text("/* Linux sigcontext offsets. */");
-	offset("#define SC_REGS       ", struct sigcontext, sc_regs);
-	offset("#define SC_FPREGS     ", struct sigcontext, sc_fpregs);
-	offset("#define SC_ACX        ", struct sigcontext, sc_acx);
-	offset("#define SC_MDHI       ", struct sigcontext, sc_mdhi);
-	offset("#define SC_MDLO       ", struct sigcontext, sc_mdlo);
-	offset("#define SC_PC         ", struct sigcontext, sc_pc);
-	offset("#define SC_FPC_CSR    ", struct sigcontext, sc_fpc_csr);
-	offset("#define SC_FPC_EIR    ", struct sigcontext, sc_fpc_eir);
-	offset("#define SC_HI1        ", struct sigcontext, sc_hi1);
-	offset("#define SC_LO1        ", struct sigcontext, sc_lo1);
-	offset("#define SC_HI2        ", struct sigcontext, sc_hi2);
-	offset("#define SC_LO2        ", struct sigcontext, sc_lo2);
-	offset("#define SC_HI3        ", struct sigcontext, sc_hi3);
-	offset("#define SC_LO3        ", struct sigcontext, sc_lo3);
+	text("Linux sigcontext offsets.");
+	offset("SC_REGS", struct sigcontext, sc_regs);
+	offset("SC_FPREGS", struct sigcontext, sc_fpregs);
+	offset("SC_ACX", struct sigcontext, sc_acx);
+	offset("SC_MDHI", struct sigcontext, sc_mdhi);
+	offset("SC_MDLO", struct sigcontext, sc_mdlo);
+	offset("SC_PC", struct sigcontext, sc_pc);
+	offset("SC_FPC_CSR", struct sigcontext, sc_fpc_csr);
+	offset("SC_FPC_EIR", struct sigcontext, sc_fpc_eir);
+	offset("SC_HI1", struct sigcontext, sc_hi1);
+	offset("SC_LO1", struct sigcontext, sc_lo1);
+	offset("SC_HI2", struct sigcontext, sc_hi2);
+	offset("SC_LO2", struct sigcontext, sc_lo2);
+	offset("SC_HI3", struct sigcontext, sc_hi3);
+	offset("SC_LO3", struct sigcontext, sc_lo3);
 	linefeed;
 }
 #endif
@@ -270,13 +270,13 @@ void output_sc_defines(void)
 #ifdef CONFIG_64BIT
 void output_sc_defines(void)
 {
-	text("/* Linux sigcontext offsets. */");
-	offset("#define SC_REGS       ", struct sigcontext, sc_regs);
-	offset("#define SC_FPREGS     ", struct sigcontext, sc_fpregs);
-	offset("#define SC_MDHI       ", struct sigcontext, sc_mdhi);
-	offset("#define SC_MDLO       ", struct sigcontext, sc_mdlo);
-	offset("#define SC_PC         ", struct sigcontext, sc_pc);
-	offset("#define SC_FPC_CSR    ", struct sigcontext, sc_fpc_csr);
+	text("Linux sigcontext offsets.");
+	offset("SC_REGS", struct sigcontext, sc_regs);
+	offset("SC_FPREGS", struct sigcontext, sc_fpregs);
+	offset("SC_MDHI", struct sigcontext, sc_mdhi);
+	offset("SC_MDLO", struct sigcontext, sc_mdlo);
+	offset("SC_PC", struct sigcontext, sc_pc);
+	offset("SC_FPC_CSR", struct sigcontext, sc_fpc_csr);
 	linefeed;
 }
 #endif
@@ -284,56 +284,56 @@ void output_sc_defines(void)
 #ifdef CONFIG_MIPS32_COMPAT
 void output_sc32_defines(void)
 {
-	text("/* Linux 32-bit sigcontext offsets. */");
-	offset("#define SC32_FPREGS     ", struct sigcontext32, sc_fpregs);
-	offset("#define SC32_FPC_CSR    ", struct sigcontext32, sc_fpc_csr);
-	offset("#define SC32_FPC_EIR    ", struct sigcontext32, sc_fpc_eir);
+	text("Linux 32-bit sigcontext offsets.");
+	offset("SC32_FPREGS", struct sigcontext32, sc_fpregs);
+	offset("SC32_FPC_CSR", struct sigcontext32, sc_fpc_csr);
+	offset("SC32_FPC_EIR", struct sigcontext32, sc_fpc_eir);
 	linefeed;
 }
 #endif
 
 void output_signal_defined(void)
 {
-	text("/* Linux signal numbers. */");
-	constant("#define _SIGHUP     ", SIGHUP);
-	constant("#define _SIGINT     ", SIGINT);
-	constant("#define _SIGQUIT    ", SIGQUIT);
-	constant("#define _SIGILL     ", SIGILL);
-	constant("#define _SIGTRAP    ", SIGTRAP);
-	constant("#define _SIGIOT     ", SIGIOT);
-	constant("#define _SIGABRT    ", SIGABRT);
-	constant("#define _SIGEMT     ", SIGEMT);
-	constant("#define _SIGFPE     ", SIGFPE);
-	constant("#define _SIGKILL    ", SIGKILL);
-	constant("#define _SIGBUS     ", SIGBUS);
-	constant("#define _SIGSEGV    ", SIGSEGV);
-	constant("#define _SIGSYS     ", SIGSYS);
-	constant("#define _SIGPIPE    ", SIGPIPE);
-	constant("#define _SIGALRM    ", SIGALRM);
-	constant("#define _SIGTERM    ", SIGTERM);
-	constant("#define _SIGUSR1    ", SIGUSR1);
-	constant("#define _SIGUSR2    ", SIGUSR2);
-	constant("#define _SIGCHLD    ", SIGCHLD);
-	constant("#define _SIGPWR     ", SIGPWR);
-	constant("#define _SIGWINCH   ", SIGWINCH);
-	constant("#define _SIGURG     ", SIGURG);
-	constant("#define _SIGIO      ", SIGIO);
-	constant("#define _SIGSTOP    ", SIGSTOP);
-	constant("#define _SIGTSTP    ", SIGTSTP);
-	constant("#define _SIGCONT    ", SIGCONT);
-	constant("#define _SIGTTIN    ", SIGTTIN);
-	constant("#define _SIGTTOU    ", SIGTTOU);
-	constant("#define _SIGVTALRM  ", SIGVTALRM);
-	constant("#define _SIGPROF    ", SIGPROF);
-	constant("#define _SIGXCPU    ", SIGXCPU);
-	constant("#define _SIGXFSZ    ", SIGXFSZ);
+	text("Linux signal numbers.");
+	constant("_SIGHUP", SIGHUP);
+	constant("_SIGINT", SIGINT);
+	constant("_SIGQUIT", SIGQUIT);
+	constant("_SIGILL", SIGILL);
+	constant("_SIGTRAP", SIGTRAP);
+	constant("_SIGIOT", SIGIOT);
+	constant("_SIGABRT", SIGABRT);
+	constant("_SIGEMT", SIGEMT);
+	constant("_SIGFPE", SIGFPE);
+	constant("_SIGKILL", SIGKILL);
+	constant("_SIGBUS", SIGBUS);
+	constant("_SIGSEGV", SIGSEGV);
+	constant("_SIGSYS", SIGSYS);
+	constant("_SIGPIPE", SIGPIPE);
+	constant("_SIGALRM", SIGALRM);
+	constant("_SIGTERM", SIGTERM);
+	constant("_SIGUSR1", SIGUSR1);
+	constant("_SIGUSR2", SIGUSR2);
+	constant("_SIGCHLD", SIGCHLD);
+	constant("_SIGPWR", SIGPWR);
+	constant("_SIGWINCH", SIGWINCH);
+	constant("_SIGURG", SIGURG);
+	constant("_SIGIO", SIGIO);
+	constant("_SIGSTOP", SIGSTOP);
+	constant("_SIGTSTP", SIGTSTP);
+	constant("_SIGCONT", SIGCONT);
+	constant("_SIGTTIN", SIGTTIN);
+	constant("_SIGTTOU", SIGTTOU);
+	constant("_SIGVTALRM", SIGVTALRM);
+	constant("_SIGPROF", SIGPROF);
+	constant("_SIGXCPU", SIGXCPU);
+	constant("_SIGXFSZ", SIGXFSZ);
 	linefeed;
 }
 
 void output_irq_cpustat_t_defines(void)
 {
-	text("/* Linux irq_cpustat_t offsets. */");
-	offset("#define IC_SOFTIRQ_PENDING ", irq_cpustat_t, __softirq_pending);
-	size("#define IC_IRQ_CPUSTAT_T   ", irq_cpustat_t);
+	text("Linux irq_cpustat_t offsets.");
+	offset("IC_SOFTIRQ_PENDING", irq_cpustat_t, __softirq_pending);
+	size("IC_IRQ_CPUSTAT_T", irq_cpustat_t);
 	linefeed;
 }
diff --git a/arch/mips/kernel/cevt-ds1287.c b/arch/mips/kernel/cevt-ds1287.c
new file mode 100644
index 000000000000..df4acb68bfb5
--- /dev/null
+++ b/arch/mips/kernel/cevt-ds1287.c
@@ -0,0 +1,129 @@
+/*
+ *  DS1287 clockevent driver
+ *
+ *  Copyright (C) 2008  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/clockchips.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/mc146818rtc.h>
+
+#include <asm/time.h>
+
+int ds1287_timer_state(void)
+{
+	return (CMOS_READ(RTC_REG_C) & RTC_PF) != 0;
+}
+
+int ds1287_set_base_clock(unsigned int hz)
+{
+	u8 rate;
+
+	switch (hz) {
+	case 128:
+		rate = 0x9;
+		break;
+	case 256:
+		rate = 0x8;
+		break;
+	case 1024:
+		rate = 0x6;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	CMOS_WRITE(RTC_REF_CLCK_32KHZ | rate, RTC_REG_A);
+
+	return 0;
+}
+
+static int ds1287_set_next_event(unsigned long delta,
+				 struct clock_event_device *evt)
+{
+	return -EINVAL;
+}
+
+static void ds1287_set_mode(enum clock_event_mode mode,
+			    struct clock_event_device *evt)
+{
+	u8 val;
+
+	spin_lock(&rtc_lock);
+
+	val = CMOS_READ(RTC_REG_B);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+		val |= RTC_PIE;
+		break;
+	default:
+		val &= ~RTC_PIE;
+		break;
+	}
+
+	CMOS_WRITE(val, RTC_REG_B);
+
+	spin_unlock(&rtc_lock);
+}
+
+static void ds1287_event_handler(struct clock_event_device *dev)
+{
+}
+
+static struct clock_event_device ds1287_clockevent = {
+	.name		= "ds1287",
+	.features	= CLOCK_EVT_FEAT_PERIODIC,
+	.cpumask	= CPU_MASK_CPU0,
+	.set_next_event	= ds1287_set_next_event,
+	.set_mode	= ds1287_set_mode,
+	.event_handler	= ds1287_event_handler,
+};
+
+static irqreturn_t ds1287_interrupt(int irq, void *dev_id)
+{
+	struct clock_event_device *cd = &ds1287_clockevent;
+
+	/* Ack the RTC interrupt. */
+	CMOS_READ(RTC_REG_C);
+
+	cd->event_handler(cd);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction ds1287_irqaction = {
+	.handler	= ds1287_interrupt,
+	.flags		= IRQF_DISABLED | IRQF_PERCPU,
+	.name		= "ds1287",
+};
+
+int __init ds1287_clockevent_init(int irq)
+{
+	struct clock_event_device *cd;
+
+	cd = &ds1287_clockevent;
+	cd->rating = 100;
+	cd->irq = irq;
+	clockevent_set_clock(cd, 32768);
+	cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd);
+	cd->min_delta_ns = clockevent_delta2ns(0x300, cd);
+
+	clockevents_register_device(&ds1287_clockevent);
+
+	return setup_irq(irq, &ds1287_irqaction);
+}
diff --git a/arch/mips/kernel/cevt-gt641xx.c b/arch/mips/kernel/cevt-gt641xx.c
index c36772631fe0..6e2f58520afb 100644
--- a/arch/mips/kernel/cevt-gt641xx.c
+++ b/arch/mips/kernel/cevt-gt641xx.c
@@ -25,8 +25,6 @@
 #include <asm/gt64120.h>
 #include <asm/time.h>
 
-#include <irq.h>
-
 static DEFINE_SPINLOCK(gt641xx_timer_lock);
 static unsigned int gt641xx_base_clock;
 
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 89c3304cb93c..335a6ae3d594 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -169,6 +169,7 @@ static inline void check_wait(void)
 
 	case CPU_24K:
 	case CPU_34K:
+	case CPU_1004K:
 		cpu_wait = r4k_wait;
 		if (read_c0_config7() & MIPS_CONF7_WII)
 			cpu_wait = r4k_wait_irqoff;
@@ -675,6 +676,12 @@ static void __cpuinit decode_configs(struct cpuinfo_mips *c)
 		return;
 }
 
+#ifdef CONFIG_CPU_MIPSR2
+extern void spram_config(void);
+#else
+static inline void spram_config(void) {}
+#endif
+
 static inline void cpu_probe_mips(struct cpuinfo_mips *c)
 {
 	decode_configs(c);
@@ -711,7 +718,12 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c)
 	case PRID_IMP_74K:
 		c->cputype = CPU_74K;
 		break;
+	case PRID_IMP_1004K:
+		c->cputype = CPU_1004K;
+		break;
 	}
+
+	spram_config();
 }
 
 static inline void cpu_probe_alchemy(struct cpuinfo_mips *c)
@@ -778,7 +790,7 @@ static inline void cpu_probe_sandcraft(struct cpuinfo_mips *c)
 	}
 }
 
-static inline void cpu_probe_philips(struct cpuinfo_mips *c)
+static inline void cpu_probe_nxp(struct cpuinfo_mips *c)
 {
 	decode_configs(c);
 	switch (c->processor_id & 0xff00) {
@@ -787,7 +799,7 @@ static inline void cpu_probe_philips(struct cpuinfo_mips *c)
 		c->isa_level = MIPS_CPU_ISA_M32R1;
 		break;
 	default:
-		panic("Unknown Philips Core!"); /* REVISIT: die? */
+		panic("Unknown NXP Core!"); /* REVISIT: die? */
 		break;
 	}
 }
@@ -876,6 +888,7 @@ static __cpuinit const char *cpu_to_name(struct cpuinfo_mips *c)
 	case CPU_24K:		name = "MIPS 24K"; break;
 	case CPU_25KF:		name = "MIPS 25Kf"; break;
 	case CPU_34K:		name = "MIPS 34K"; break;
+	case CPU_1004K:		name = "MIPS 1004K"; break;
 	case CPU_74K:		name = "MIPS 74K"; break;
 	case CPU_VR4111:	name = "NEC VR4111"; break;
 	case CPU_VR4121:	name = "NEC VR4121"; break;
@@ -925,8 +938,8 @@ __cpuinit void cpu_probe(void)
 	case PRID_COMP_SANDCRAFT:
 		cpu_probe_sandcraft(c);
 		break;
- 	case PRID_COMP_PHILIPS:
-		cpu_probe_philips(c);
+	case PRID_COMP_NXP:
+		cpu_probe_nxp(c);
 		break;
 	default:
 		c->cputype = CPU_UNKNOWN;
diff --git a/arch/mips/kernel/csrc-ioasic.c b/arch/mips/kernel/csrc-ioasic.c
new file mode 100644
index 000000000000..1d5f63cf8997
--- /dev/null
+++ b/arch/mips/kernel/csrc-ioasic.c
@@ -0,0 +1,65 @@
+/*
+ *  DEC I/O ASIC's counter clocksource
+ *
+ *  Copyright (C) 2008  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+
+#include <asm/ds1287.h>
+#include <asm/time.h>
+#include <asm/dec/ioasic.h>
+#include <asm/dec/ioasic_addrs.h>
+
+static cycle_t dec_ioasic_hpt_read(void)
+{
+	return ioasic_read(IO_REG_FCTR);
+}
+
+static struct clocksource clocksource_dec = {
+	.name		= "dec-ioasic",
+	.read		= dec_ioasic_hpt_read,
+	.mask		= CLOCKSOURCE_MASK(32),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+void __init dec_ioasic_clocksource_init(void)
+{
+	unsigned int freq;
+	u32 start, end;
+	int i = HZ / 10;
+
+
+	while (!ds1287_timer_state())
+		;
+
+	start = dec_ioasic_hpt_read();
+
+	while (i--)
+		while (!ds1287_timer_state())
+			;
+
+	end = dec_ioasic_hpt_read();
+
+	freq = (end - start) * 10;
+	printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq);
+
+	clocksource_dec.rating = 200 + freq / 10000000;
+	clocksource_set_clock(&clocksource_dec, freq);
+
+	clocksource_register(&clocksource_dec);
+}
diff --git a/arch/mips/kernel/gpio_txx9.c b/arch/mips/kernel/gpio_txx9.c
new file mode 100644
index 000000000000..b1436a857998
--- /dev/null
+++ b/arch/mips/kernel/gpio_txx9.c
@@ -0,0 +1,87 @@
+/*
+ * A gpio chip driver for TXx9 SoCs
+ *
+ * Copyright (C) 2008 Atsushi Nemoto <anemo@mba.ocn.ne.jp>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/gpio.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <asm/txx9pio.h>
+
+static DEFINE_SPINLOCK(txx9_gpio_lock);
+
+static struct txx9_pio_reg __iomem *txx9_pioptr;
+
+static int txx9_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+	return __raw_readl(&txx9_pioptr->din) & (1 << offset);
+}
+
+static void txx9_gpio_set_raw(unsigned int offset, int value)
+{
+	u32 val;
+	val = __raw_readl(&txx9_pioptr->dout);
+	if (value)
+		val |= 1 << offset;
+	else
+		val &= ~(1 << offset);
+	__raw_writel(val, &txx9_pioptr->dout);
+}
+
+static void txx9_gpio_set(struct gpio_chip *chip, unsigned int offset,
+			  int value)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&txx9_gpio_lock, flags);
+	txx9_gpio_set_raw(offset, value);
+	mmiowb();
+	spin_unlock_irqrestore(&txx9_gpio_lock, flags);
+}
+
+static int txx9_gpio_dir_in(struct gpio_chip *chip, unsigned int offset)
+{
+	spin_lock_irq(&txx9_gpio_lock);
+	__raw_writel(__raw_readl(&txx9_pioptr->dir) & ~(1 << offset),
+		     &txx9_pioptr->dir);
+	mmiowb();
+	spin_unlock_irq(&txx9_gpio_lock);
+	return 0;
+}
+
+static int txx9_gpio_dir_out(struct gpio_chip *chip, unsigned int offset,
+			     int value)
+{
+	spin_lock_irq(&txx9_gpio_lock);
+	txx9_gpio_set_raw(offset, value);
+	__raw_writel(__raw_readl(&txx9_pioptr->dir) | (1 << offset),
+		     &txx9_pioptr->dir);
+	mmiowb();
+	spin_unlock_irq(&txx9_gpio_lock);
+	return 0;
+}
+
+static struct gpio_chip txx9_gpio_chip = {
+	.get = txx9_gpio_get,
+	.set = txx9_gpio_set,
+	.direction_input = txx9_gpio_dir_in,
+	.direction_output = txx9_gpio_dir_out,
+	.label = "TXx9",
+};
+
+int __init txx9_gpio_init(unsigned long baseaddr,
+			  unsigned int base, unsigned int num)
+{
+	txx9_pioptr = ioremap(baseaddr, sizeof(struct txx9_pio_reg));
+	if (!txx9_pioptr)
+		return -ENODEV;
+	txx9_gpio_chip.base = base;
+	txx9_gpio_chip.ngpio = num;
+	return gpiochip_add(&txx9_gpio_chip);
+}
diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c
new file mode 100644
index 000000000000..f0a4bb19e096
--- /dev/null
+++ b/arch/mips/kernel/irq-gic.c
@@ -0,0 +1,295 @@
+#undef DEBUG
+
+#include <linux/bitmap.h>
+#include <linux/init.h>
+
+#include <asm/io.h>
+#include <asm/gic.h>
+#include <asm/gcmpregs.h>
+#include <asm/mips-boards/maltaint.h>
+#include <asm/irq.h>
+#include <linux/hardirq.h>
+#include <asm-generic/bitops/find.h>
+
+
+static unsigned long _gic_base;
+static unsigned int _irqbase, _mapsize, numvpes, numintrs;
+static struct gic_intr_map *_intrmap;
+
+static struct gic_pcpu_mask pcpu_masks[NR_CPUS];
+static struct gic_pending_regs pending_regs[NR_CPUS];
+static struct gic_intrmask_regs intrmask_regs[NR_CPUS];
+
+#define gic_wedgeb2bok 0	/*
+				 * Can GIC handle b2b writes to wedge register?
+				 */
+#if gic_wedgeb2bok == 0
+static DEFINE_SPINLOCK(gic_wedgeb2b_lock);
+#endif
+
+void gic_send_ipi(unsigned int intr)
+{
+#if gic_wedgeb2bok == 0
+	unsigned long flags;
+#endif
+	pr_debug("CPU%d: %s status %08x\n", smp_processor_id(), __func__,
+		 read_c0_status());
+	if (!gic_wedgeb2bok)
+		spin_lock_irqsave(&gic_wedgeb2b_lock, flags);
+	GICWRITE(GIC_REG(SHARED, GIC_SH_WEDGE), 0x80000000 | intr);
+	if (!gic_wedgeb2bok) {
+		(void) GIC_REG(SHARED, GIC_SH_CONFIG);
+		spin_unlock_irqrestore(&gic_wedgeb2b_lock, flags);
+	}
+}
+
+/* This is Malta specific and needs to be exported */
+static void vpe_local_setup(unsigned int numvpes)
+{
+	int i;
+	unsigned long timer_interrupt = 5, perf_interrupt = 5;
+	unsigned int vpe_ctl;
+
+	/*
+	 * Setup the default performance counter timer interrupts
+	 * for all VPEs
+	 */
+	for (i = 0; i < numvpes; i++) {
+		GICWRITE(GIC_REG(VPE_LOCAL, GIC_VPE_OTHER_ADDR), i);
+
+		/* Are Interrupts locally routable? */
+		GICREAD(GIC_REG(VPE_OTHER, GIC_VPE_CTL), vpe_ctl);
+		if (vpe_ctl & GIC_VPE_CTL_TIMER_RTBL_MSK)
+			GICWRITE(GIC_REG(VPE_OTHER, GIC_VPE_TIMER_MAP),
+				 GIC_MAP_TO_PIN_MSK | timer_interrupt);
+
+		if (vpe_ctl & GIC_VPE_CTL_PERFCNT_RTBL_MSK)
+			GICWRITE(GIC_REG(VPE_OTHER, GIC_VPE_PERFCTR_MAP),
+				 GIC_MAP_TO_PIN_MSK | perf_interrupt);
+	}
+}
+
+unsigned int gic_get_int(void)
+{
+	unsigned int i;
+	unsigned long *pending, *intrmask, *pcpu_mask;
+	unsigned long *pending_abs, *intrmask_abs;
+
+	/* Get per-cpu bitmaps */
+	pending = pending_regs[smp_processor_id()].pending;
+	intrmask = intrmask_regs[smp_processor_id()].intrmask;
+	pcpu_mask = pcpu_masks[smp_processor_id()].pcpu_mask;
+
+	pending_abs = (unsigned long *) GIC_REG_ABS_ADDR(SHARED,
+							 GIC_SH_PEND_31_0_OFS);
+	intrmask_abs = (unsigned long *) GIC_REG_ABS_ADDR(SHARED,
+							  GIC_SH_MASK_31_0_OFS);
+
+	for (i = 0; i < BITS_TO_LONGS(GIC_NUM_INTRS); i++) {
+		GICREAD(*pending_abs, pending[i]);
+		GICREAD(*intrmask_abs, intrmask[i]);
+		pending_abs++;
+		intrmask_abs++;
+	}
+
+	bitmap_and(pending, pending, intrmask, GIC_NUM_INTRS);
+	bitmap_and(pending, pending, pcpu_mask, GIC_NUM_INTRS);
+
+	i = find_first_bit(pending, GIC_NUM_INTRS);
+
+	pr_debug("CPU%d: %s pend=%d\n", smp_processor_id(), __func__, i);
+
+	return i;
+}
+
+static unsigned int gic_irq_startup(unsigned int irq)
+{
+	pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
+	irq -= _irqbase;
+	/* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */
+	GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_SMASK_31_0_OFS + (irq / 32))),
+		 1 << (irq % 32));
+	return 0;
+}
+
+static void gic_irq_ack(unsigned int irq)
+{
+#if gic_wedgeb2bok == 0
+	unsigned long flags;
+#endif
+	pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
+	irq -= _irqbase;
+	GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_RMASK_31_0_OFS + (irq / 32))),
+		 1 << (irq % 32));
+
+	if (_intrmap[irq].trigtype == GIC_TRIG_EDGE) {
+		if (!gic_wedgeb2bok)
+			spin_lock_irqsave(&gic_wedgeb2b_lock, flags);
+		GICWRITE(GIC_REG(SHARED, GIC_SH_WEDGE), irq);
+		if (!gic_wedgeb2bok) {
+			(void) GIC_REG(SHARED, GIC_SH_CONFIG);
+			spin_unlock_irqrestore(&gic_wedgeb2b_lock, flags);
+		}
+	}
+}
+
+static void gic_mask_irq(unsigned int irq)
+{
+	pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
+	irq -= _irqbase;
+	/* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */
+	GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_RMASK_31_0_OFS + (irq / 32))),
+		 1 << (irq % 32));
+}
+
+static void gic_unmask_irq(unsigned int irq)
+{
+	pr_debug("CPU%d: %s: irq%d\n", smp_processor_id(), __func__, irq);
+	irq -= _irqbase;
+	/* FIXME: this is wrong for !GICISWORDLITTLEENDIAN */
+	GICWRITE(GIC_REG_ADDR(SHARED, (GIC_SH_SMASK_31_0_OFS + (irq / 32))),
+		 1 << (irq % 32));
+}
+
+#ifdef CONFIG_SMP
+
+static DEFINE_SPINLOCK(gic_lock);
+
+static void gic_set_affinity(unsigned int irq, cpumask_t cpumask)
+{
+	cpumask_t	tmp = CPU_MASK_NONE;
+	unsigned long	flags;
+	int		i;
+
+	pr_debug(KERN_DEBUG "%s called\n", __func__);
+	irq -= _irqbase;
+
+	cpus_and(tmp, cpumask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	/* Assumption : cpumask refers to a single CPU */
+	spin_lock_irqsave(&gic_lock, flags);
+	for (;;) {
+		/* Re-route this IRQ */
+		GIC_SH_MAP_TO_VPE_SMASK(irq, first_cpu(tmp));
+
+		/*
+		 * FIXME: assumption that _intrmap is ordered and has no holes
+		 */
+
+		/* Update the intr_map */
+		_intrmap[irq].cpunum = first_cpu(tmp);
+
+		/* Update the pcpu_masks */
+		for (i = 0; i < NR_CPUS; i++)
+			clear_bit(irq, pcpu_masks[i].pcpu_mask);
+		set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask);
+
+	}
+	irq_desc[irq].affinity = cpumask;
+	spin_unlock_irqrestore(&gic_lock, flags);
+
+}
+#endif
+
+static struct irq_chip gic_irq_controller = {
+	.name		=	"MIPS GIC",
+	.startup	=	gic_irq_startup,
+	.ack		=	gic_irq_ack,
+	.mask		=	gic_mask_irq,
+	.mask_ack	=	gic_mask_irq,
+	.unmask		=	gic_unmask_irq,
+	.eoi		=	gic_unmask_irq,
+#ifdef CONFIG_SMP
+	.set_affinity	=	gic_set_affinity,
+#endif
+};
+
+static void __init setup_intr(unsigned int intr, unsigned int cpu,
+	unsigned int pin, unsigned int polarity, unsigned int trigtype)
+{
+	/* Setup Intr to Pin mapping */
+	if (pin & GIC_MAP_TO_NMI_MSK) {
+		GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)), pin);
+		/* FIXME: hack to route NMI to all cpu's */
+		for (cpu = 0; cpu < NR_CPUS; cpu += 32) {
+			GICWRITE(GIC_REG_ADDR(SHARED,
+					  GIC_SH_MAP_TO_VPE_REG_OFF(intr, cpu)),
+				 0xffffffff);
+		}
+	} else {
+		GICWRITE(GIC_REG_ADDR(SHARED, GIC_SH_MAP_TO_PIN(intr)),
+			 GIC_MAP_TO_PIN_MSK | pin);
+		/* Setup Intr to CPU mapping */
+		GIC_SH_MAP_TO_VPE_SMASK(intr, cpu);
+	}
+
+	/* Setup Intr Polarity */
+	GIC_SET_POLARITY(intr, polarity);
+
+	/* Setup Intr Trigger Type */
+	GIC_SET_TRIGGER(intr, trigtype);
+
+	/* Init Intr Masks */
+	GIC_SET_INTR_MASK(intr, 0);
+}
+
+static void __init gic_basic_init(void)
+{
+	unsigned int i, cpu;
+
+	/* Setup defaults */
+	for (i = 0; i < GIC_NUM_INTRS; i++) {
+		GIC_SET_POLARITY(i, GIC_POL_POS);
+		GIC_SET_TRIGGER(i, GIC_TRIG_LEVEL);
+		GIC_SET_INTR_MASK(i, 0);
+	}
+
+	/* Setup specifics */
+	for (i = 0; i < _mapsize; i++) {
+		cpu = _intrmap[i].cpunum;
+		if (cpu == X)
+			continue;
+
+		setup_intr(_intrmap[i].intrnum,
+				_intrmap[i].cpunum,
+				_intrmap[i].pin,
+				_intrmap[i].polarity,
+				_intrmap[i].trigtype);
+		/* Initialise per-cpu Interrupt software masks */
+		if (_intrmap[i].ipiflag)
+			set_bit(_intrmap[i].intrnum, pcpu_masks[cpu].pcpu_mask);
+	}
+
+	vpe_local_setup(numvpes);
+
+	for (i = _irqbase; i < (_irqbase + numintrs); i++)
+		set_irq_chip(i, &gic_irq_controller);
+}
+
+void __init gic_init(unsigned long gic_base_addr,
+		     unsigned long gic_addrspace_size,
+		     struct gic_intr_map *intr_map, unsigned int intr_map_size,
+		     unsigned int irqbase)
+{
+	unsigned int gicconfig;
+
+	_gic_base = (unsigned long) ioremap_nocache(gic_base_addr,
+						    gic_addrspace_size);
+	_irqbase = irqbase;
+	_intrmap = intr_map;
+	_mapsize = intr_map_size;
+
+	GICREAD(GIC_REG(SHARED, GIC_SH_CONFIG), gicconfig);
+	numintrs = (gicconfig & GIC_SH_CONFIG_NUMINTRS_MSK) >>
+		   GIC_SH_CONFIG_NUMINTRS_SHF;
+	numintrs = ((numintrs + 1) * 8);
+
+	numvpes = (gicconfig & GIC_SH_CONFIG_NUMVPES_MSK) >>
+		  GIC_SH_CONFIG_NUMVPES_SHF;
+
+	pr_debug("%s called\n", __func__);
+
+	gic_basic_init();
+}
diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c
index 4edc7e451d91..963c16d266ab 100644
--- a/arch/mips/kernel/irq-msc01.c
+++ b/arch/mips/kernel/irq-msc01.c
@@ -17,6 +17,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/msc01_ic.h>
+#include <asm/traps.h>
 
 static unsigned long _icctrl_msc;
 #define MSC01_IC_REG_BASE	_icctrl_msc
@@ -98,14 +99,13 @@ void ll_msc_irq(void)
 	}
 }
 
-void
-msc_bind_eic_interrupt(unsigned int irq, unsigned int set)
+static void msc_bind_eic_interrupt(int irq, int set)
 {
 	MSCIC_WRITE(MSC01_IC_RAMW,
 		    (irq<<MSC01_IC_RAMW_ADDR_SHF) | (set<<MSC01_IC_RAMW_DATA_SHF));
 }
 
-struct irq_chip msc_levelirq_type = {
+static struct irq_chip msc_levelirq_type = {
 	.name = "SOC-it-Level",
 	.ack = level_mask_and_ack_msc_irq,
 	.mask = mask_msc_irq,
@@ -115,7 +115,7 @@ struct irq_chip msc_levelirq_type = {
 	.end = end_msc_irq,
 };
 
-struct irq_chip msc_edgeirq_type = {
+static struct irq_chip msc_edgeirq_type = {
 	.name = "SOC-it-Edge",
 	.ack = edge_mask_and_ack_msc_irq,
 	.mask = mask_msc_irq,
@@ -128,8 +128,6 @@ struct irq_chip msc_edgeirq_type = {
 
 void __init init_msc_irqs(unsigned long icubase, unsigned int irqbase, msc_irqmap_t *imp, int nirq)
 {
-	extern void (*board_bind_eic_interrupt)(unsigned int irq, unsigned int regset);
-
 	_icctrl_msc = (unsigned long) ioremap(icubase, 0x40000);
 
 	/* Reset interrupt controller - initialises all registers to 0 */
diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h
index c0faabd52010..6c8e8c4246f7 100644
--- a/arch/mips/kernel/signal-common.h
+++ b/arch/mips/kernel/signal-common.h
@@ -14,7 +14,7 @@
 /* #define DEBUG_SIG */
 
 #ifdef DEBUG_SIG
-#  define DEBUGP(fmt, args...) printk("%s: " fmt, __FUNCTION__ , ##args)
+#  define DEBUGP(fmt, args...) printk("%s: " fmt, __func__, ##args)
 #else
 #  define DEBUGP(fmt, args...)
 #endif
diff --git a/arch/mips/kernel/smp-cmp.c b/arch/mips/kernel/smp-cmp.c
new file mode 100644
index 000000000000..ca476c4f62a5
--- /dev/null
+++ b/arch/mips/kernel/smp-cmp.c
@@ -0,0 +1,265 @@
+/*
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 MIPS Technologies, Inc.
+ *    Chris Dearman (chris@mips.com)
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+
+#include <asm/atomic.h>
+#include <asm/cacheflush.h>
+#include <asm/cpu.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/hardirq.h>
+#include <asm/mmu_context.h>
+#include <asm/smp.h>
+#include <asm/time.h>
+#include <asm/mipsregs.h>
+#include <asm/mipsmtregs.h>
+#include <asm/mips_mt.h>
+
+/*
+ * Crude manipulation of the CPU masks to control which
+ * which CPU's are brought online during initialisation
+ *
+ * Beware... this needs to be called after CPU discovery
+ * but before CPU bringup
+ */
+static int __init allowcpus(char *str)
+{
+	cpumask_t cpu_allow_map;
+	char buf[256];
+	int len;
+
+	cpus_clear(cpu_allow_map);
+	if (cpulist_parse(str, cpu_allow_map) == 0) {
+		cpu_set(0, cpu_allow_map);
+		cpus_and(cpu_possible_map, cpu_possible_map, cpu_allow_map);
+		len = cpulist_scnprintf(buf, sizeof(buf)-1, cpu_possible_map);
+		buf[len] = '\0';
+		pr_debug("Allowable CPUs: %s\n", buf);
+		return 1;
+	} else
+		return 0;
+}
+__setup("allowcpus=", allowcpus);
+
+static void ipi_call_function(unsigned int cpu)
+{
+	unsigned int action = 0;
+
+	pr_debug("CPU%d: %s cpu %d status %08x\n",
+		 smp_processor_id(), __func__, cpu, read_c0_status());
+
+	switch (cpu) {
+	case 0:
+		action = GIC_IPI_EXT_INTR_CALLFNC_VPE0;
+		break;
+	case 1:
+		action = GIC_IPI_EXT_INTR_CALLFNC_VPE1;
+		break;
+	case 2:
+		action = GIC_IPI_EXT_INTR_CALLFNC_VPE2;
+		break;
+	case 3:
+		action = GIC_IPI_EXT_INTR_CALLFNC_VPE3;
+		break;
+	}
+	gic_send_ipi(action);
+}
+
+
+static void ipi_resched(unsigned int cpu)
+{
+	unsigned int action = 0;
+
+	pr_debug("CPU%d: %s cpu %d status %08x\n",
+		 smp_processor_id(), __func__, cpu, read_c0_status());
+
+	switch (cpu) {
+	case 0:
+		action = GIC_IPI_EXT_INTR_RESCHED_VPE0;
+		break;
+	case 1:
+		action = GIC_IPI_EXT_INTR_RESCHED_VPE1;
+		break;
+	case 2:
+		action = GIC_IPI_EXT_INTR_RESCHED_VPE2;
+		break;
+	case 3:
+		action = GIC_IPI_EXT_INTR_RESCHED_VPE3;
+		break;
+	}
+	gic_send_ipi(action);
+}
+
+/*
+ * FIXME: This isn't restricted to CMP
+ * The SMVP kernel could use GIC interrupts if available
+ */
+void cmp_send_ipi_single(int cpu, unsigned int action)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	switch (action) {
+	case SMP_CALL_FUNCTION:
+		ipi_call_function(cpu);
+		break;
+
+	case SMP_RESCHEDULE_YOURSELF:
+		ipi_resched(cpu);
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+static void cmp_send_ipi_mask(cpumask_t mask, unsigned int action)
+{
+	unsigned int i;
+
+	for_each_cpu_mask(i, mask)
+		cmp_send_ipi_single(i, action);
+}
+
+static void cmp_init_secondary(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+
+	/* Assume GIC is present */
+	change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 | STATUSF_IP6 |
+				 STATUSF_IP7);
+
+	/* Enable per-cpu interrupts: platform specific */
+
+	c->core = (read_c0_ebase() >> 1) & 0xff;
+#if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
+	c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE;
+#endif
+#ifdef CONFIG_MIPS_MT_SMTC
+	c->tc_id  = (read_c0_tcbind() >> TCBIND_CURTC_SHIFT) & TCBIND_CURTC;
+#endif
+}
+
+static void cmp_smp_finish(void)
+{
+	pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__);
+
+	/* CDFIXME: remove this? */
+	write_c0_compare(read_c0_count() + (8 * mips_hpt_frequency / HZ));
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* If we have an FPU, enroll ourselves in the FPU-full mask */
+	if (cpu_has_fpu)
+		cpu_set(smp_processor_id(), mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
+	local_irq_enable();
+}
+
+static void cmp_cpus_done(void)
+{
+	pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__);
+}
+
+/*
+ * Setup the PC, SP, and GP of a secondary processor and start it running
+ * smp_bootstrap is the place to resume from
+ * __KSTK_TOS(idle) is apparently the stack pointer
+ * (unsigned long)idle->thread_info the gp
+ */
+static void cmp_boot_secondary(int cpu, struct task_struct *idle)
+{
+	struct thread_info *gp = task_thread_info(idle);
+	unsigned long sp = __KSTK_TOS(idle);
+	unsigned long pc = (unsigned long)&smp_bootstrap;
+	unsigned long a0 = 0;
+
+	pr_debug("SMPCMP: CPU%d: %s cpu %d\n", smp_processor_id(),
+		__func__, cpu);
+
+#if 0
+	/* Needed? */
+	flush_icache_range((unsigned long)gp,
+			   (unsigned long)(gp + sizeof(struct thread_info)));
+#endif
+
+	amon_cpu_start(cpu, pc, sp, gp, a0);
+}
+
+/*
+ * Common setup before any secondaries are started
+ */
+void __init cmp_smp_setup(void)
+{
+	int i;
+	int ncpu = 0;
+
+	pr_debug("SMPCMP: CPU%d: %s\n", smp_processor_id(), __func__);
+
+#ifdef CONFIG_MIPS_MT_FPAFF
+	/* If we have an FPU, enroll ourselves in the FPU-full mask */
+	if (cpu_has_fpu)
+		cpu_set(0, mt_fpu_cpumask);
+#endif /* CONFIG_MIPS_MT_FPAFF */
+
+	for (i = 1; i < NR_CPUS; i++) {
+		if (amon_cpu_avail(i)) {
+			cpu_set(i, phys_cpu_present_map);
+			__cpu_number_map[i]	= ++ncpu;
+			__cpu_logical_map[ncpu]	= i;
+		}
+	}
+
+	if (cpu_has_mipsmt) {
+		unsigned int nvpe, mvpconf0 = read_c0_mvpconf0();
+
+		nvpe = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
+		smp_num_siblings = nvpe;
+	}
+	pr_info("Detected %i available secondary CPU(s)\n", ncpu);
+}
+
+void __init cmp_prepare_cpus(unsigned int max_cpus)
+{
+	pr_debug("SMPCMP: CPU%d: %s max_cpus=%d\n",
+		 smp_processor_id(), __func__, max_cpus);
+
+	/*
+	 * FIXME: some of these options are per-system, some per-core and
+	 * some per-cpu
+	 */
+	mips_mt_set_cpuoptions();
+}
+
+struct plat_smp_ops cmp_smp_ops = {
+	.send_ipi_single	= cmp_send_ipi_single,
+	.send_ipi_mask		= cmp_send_ipi_mask,
+	.init_secondary		= cmp_init_secondary,
+	.smp_finish		= cmp_smp_finish,
+	.cpus_done		= cmp_cpus_done,
+	.boot_secondary		= cmp_boot_secondary,
+	.smp_setup		= cmp_smp_setup,
+	.prepare_cpus		= cmp_prepare_cpus,
+};
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index 89e6f6aa5166..87a1816c1f45 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -36,110 +36,7 @@
 #include <asm/mipsmtregs.h>
 #include <asm/mips_mt.h>
 
-#define MIPS_CPU_IPI_RESCHED_IRQ 0
-#define MIPS_CPU_IPI_CALL_IRQ 1
-
-static int cpu_ipi_resched_irq, cpu_ipi_call_irq;
-
-#if 0
-static void dump_mtregisters(int vpe, int tc)
-{
-	printk("vpe %d tc %d\n", vpe, tc);
-
-	settc(tc);
-
-	printk("  c0 status  0x%lx\n", read_vpe_c0_status());
-	printk("  vpecontrol 0x%lx\n", read_vpe_c0_vpecontrol());
-	printk("  vpeconf0    0x%lx\n", read_vpe_c0_vpeconf0());
-	printk("  tcstatus 0x%lx\n", read_tc_c0_tcstatus());
-	printk("  tcrestart 0x%lx\n", read_tc_c0_tcrestart());
-	printk("  tcbind 0x%lx\n", read_tc_c0_tcbind());
-	printk("  tchalt 0x%lx\n", read_tc_c0_tchalt());
-}
-#endif
-
-void __init sanitize_tlb_entries(void)
-{
-	int i, tlbsiz;
-	unsigned long mvpconf0, ncpu;
-
-	if (!cpu_has_mipsmt)
-		return;
-
-	/* Enable VPC */
-	set_c0_mvpcontrol(MVPCONTROL_VPC);
-
-	back_to_back_c0_hazard();
-
-	/* Disable TLB sharing */
-	clear_c0_mvpcontrol(MVPCONTROL_STLB);
-
-	mvpconf0 = read_c0_mvpconf0();
-
-	printk(KERN_INFO "MVPConf0 0x%lx TLBS %lx PTLBE %ld\n", mvpconf0,
-		   (mvpconf0 & MVPCONF0_TLBS) >> MVPCONF0_TLBS_SHIFT,
-			   (mvpconf0 & MVPCONF0_PTLBE) >> MVPCONF0_PTLBE_SHIFT);
-
-	tlbsiz = (mvpconf0 & MVPCONF0_PTLBE) >> MVPCONF0_PTLBE_SHIFT;
-	ncpu = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
-
-	printk(" tlbsiz %d ncpu %ld\n", tlbsiz, ncpu);
-
-	if (tlbsiz > 0) {
-		/* share them out across the vpe's */
-		tlbsiz /= ncpu;
-
-		printk(KERN_INFO "setting Config1.MMU_size to %d\n", tlbsiz);
-
-		for (i = 0; i < ncpu; i++) {
-			settc(i);
-
-			if (i == 0)
-				write_c0_config1((read_c0_config1() & ~(0x3f << 25)) | (tlbsiz << 25));
-			else
-				write_vpe_c0_config1((read_vpe_c0_config1() & ~(0x3f << 25)) |
-						   (tlbsiz << 25));
-		}
-	}
-
-	clear_c0_mvpcontrol(MVPCONTROL_VPC);
-}
-
-static void ipi_resched_dispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ);
-}
-
-static void ipi_call_dispatch(void)
-{
-	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ);
-}
-
-static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
-{
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
-{
-	smp_call_function_interrupt();
-
-	return IRQ_HANDLED;
-}
-
-static struct irqaction irq_resched = {
-	.handler	= ipi_resched_interrupt,
-	.flags		= IRQF_DISABLED|IRQF_PERCPU,
-	.name		= "IPI_resched"
-};
-
-static struct irqaction irq_call = {
-	.handler	= ipi_call_interrupt,
-	.flags		= IRQF_DISABLED|IRQF_PERCPU,
-	.name		= "IPI_call"
-};
-
-static void __init smp_copy_vpe_config(void)
+static void __init smvp_copy_vpe_config(void)
 {
 	write_vpe_c0_status(
 		(read_c0_status() & ~(ST0_IM | ST0_IE | ST0_KSU)) | ST0_CU0);
@@ -156,7 +53,7 @@ static void __init smp_copy_vpe_config(void)
 	write_vpe_c0_count(read_c0_count());
 }
 
-static unsigned int __init smp_vpe_init(unsigned int tc, unsigned int mvpconf0,
+static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
 	unsigned int ncpu)
 {
 	if (tc > ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT))
@@ -182,12 +79,12 @@ static unsigned int __init smp_vpe_init(unsigned int tc, unsigned int mvpconf0,
 	write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() & ~VPECONTROL_TE);
 
 	if (tc != 0)
-		smp_copy_vpe_config();
+		smvp_copy_vpe_config();
 
 	return ncpu;
 }
 
-static void __init smp_tc_init(unsigned int tc, unsigned int mvpconf0)
+static void __init smvp_tc_init(unsigned int tc, unsigned int mvpconf0)
 {
 	unsigned long tmp;
 
@@ -254,15 +151,20 @@ static void vsmp_send_ipi_mask(cpumask_t mask, unsigned int action)
 
 static void __cpuinit vsmp_init_secondary(void)
 {
-	/* Enable per-cpu interrupts */
+	extern int gic_present;
 
 	/* This is Malta specific: IPI,performance and timer inetrrupts */
-	write_c0_status((read_c0_status() & ~ST0_IM ) |
-	                (STATUSF_IP0 | STATUSF_IP1 | STATUSF_IP6 | STATUSF_IP7));
+	if (gic_present)
+		change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 |
+					 STATUSF_IP6 | STATUSF_IP7);
+	else
+		change_c0_status(ST0_IM, STATUSF_IP0 | STATUSF_IP1 |
+					 STATUSF_IP6 | STATUSF_IP7);
 }
 
 static void __cpuinit vsmp_smp_finish(void)
 {
+	/* CDFIXME: remove this? */
 	write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ));
 
 #ifdef CONFIG_MIPS_MT_FPAFF
@@ -323,7 +225,7 @@ static void __cpuinit vsmp_boot_secondary(int cpu, struct task_struct *idle)
 /*
  * Common setup before any secondaries are started
  * Make sure all CPU's are in a sensible state before we boot any of the
- * secondarys
+ * secondaries
  */
 static void __init vsmp_smp_setup(void)
 {
@@ -356,8 +258,8 @@ static void __init vsmp_smp_setup(void)
 	for (tc = 0; tc <= ntc; tc++) {
 		settc(tc);
 
-		smp_tc_init(tc, mvpconf0);
-		ncpu = smp_vpe_init(tc, mvpconf0, ncpu);
+		smvp_tc_init(tc, mvpconf0);
+		ncpu = smvp_vpe_init(tc, mvpconf0, ncpu);
 	}
 
 	/* Release config state */
@@ -371,21 +273,6 @@ static void __init vsmp_smp_setup(void)
 static void __init vsmp_prepare_cpus(unsigned int max_cpus)
 {
 	mips_mt_set_cpuoptions();
-
-	/* set up ipi interrupts */
-	if (cpu_has_vint) {
-		set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
-		set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
-	}
-
-	cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ;
-	cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ;
-
-	setup_irq(cpu_ipi_resched_irq, &irq_resched);
-	setup_irq(cpu_ipi_call_irq, &irq_call);
-
-	set_irq_handler(cpu_ipi_resched_irq, handle_percpu_irq);
-	set_irq_handler(cpu_ipi_call_irq, handle_percpu_irq);
 }
 
 struct plat_smp_ops vsmp_smp_ops = {
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 9d41dab90a80..33780cc61ce9 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -35,6 +35,7 @@
 #include <asm/atomic.h>
 #include <asm/cpu.h>
 #include <asm/processor.h>
+#include <asm/r4k-timer.h>
 #include <asm/system.h>
 #include <asm/mmu_context.h>
 #include <asm/time.h>
@@ -125,6 +126,8 @@ asmlinkage __cpuinit void start_secondary(void)
 
 	cpu_set(cpu, cpu_callin_map);
 
+	synchronise_count_slave();
+
 	cpu_idle();
 }
 
@@ -287,6 +290,7 @@ void smp_send_stop(void)
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	mp_ops->cpus_done();
+	synchronise_count_master();
 }
 
 /* called from main before smp_init() */
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index b42e71c71119..3e863186cd22 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -174,14 +174,6 @@ static int clock_hang_reported[NR_CPUS];
 
 #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */
 
-/* Initialize shared TLB - the should probably migrate to smtc_setup_cpus() */
-
-void __init sanitize_tlb_entries(void)
-{
-	printk("Deprecated sanitize_tlb_entries() invoked\n");
-}
-
-
 /*
  * Configure shared TLB - VPC configuration bit must be set by caller
  */
@@ -339,7 +331,8 @@ static void smtc_tc_setup(int vpe, int tc, int cpu)
 	/* In general, all TCs should have the same cpu_data indications */
 	memcpy(&cpu_data[cpu], &cpu_data[0], sizeof(struct cpuinfo_mips));
 	/* For 34Kf, start with TC/CPU 0 as sole owner of single FPU context */
-	if (cpu_data[0].cputype == CPU_34K)
+	if (cpu_data[0].cputype == CPU_34K ||
+	    cpu_data[0].cputype == CPU_1004K)
 		cpu_data[cpu].options &= ~MIPS_CPU_FPU;
 	cpu_data[cpu].vpe_id = vpe;
 	cpu_data[cpu].tc_id = tc;
diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c
new file mode 100644
index 000000000000..6ddb507a87ef
--- /dev/null
+++ b/arch/mips/kernel/spram.c
@@ -0,0 +1,221 @@
+/*
+ * MIPS SPRAM support
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Copyright (C) 2007, 2008 MIPS Technologies, Inc.
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <linux/stddef.h>
+
+#include <asm/cpu.h>
+#include <asm/fpu.h>
+#include <asm/mipsregs.h>
+#include <asm/system.h>
+#include <asm/r4kcache.h>
+#include <asm/hazards.h>
+
+/*
+ * These definitions are correct for the 24K/34K/74K SPRAM sample
+ * implementation. The 4KS interpreted the tags differently...
+ */
+#define SPRAM_TAG0_ENABLE	0x00000080
+#define SPRAM_TAG0_PA_MASK	0xfffff000
+#define SPRAM_TAG1_SIZE_MASK	0xfffff000
+
+#define SPRAM_TAG_STRIDE	8
+
+#define ERRCTL_SPRAM		(1 << 28)
+
+/* errctl access */
+#define read_c0_errctl(x) read_c0_ecc(x)
+#define write_c0_errctl(x) write_c0_ecc(x)
+
+/*
+ * Different semantics to the set_c0_* function built by __BUILD_SET_C0
+ */
+static __cpuinit unsigned int bis_c0_errctl(unsigned int set)
+{
+	unsigned int res;
+	res = read_c0_errctl();
+	write_c0_errctl(res | set);
+	return res;
+}
+
+static __cpuinit void ispram_store_tag(unsigned int offset, unsigned int data)
+{
+	unsigned int errctl;
+
+	/* enable SPRAM tag access */
+	errctl = bis_c0_errctl(ERRCTL_SPRAM);
+	ehb();
+
+	write_c0_taglo(data);
+	ehb();
+
+	cache_op(Index_Store_Tag_I, CKSEG0|offset);
+	ehb();
+
+	write_c0_errctl(errctl);
+	ehb();
+}
+
+
+static __cpuinit unsigned int ispram_load_tag(unsigned int offset)
+{
+	unsigned int data;
+	unsigned int errctl;
+
+	/* enable SPRAM tag access */
+	errctl = bis_c0_errctl(ERRCTL_SPRAM);
+	ehb();
+	cache_op(Index_Load_Tag_I, CKSEG0 | offset);
+	ehb();
+	data = read_c0_taglo();
+	ehb();
+	write_c0_errctl(errctl);
+	ehb();
+
+	return data;
+}
+
+static __cpuinit void dspram_store_tag(unsigned int offset, unsigned int data)
+{
+	unsigned int errctl;
+
+	/* enable SPRAM tag access */
+	errctl = bis_c0_errctl(ERRCTL_SPRAM);
+	ehb();
+	write_c0_dtaglo(data);
+	ehb();
+	cache_op(Index_Store_Tag_D, CKSEG0 | offset);
+	ehb();
+	write_c0_errctl(errctl);
+	ehb();
+}
+
+
+static __cpuinit unsigned int dspram_load_tag(unsigned int offset)
+{
+	unsigned int data;
+	unsigned int errctl;
+
+	errctl = bis_c0_errctl(ERRCTL_SPRAM);
+	ehb();
+	cache_op(Index_Load_Tag_D, CKSEG0 | offset);
+	ehb();
+	data = read_c0_dtaglo();
+	ehb();
+	write_c0_errctl(errctl);
+	ehb();
+
+	return data;
+}
+
+static __cpuinit void probe_spram(char *type,
+	    unsigned int base,
+	    unsigned int (*read)(unsigned int),
+	    void (*write)(unsigned int, unsigned int))
+{
+	unsigned int firstsize = 0, lastsize = 0;
+	unsigned int firstpa = 0, lastpa = 0, pa = 0;
+	unsigned int offset = 0;
+	unsigned int size, tag0, tag1;
+	unsigned int enabled;
+	int i;
+
+	/*
+	 * The limit is arbitrary but avoids the loop running away if
+	 * the SPRAM tags are implemented differently
+	 */
+
+	for (i = 0; i < 8; i++) {
+		tag0 = read(offset);
+		tag1 = read(offset+SPRAM_TAG_STRIDE);
+		pr_debug("DBG %s%d: tag0=%08x tag1=%08x\n",
+			 type, i, tag0, tag1);
+
+		size = tag1 & SPRAM_TAG1_SIZE_MASK;
+
+		if (size == 0)
+			break;
+
+		if (i != 0) {
+			/* tags may repeat... */
+			if ((pa == firstpa && size == firstsize) ||
+			    (pa == lastpa && size == lastsize))
+				break;
+		}
+
+		/* Align base with size */
+		base = (base + size - 1) & ~(size-1);
+
+		/* reprogram the base address base address and enable */
+		tag0 = (base & SPRAM_TAG0_PA_MASK) | SPRAM_TAG0_ENABLE;
+		write(offset, tag0);
+
+		base += size;
+
+		/* reread the tag */
+		tag0 = read(offset);
+		pa = tag0 & SPRAM_TAG0_PA_MASK;
+		enabled = tag0 & SPRAM_TAG0_ENABLE;
+
+		if (i == 0) {
+			firstpa = pa;
+			firstsize = size;
+		}
+
+		lastpa = pa;
+		lastsize = size;
+
+		if (strcmp(type, "DSPRAM") == 0) {
+			unsigned int *vp = (unsigned int *)(CKSEG1 | pa);
+			unsigned int v;
+#define TDAT	0x5a5aa5a5
+			vp[0] = TDAT;
+			vp[1] = ~TDAT;
+
+			mb();
+
+			v = vp[0];
+			if (v != TDAT)
+				printk(KERN_ERR "vp=%p wrote=%08x got=%08x\n",
+				       vp, TDAT, v);
+			v = vp[1];
+			if (v != ~TDAT)
+				printk(KERN_ERR "vp=%p wrote=%08x got=%08x\n",
+				       vp+1, ~TDAT, v);
+		}
+
+		pr_info("%s%d: PA=%08x,Size=%08x%s\n",
+			type, i, pa, size, enabled ? ",enabled" : "");
+		offset += 2 * SPRAM_TAG_STRIDE;
+	}
+}
+
+__cpuinit void spram_config(void)
+{
+	struct cpuinfo_mips *c = &current_cpu_data;
+	unsigned int config0;
+
+	switch (c->cputype) {
+	case CPU_24K:
+	case CPU_34K:
+	case CPU_74K:
+		config0 = read_c0_config();
+		/* FIXME: addresses are Malta specific */
+		if (config0 & (1<<24)) {
+			probe_spram("ISPRAM", 0x1c000000,
+				    &ispram_load_tag, &ispram_store_tag);
+		}
+		if (config0 & (1<<23))
+			probe_spram("DSPRAM", 0x1c100000,
+				    &dspram_load_tag, &dspram_store_tag);
+	}
+}
diff --git a/arch/mips/kernel/sync-r4k.c b/arch/mips/kernel/sync-r4k.c
new file mode 100644
index 000000000000..9021108eb9c1
--- /dev/null
+++ b/arch/mips/kernel/sync-r4k.c
@@ -0,0 +1,159 @@
+/*
+ * Count register synchronisation.
+ *
+ * All CPUs will have their count registers synchronised to the CPU0 expirelo
+ * value. This can cause a small timewarp for CPU0. All other CPU's should
+ * not have done anything significant (but they may have had interrupts
+ * enabled briefly - prom_smp_finish() should not be responsible for enabling
+ * interrupts...)
+ *
+ * FIXME: broken for SMTC
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/r4k-timer.h>
+
+#include <asm/atomic.h>
+#include <asm/barrier.h>
+#include <asm/cpumask.h>
+#include <asm/mipsregs.h>
+
+static atomic_t __initdata count_start_flag = ATOMIC_INIT(0);
+static atomic_t __initdata count_count_start = ATOMIC_INIT(0);
+static atomic_t __initdata count_count_stop = ATOMIC_INIT(0);
+
+#define COUNTON	100
+#define NR_LOOPS 5
+
+void __init synchronise_count_master(void)
+{
+	int i;
+	unsigned long flags;
+	unsigned int initcount;
+	int nslaves;
+
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC needs to synchronise per VPE, not per CPU
+	 * ignore for now
+	 */
+	return;
+#endif
+
+	pr_info("Checking COUNT synchronization across %u CPUs: ",
+		num_online_cpus());
+
+	local_irq_save(flags);
+
+	/*
+	 * Notify the slaves that it's time to start
+	 */
+	atomic_set(&count_start_flag, 1);
+	smp_wmb();
+
+	/* Count will be initialised to expirelo for all CPU's */
+	initcount = expirelo;
+
+	/*
+	 * We loop a few times to get a primed instruction cache,
+	 * then the last pass is more or less synchronised and
+	 * the master and slaves each set their cycle counters to a known
+	 * value all at once. This reduces the chance of having random offsets
+	 * between the processors, and guarantees that the maximum
+	 * delay between the cycle counters is never bigger than
+	 * the latency of information-passing (cachelines) between
+	 * two CPUs.
+	 */
+
+	nslaves = num_online_cpus()-1;
+	for (i = 0; i < NR_LOOPS; i++) {
+		/* slaves loop on '!= ncpus' */
+		while (atomic_read(&count_count_start) != nslaves)
+			mb();
+		atomic_set(&count_count_stop, 0);
+		smp_wmb();
+
+		/* this lets the slaves write their count register */
+		atomic_inc(&count_count_start);
+
+		/*
+		 * Everyone initialises count in the last loop:
+		 */
+		if (i == NR_LOOPS-1)
+			write_c0_count(initcount);
+
+		/*
+		 * Wait for all slaves to leave the synchronization point:
+		 */
+		while (atomic_read(&count_count_stop) != nslaves)
+			mb();
+		atomic_set(&count_count_start, 0);
+		smp_wmb();
+		atomic_inc(&count_count_stop);
+	}
+	/* Arrange for an interrupt in a short while */
+	write_c0_compare(read_c0_count() + COUNTON);
+
+	local_irq_restore(flags);
+
+	/*
+	 * i386 code reported the skew here, but the
+	 * count registers were almost certainly out of sync
+	 * so no point in alarming people
+	 */
+	printk("done.\n");
+}
+
+void __init synchronise_count_slave(void)
+{
+	int i;
+	unsigned long flags;
+	unsigned int initcount;
+	int ncpus;
+
+#ifdef CONFIG_MIPS_MT_SMTC
+	/*
+	 * SMTC needs to synchronise per VPE, not per CPU
+	 * ignore for now
+	 */
+	return;
+#endif
+
+	local_irq_save(flags);
+
+	/*
+	 * Not every cpu is online at the time this gets called,
+	 * so we first wait for the master to say everyone is ready
+	 */
+
+	while (!atomic_read(&count_start_flag))
+		mb();
+
+	/* Count will be initialised to expirelo for all CPU's */
+	initcount = expirelo;
+
+	ncpus = num_online_cpus();
+	for (i = 0; i < NR_LOOPS; i++) {
+		atomic_inc(&count_count_start);
+		while (atomic_read(&count_count_start) != ncpus)
+			mb();
+
+		/*
+		 * Everyone initialises count in the last loop:
+		 */
+		if (i == NR_LOOPS-1)
+			write_c0_count(initcount);
+
+		atomic_inc(&count_count_stop);
+		while (atomic_read(&count_count_stop) != ncpus)
+			mb();
+	}
+	/* Arrange for an interrupt in a short while */
+	write_c0_compare(read_c0_count() + COUNTON);
+
+	local_irq_restore(flags);
+}
+#undef NR_LOOPS
+#endif
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index b45a7093ca2d..1f467d534642 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -38,7 +38,6 @@ int __weak rtc_mips_set_time(unsigned long sec)
 {
 	return 0;
 }
-EXPORT_SYMBOL(rtc_mips_set_time);
 
 int __weak rtc_mips_set_mmss(unsigned long nowtime)
 {
@@ -50,13 +49,11 @@ int update_persistent_clock(struct timespec now)
 	return rtc_mips_set_mmss(now.tv_sec);
 }
 
-int null_perf_irq(void)
+static int null_perf_irq(void)
 {
 	return 0;
 }
 
-EXPORT_SYMBOL(null_perf_irq);
-
 int (*perf_irq)(void) = null_perf_irq;
 
 EXPORT_SYMBOL(perf_irq);
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 984c0d0a7b4d..cb8b0e2c7954 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -22,6 +22,7 @@
 #include <linux/kallsyms.h>
 #include <linux/bootmem.h>
 #include <linux/interrupt.h>
+#include <linux/ptrace.h>
 
 #include <asm/bootinfo.h>
 #include <asm/branch.h>
@@ -80,19 +81,22 @@ void (*board_bind_eic_interrupt)(int irq, int regset);
 
 static void show_raw_backtrace(unsigned long reg29)
 {
-	unsigned long *sp = (unsigned long *)reg29;
+	unsigned long *sp = (unsigned long *)(reg29 & ~3);
 	unsigned long addr;
 
 	printk("Call Trace:");
 #ifdef CONFIG_KALLSYMS
 	printk("\n");
 #endif
-	while (!kstack_end(sp)) {
-		addr = *sp++;
-		if (__kernel_text_address(addr))
-			print_ip_sym(addr);
+#define IS_KVA01(a) ((((unsigned int)a) & 0xc0000000) == 0x80000000)
+	if (IS_KVA01(sp)) {
+		while (!kstack_end(sp)) {
+			addr = *sp++;
+			if (__kernel_text_address(addr))
+				print_ip_sym(addr);
+		}
+		printk("\n");
 	}
-	printk("\n");
 }
 
 #ifdef CONFIG_KALLSYMS
@@ -192,16 +196,19 @@ EXPORT_SYMBOL(dump_stack);
 static void show_code(unsigned int __user *pc)
 {
 	long i;
+	unsigned short __user *pc16 = NULL;
 
 	printk("\nCode:");
 
+	if ((unsigned long)pc & 1)
+		pc16 = (unsigned short __user *)((unsigned long)pc & ~1);
 	for(i = -3 ; i < 6 ; i++) {
 		unsigned int insn;
-		if (__get_user(insn, pc + i)) {
+		if (pc16 ? __get_user(insn, pc16 + i) : __get_user(insn, pc + i)) {
 			printk(" (Bad address in epc)\n");
 			break;
 		}
-		printk("%c%08x%c", (i?' ':'<'), insn, (i?' ':'>'));
+		printk("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>'));
 	}
 }
 
@@ -311,10 +318,21 @@ void show_regs(struct pt_regs *regs)
 
 void show_registers(const struct pt_regs *regs)
 {
+	const int field = 2 * sizeof(unsigned long);
+
 	__show_regs(regs);
 	print_modules();
-	printk("Process %s (pid: %d, threadinfo=%p, task=%p)\n",
-	        current->comm, task_pid_nr(current), current_thread_info(), current);
+	printk("Process %s (pid: %d, threadinfo=%p, task=%p, tls=%0*lx)\n",
+	       current->comm, current->pid, current_thread_info(), current,
+	      field, current_thread_info()->tp_value);
+	if (cpu_has_userlocal) {
+		unsigned long tls;
+
+		tls = read_c0_userlocal();
+		if (tls != current_thread_info()->tp_value)
+			printk("*HwTLS: %0*lx\n", field, tls);
+	}
+
 	show_stacktrace(current, regs);
 	show_code((unsigned int __user *) regs->cp0_epc);
 	printk("\n");
@@ -657,35 +675,24 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
 	force_sig_info(SIGFPE, &info, current);
 }
 
-asmlinkage void do_bp(struct pt_regs *regs)
+static void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
+	const char *str)
 {
-	unsigned int opcode, bcode;
 	siginfo_t info;
-
-	if (__get_user(opcode, (unsigned int __user *) exception_epc(regs)))
-		goto out_sigsegv;
-
-	/*
-	 * There is the ancient bug in the MIPS assemblers that the break
-	 * code starts left to bit 16 instead to bit 6 in the opcode.
-	 * Gas is bug-compatible, but not always, grrr...
-	 * We handle both cases with a simple heuristics.  --macro
-	 */
-	bcode = ((opcode >> 6) & ((1 << 20) - 1));
-	if (bcode < (1 << 10))
-		bcode <<= 10;
+	char b[40];
 
 	/*
-	 * (A short test says that IRIX 5.3 sends SIGTRAP for all break
-	 * insns, even for break codes that indicate arithmetic failures.
-	 * Weird ...)
+	 * A short test says that IRIX 5.3 sends SIGTRAP for all trap
+	 * insns, even for trap and break codes that indicate arithmetic
+	 * failures.  Weird ...
 	 * But should we continue the brokenness???  --macro
 	 */
-	switch (bcode) {
-	case BRK_OVERFLOW << 10:
-	case BRK_DIVZERO << 10:
-		die_if_kernel("Break instruction in kernel code", regs);
-		if (bcode == (BRK_DIVZERO << 10))
+	switch (code) {
+	case BRK_OVERFLOW:
+	case BRK_DIVZERO:
+		scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
+		die_if_kernel(b, regs);
+		if (code == BRK_DIVZERO)
 			info.si_code = FPE_INTDIV;
 		else
 			info.si_code = FPE_INTOVF;
@@ -695,12 +702,34 @@ asmlinkage void do_bp(struct pt_regs *regs)
 		force_sig_info(SIGFPE, &info, current);
 		break;
 	case BRK_BUG:
-		die("Kernel bug detected", regs);
+		die_if_kernel("Kernel bug detected", regs);
+		force_sig(SIGTRAP, current);
 		break;
 	default:
-		die_if_kernel("Break instruction in kernel code", regs);
+		scnprintf(b, sizeof(b), "%s instruction in kernel code", str);
+		die_if_kernel(b, regs);
 		force_sig(SIGTRAP, current);
 	}
+}
+
+asmlinkage void do_bp(struct pt_regs *regs)
+{
+	unsigned int opcode, bcode;
+
+	if (__get_user(opcode, (unsigned int __user *) exception_epc(regs)))
+		goto out_sigsegv;
+
+	/*
+	 * There is the ancient bug in the MIPS assemblers that the break
+	 * code starts left to bit 16 instead to bit 6 in the opcode.
+	 * Gas is bug-compatible, but not always, grrr...
+	 * We handle both cases with a simple heuristics.  --macro
+	 */
+	bcode = ((opcode >> 6) & ((1 << 20) - 1));
+	if (bcode >= (1 << 10))
+		bcode >>= 10;
+
+	do_trap_or_bp(regs, bcode, "Break");
 	return;
 
 out_sigsegv:
@@ -710,7 +739,6 @@ out_sigsegv:
 asmlinkage void do_tr(struct pt_regs *regs)
 {
 	unsigned int opcode, tcode = 0;
-	siginfo_t info;
 
 	if (__get_user(opcode, (unsigned int __user *) exception_epc(regs)))
 		goto out_sigsegv;
@@ -719,32 +747,7 @@ asmlinkage void do_tr(struct pt_regs *regs)
 	if (!(opcode & OPCODE))
 		tcode = ((opcode >> 6) & ((1 << 10) - 1));
 
-	/*
-	 * (A short test says that IRIX 5.3 sends SIGTRAP for all trap
-	 * insns, even for trap codes that indicate arithmetic failures.
-	 * Weird ...)
-	 * But should we continue the brokenness???  --macro
-	 */
-	switch (tcode) {
-	case BRK_OVERFLOW:
-	case BRK_DIVZERO:
-		die_if_kernel("Trap instruction in kernel code", regs);
-		if (tcode == BRK_DIVZERO)
-			info.si_code = FPE_INTDIV;
-		else
-			info.si_code = FPE_INTOVF;
-		info.si_signo = SIGFPE;
-		info.si_errno = 0;
-		info.si_addr = (void __user *) regs->cp0_epc;
-		force_sig_info(SIGFPE, &info, current);
-		break;
-	case BRK_BUG:
-		die("Kernel bug detected", regs);
-		break;
-	default:
-		die_if_kernel("Trap instruction in kernel code", regs);
-		force_sig(SIGTRAP, current);
-	}
+	do_trap_or_bp(regs, tcode, "Trap");
 	return;
 
 out_sigsegv:
@@ -985,6 +988,21 @@ asmlinkage void do_reserved(struct pt_regs *regs)
 	      (regs->cp0_cause & 0x7f) >> 2);
 }
 
+static int __initdata l1parity = 1;
+static int __init nol1parity(char *s)
+{
+	l1parity = 0;
+	return 1;
+}
+__setup("nol1par", nol1parity);
+static int __initdata l2parity = 1;
+static int __init nol2parity(char *s)
+{
+	l2parity = 0;
+	return 1;
+}
+__setup("nol2par", nol2parity);
+
 /*
  * Some MIPS CPUs can enable/disable for cache parity detection, but do
  * it different ways.
@@ -994,6 +1012,62 @@ static inline void parity_protection_init(void)
 	switch (current_cpu_type()) {
 	case CPU_24K:
 	case CPU_34K:
+	case CPU_74K:
+	case CPU_1004K:
+		{
+#define ERRCTL_PE	0x80000000
+#define ERRCTL_L2P	0x00800000
+			unsigned long errctl;
+			unsigned int l1parity_present, l2parity_present;
+
+			errctl = read_c0_ecc();
+			errctl &= ~(ERRCTL_PE|ERRCTL_L2P);
+
+			/* probe L1 parity support */
+			write_c0_ecc(errctl | ERRCTL_PE);
+			back_to_back_c0_hazard();
+			l1parity_present = (read_c0_ecc() & ERRCTL_PE);
+
+			/* probe L2 parity support */
+			write_c0_ecc(errctl|ERRCTL_L2P);
+			back_to_back_c0_hazard();
+			l2parity_present = (read_c0_ecc() & ERRCTL_L2P);
+
+			if (l1parity_present && l2parity_present) {
+				if (l1parity)
+					errctl |= ERRCTL_PE;
+				if (l1parity ^ l2parity)
+					errctl |= ERRCTL_L2P;
+			} else if (l1parity_present) {
+				if (l1parity)
+					errctl |= ERRCTL_PE;
+			} else if (l2parity_present) {
+				if (l2parity)
+					errctl |= ERRCTL_L2P;
+			} else {
+				/* No parity available */
+			}
+
+			printk(KERN_INFO "Writing ErrCtl register=%08lx\n", errctl);
+
+			write_c0_ecc(errctl);
+			back_to_back_c0_hazard();
+			errctl = read_c0_ecc();
+			printk(KERN_INFO "Readback ErrCtl register=%08lx\n", errctl);
+
+			if (l1parity_present)
+				printk(KERN_INFO "Cache parity protection %sabled\n",
+				       (errctl & ERRCTL_PE) ? "en" : "dis");
+
+			if (l2parity_present) {
+				if (l1parity_present && l1parity)
+					errctl ^= ERRCTL_L2P;
+				printk(KERN_INFO "L2 cache parity protection %sabled\n",
+				       (errctl & ERRCTL_L2P) ? "en" : "dis");
+			}
+		}
+		break;
+
 	case CPU_5KC:
 		write_c0_ecc(0x80000000);
 		back_to_back_c0_hazard();
@@ -1306,6 +1380,17 @@ int cp0_compare_irq;
 int cp0_perfcount_irq;
 EXPORT_SYMBOL_GPL(cp0_perfcount_irq);
 
+static int __cpuinitdata noulri;
+
+static int __init ulri_disable(char *s)
+{
+	pr_info("Disabling ulri\n");
+	noulri = 1;
+
+	return 1;
+}
+__setup("noulri", ulri_disable);
+
 void __cpuinit per_cpu_trap_init(void)
 {
 	unsigned int cpu = smp_processor_id();
@@ -1342,16 +1427,14 @@ void __cpuinit per_cpu_trap_init(void)
 	change_c0_status(ST0_CU|ST0_MX|ST0_RE|ST0_FR|ST0_BEV|ST0_TS|ST0_KX|ST0_SX|ST0_UX,
 			 status_set);
 
-#ifdef CONFIG_CPU_MIPSR2
 	if (cpu_has_mips_r2) {
 		unsigned int enable = 0x0000000f;
 
-		if (cpu_has_userlocal)
+		if (!noulri && cpu_has_userlocal)
 			enable |= (1 << 29);
 
 		write_c0_hwrena(enable);
 	}
-#endif
 
 #ifdef CONFIG_MIPS_MT_SMTC
 	if (!secondaryTC) {
diff --git a/arch/mips/math-emu/ieee754dp.h b/arch/mips/math-emu/ieee754dp.h
index 8977eb585a37..762786538449 100644
--- a/arch/mips/math-emu/ieee754dp.h
+++ b/arch/mips/math-emu/ieee754dp.h
@@ -46,7 +46,7 @@
 #define DPDNORMX	DPDNORMx(xm, xe)
 #define DPDNORMY	DPDNORMx(ym, ye)
 
-static __inline ieee754dp builddp(int s, int bx, u64 m)
+static inline ieee754dp builddp(int s, int bx, u64 m)
 {
 	ieee754dp r;
 
diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index 9917c1e4d947..d9e3586b5bce 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h
@@ -51,7 +51,7 @@
 #define SPDNORMX	SPDNORMx(xm, xe)
 #define SPDNORMY	SPDNORMx(ym, ye)
 
-static __inline ieee754sp buildsp(int s, int bx, unsigned m)
+static inline ieee754sp buildsp(int s, int bx, unsigned m)
 {
 	ieee754sp r;
 
diff --git a/arch/mips/mips-boards/generic/Makefile b/arch/mips/mips-boards/generic/Makefile
index b31d8dfed1be..f7f87fc09d1e 100644
--- a/arch/mips/mips-boards/generic/Makefile
+++ b/arch/mips/mips-boards/generic/Makefile
@@ -20,6 +20,7 @@
 
 obj-y				:= reset.o display.o init.o memory.o \
 				   cmdline.o time.o
+obj-y				+= amon.o
 
 obj-$(CONFIG_EARLY_PRINTK)	+= console.o
 obj-$(CONFIG_PCI)		+= pci.o
diff --git a/arch/mips/mips-boards/generic/amon.c b/arch/mips/mips-boards/generic/amon.c
new file mode 100644
index 000000000000..b7633fda4180
--- /dev/null
+++ b/arch/mips/mips-boards/generic/amon.c
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2007  MIPS Technologies, Inc.
+ *	All rights reserved.
+
+ *  This program is free software; you can distribute it and/or modify it
+ *  under the terms of the GNU General Public License (Version 2) as
+ *  published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope it will be useful, but WITHOUT
+ *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ *  for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+ *
+ * Arbitrary Monitor interface
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm-mips/addrspace.h>
+#include <asm-mips/mips-boards/launch.h>
+#include <asm-mips/mipsmtregs.h>
+
+int amon_cpu_avail(int cpu)
+{
+	struct cpulaunch *launch = (struct cpulaunch *)KSEG0ADDR(CPULAUNCH);
+
+	if (cpu < 0 || cpu >= NCPULAUNCH) {
+		pr_debug("avail: cpu%d is out of range\n", cpu);
+		return 0;
+	}
+
+	launch += cpu;
+	if (!(launch->flags & LAUNCH_FREADY)) {
+		pr_debug("avail: cpu%d is not ready\n", cpu);
+		return 0;
+	}
+	if (launch->flags & (LAUNCH_FGO|LAUNCH_FGONE)) {
+		pr_debug("avail: too late.. cpu%d is already gone\n", cpu);
+		return 0;
+	}
+
+	return 1;
+}
+
+void amon_cpu_start(int cpu,
+		    unsigned long pc, unsigned long sp,
+		    unsigned long gp, unsigned long a0)
+{
+	volatile struct cpulaunch *launch =
+		(struct cpulaunch  *)KSEG0ADDR(CPULAUNCH);
+
+	if (!amon_cpu_avail(cpu))
+		return;
+	if (cpu == smp_processor_id()) {
+		pr_debug("launch: I am cpu%d!\n", cpu);
+		return;
+	}
+	launch += cpu;
+
+	pr_debug("launch: starting cpu%d\n", cpu);
+
+	launch->pc = pc;
+	launch->gp = gp;
+	launch->sp = sp;
+	launch->a0 = a0;
+
+	/* Make sure target sees parameters before the go bit */
+	smp_mb();
+
+	launch->flags |= LAUNCH_FGO;
+	while ((launch->flags & LAUNCH_FGONE) == 0)
+		;
+	pr_debug("launch: cpu%d gone!\n", cpu);
+}
diff --git a/arch/mips/mips-boards/generic/init.c b/arch/mips/mips-boards/generic/init.c
index 1695dca5506b..83b9dc739203 100644
--- a/arch/mips/mips-boards/generic/init.c
+++ b/arch/mips/mips-boards/generic/init.c
@@ -226,7 +226,7 @@ void __init kgdb_config(void)
 }
 #endif
 
-void __init mips_nmi_setup(void)
+static void __init mips_nmi_setup(void)
 {
 	void *base;
 	extern char except_vec_nmi;
@@ -238,7 +238,7 @@ void __init mips_nmi_setup(void)
 	flush_icache_range((unsigned long)base, (unsigned long)base + 0x80);
 }
 
-void __init mips_ejtag_setup(void)
+static void __init mips_ejtag_setup(void)
 {
 	void *base;
 	extern char except_vec_ejtag_debug;
@@ -295,15 +295,21 @@ void __init prom_init(void)
 			break;
 		case MIPS_REVISION_CORID_CORE_MSC:
 		case MIPS_REVISION_CORID_CORE_FPGA2:
-		case MIPS_REVISION_CORID_CORE_FPGA3:
-		case MIPS_REVISION_CORID_CORE_FPGA4:
 		case MIPS_REVISION_CORID_CORE_24K:
-		case MIPS_REVISION_CORID_CORE_EMUL_MSC:
+			/*
+			 * SOCit/ROCit support is essentially identical
+			 * but make an attempt to distinguish them
+			 */
 			mips_revision_sconid = MIPS_REVISION_SCON_SOCIT;
 			break;
+		case MIPS_REVISION_CORID_CORE_FPGA3:
+		case MIPS_REVISION_CORID_CORE_FPGA4:
+		case MIPS_REVISION_CORID_CORE_FPGA5:
+		case MIPS_REVISION_CORID_CORE_EMUL_MSC:
 		default:
-			mips_display_message("CC Error");
-			while (1);   /* We die here... */
+			/* See above */
+			mips_revision_sconid = MIPS_REVISION_SCON_ROCIT;
+			break;
 		}
 	}
 
@@ -418,6 +424,9 @@ void __init prom_init(void)
 #ifdef CONFIG_SERIAL_8250_CONSOLE
 	console_config();
 #endif
+#ifdef CONFIG_MIPS_CMP
+	register_smp_ops(&cmp_smp_ops);
+#endif
 #ifdef CONFIG_MIPS_MT_SMP
 	register_smp_ops(&vsmp_smp_ops);
 #endif
diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c
index dc272c188233..5e443bba5662 100644
--- a/arch/mips/mips-boards/generic/memory.c
+++ b/arch/mips/mips-boards/generic/memory.c
@@ -37,7 +37,7 @@ enum yamon_memtypes {
 	yamon_prom,
 	yamon_free,
 };
-struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS];
+static struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS];
 
 #ifdef DEBUG
 static char *mtypes[3] = {
@@ -50,7 +50,7 @@ static char *mtypes[3] = {
 /* determined physical memory size, not overridden by command line args  */
 unsigned long physical_memsize = 0L;
 
-struct prom_pmemblock * __init prom_getmdesc(void)
+static struct prom_pmemblock * __init prom_getmdesc(void)
 {
 	char *memsize_str;
 	unsigned int memsize;
diff --git a/arch/mips/mips-boards/generic/time.c b/arch/mips/mips-boards/generic/time.c
index b50e0fc406ac..008fd82b5840 100644
--- a/arch/mips/mips-boards/generic/time.c
+++ b/arch/mips/mips-boards/generic/time.c
@@ -55,16 +55,36 @@
 unsigned long cpu_khz;
 
 static int mips_cpu_timer_irq;
+static int mips_cpu_perf_irq;
 extern int cp0_perfcount_irq;
 
+DEFINE_PER_CPU(unsigned int, tickcount);
+#define tickcount_this_cpu __get_cpu_var(tickcount)
+static unsigned long ledbitmask;
+
 static void mips_timer_dispatch(void)
 {
+#if defined(CONFIG_MIPS_MALTA) || defined(CONFIG_MIPS_ATLAS)
+	/*
+	 * Yes, this is very tacky, won't work as expected with SMTC and
+	 * dyntick will break it,
+	 * but it gives me a nice warm feeling during debug
+	 */
+#define LEDBAR 0xbf000408
+	if (tickcount_this_cpu++ >= HZ) {
+		tickcount_this_cpu = 0;
+		change_bit(smp_processor_id(), &ledbitmask);
+		smp_wmb(); /* Make sure every one else sees the change */
+		/* This will pick up any recent changes made by other CPU's */
+		*(unsigned int *)LEDBAR = ledbitmask;
+	}
+#endif
 	do_IRQ(mips_cpu_timer_irq);
 }
 
 static void mips_perf_dispatch(void)
 {
-	do_IRQ(cp0_perfcount_irq);
+	do_IRQ(mips_cpu_perf_irq);
 }
 
 /*
@@ -127,21 +147,20 @@ unsigned long read_persistent_clock(void)
 	return mc146818_get_cmos_time();
 }
 
-void __init plat_perf_setup(void)
+static void __init plat_perf_setup(void)
 {
-	cp0_perfcount_irq = -1;
-
 #ifdef MSC01E_INT_BASE
 	if (cpu_has_veic) {
 		set_vi_handler(MSC01E_INT_PERFCTR, mips_perf_dispatch);
-		cp0_perfcount_irq = MSC01E_INT_BASE + MSC01E_INT_PERFCTR;
+		mips_cpu_perf_irq = MSC01E_INT_BASE + MSC01E_INT_PERFCTR;
 	} else
 #endif
 	if (cp0_perfcount_irq >= 0) {
 		if (cpu_has_vint)
 			set_vi_handler(cp0_perfcount_irq, mips_perf_dispatch);
+		mips_cpu_perf_irq = MIPS_CPU_IRQ_BASE + cp0_perfcount_irq;
 #ifdef CONFIG_SMP
-		set_irq_handler(cp0_perfcount_irq, handle_percpu_irq);
+		set_irq_handler(mips_cpu_perf_irq, handle_percpu_irq);
 #endif
 	}
 }
diff --git a/arch/mips/mips-boards/malta/Makefile b/arch/mips/mips-boards/malta/Makefile
index 931ca4600a63..8dc6e2ac4c03 100644
--- a/arch/mips/mips-boards/malta/Makefile
+++ b/arch/mips/mips-boards/malta/Makefile
@@ -22,6 +22,7 @@
 obj-y := malta_int.o malta_platform.o malta_setup.o
 
 obj-$(CONFIG_MTD) += malta_mtd.o
+# FIXME FIXME FIXME
 obj-$(CONFIG_MIPS_MT_SMTC) += malta_smtc.o
 
 EXTRA_CFLAGS += -Werror
diff --git a/arch/mips/mips-boards/malta/malta_int.c b/arch/mips/mips-boards/malta/malta_int.c
index dbe60eb55e29..8c495104b321 100644
--- a/arch/mips/mips-boards/malta/malta_int.c
+++ b/arch/mips/mips-boards/malta/malta_int.c
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/random.h>
 
+#include <asm/traps.h>
 #include <asm/i8259.h>
 #include <asm/irq_cpu.h>
 #include <asm/irq_regs.h>
@@ -41,6 +42,14 @@
 #include <asm/mips-boards/generic.h>
 #include <asm/mips-boards/msc01_pci.h>
 #include <asm/msc01_ic.h>
+#include <asm/gic.h>
+#include <asm/gcmpregs.h>
+
+int gcmp_present = -1;
+int gic_present;
+static unsigned long _msc01_biu_base;
+static unsigned long _gcmp_base;
+static unsigned int ipi_map[NR_CPUS];
 
 static DEFINE_SPINLOCK(mips_irq_lock);
 
@@ -121,6 +130,17 @@ static void malta_hw0_irqdispatch(void)
 	do_IRQ(MALTA_INT_BASE + irq);
 }
 
+static void malta_ipi_irqdispatch(void)
+{
+	int irq;
+
+	irq = gic_get_int();
+	if (irq < 0)
+		return;  /* interrupt has already been cleared */
+
+	do_IRQ(MIPS_GIC_IRQ_BASE + irq);
+}
+
 static void corehi_irqdispatch(void)
 {
 	unsigned int intedge, intsteer, pcicmd, pcibadaddr;
@@ -257,12 +277,61 @@ asmlinkage void plat_irq_dispatch(void)
 
 	if (irq == MIPSCPU_INT_I8259A)
 		malta_hw0_irqdispatch();
+	else if (gic_present && ((1 << irq) & ipi_map[smp_processor_id()]))
+		malta_ipi_irqdispatch();
 	else if (irq >= 0)
 		do_IRQ(MIPS_CPU_IRQ_BASE + irq);
 	else
 		spurious_interrupt();
 }
 
+#ifdef CONFIG_MIPS_MT_SMP
+
+
+#define GIC_MIPS_CPU_IPI_RESCHED_IRQ	3
+#define GIC_MIPS_CPU_IPI_CALL_IRQ	4
+
+#define MIPS_CPU_IPI_RESCHED_IRQ 0	/* SW int 0 for resched */
+#define C_RESCHED C_SW0
+#define MIPS_CPU_IPI_CALL_IRQ 1		/* SW int 1 for resched */
+#define C_CALL C_SW1
+static int cpu_ipi_resched_irq, cpu_ipi_call_irq;
+
+static void ipi_resched_dispatch(void)
+{
+	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ);
+}
+
+static void ipi_call_dispatch(void)
+{
+	do_IRQ(MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ);
+}
+
+static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
+{
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ipi_call_interrupt(int irq, void *dev_id)
+{
+	smp_call_function_interrupt();
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq_resched = {
+	.handler	= ipi_resched_interrupt,
+	.flags		= IRQF_DISABLED|IRQF_PERCPU,
+	.name		= "IPI_resched"
+};
+
+static struct irqaction irq_call = {
+	.handler	= ipi_call_interrupt,
+	.flags		= IRQF_DISABLED|IRQF_PERCPU,
+	.name		= "IPI_call"
+};
+#endif /* CONFIG_MIPS_MT_SMP */
+
 static struct irqaction i8259irq = {
 	.handler = no_action,
 	.name = "XT-PIC cascade"
@@ -273,13 +342,13 @@ static struct irqaction corehi_irqaction = {
 	.name = "CoreHi"
 };
 
-msc_irqmap_t __initdata msc_irqmap[] = {
+static msc_irqmap_t __initdata msc_irqmap[] = {
 	{MSC01C_INT_TMR,		MSC01_IRQ_EDGE, 0},
 	{MSC01C_INT_PCI,		MSC01_IRQ_LEVEL, 0},
 };
-int __initdata msc_nr_irqs = ARRAY_SIZE(msc_irqmap);
+static int __initdata msc_nr_irqs = ARRAY_SIZE(msc_irqmap);
 
-msc_irqmap_t __initdata msc_eicirqmap[] = {
+static msc_irqmap_t __initdata msc_eicirqmap[] = {
 	{MSC01E_INT_SW0,		MSC01_IRQ_LEVEL, 0},
 	{MSC01E_INT_SW1,		MSC01_IRQ_LEVEL, 0},
 	{MSC01E_INT_I8259A,		MSC01_IRQ_LEVEL, 0},
@@ -291,15 +360,90 @@ msc_irqmap_t __initdata msc_eicirqmap[] = {
 	{MSC01E_INT_PERFCTR,		MSC01_IRQ_LEVEL, 0},
 	{MSC01E_INT_CPUCTR,		MSC01_IRQ_LEVEL, 0}
 };
-int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap);
+
+static int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap);
+
+/*
+ * This GIC specific tabular array defines the association between External
+ * Interrupts and CPUs/Core Interrupts. The nature of the External
+ * Interrupts is also defined here - polarity/trigger.
+ */
+static struct gic_intr_map gic_intr_map[] = {
+	{ GIC_EXT_INTR(0), 	X,	X,		X, 		X,		0 },
+	{ GIC_EXT_INTR(1), 	X,	X,		X, 		X,		0 },
+	{ GIC_EXT_INTR(2), 	X,	X,		X, 		X,		0 },
+	{ GIC_EXT_INTR(3), 	0,	GIC_CPU_INT0,	GIC_POL_POS, 	GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(4), 	0,	GIC_CPU_INT1,	GIC_POL_POS, 	GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(5), 	0,	GIC_CPU_INT2,	GIC_POL_POS, 	GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(6), 	0,	GIC_CPU_INT3,	GIC_POL_POS, 	GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(7), 	0,	GIC_CPU_INT4,	GIC_POL_POS, 	GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(8), 	0,	GIC_CPU_INT3,	GIC_POL_POS, 	GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(9), 	0,	GIC_CPU_INT3,	GIC_POL_POS, 	GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(10), 	X,	X,		X, 		X,		0 },
+	{ GIC_EXT_INTR(11), 	X,	X,		X, 		X,		0 },
+	{ GIC_EXT_INTR(12), 	0,	GIC_CPU_INT3,	GIC_POL_POS, 	GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(13), 	0,	GIC_MAP_TO_NMI_MSK,	GIC_POL_POS, GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(14), 	0,	GIC_MAP_TO_NMI_MSK,	GIC_POL_POS, GIC_TRIG_LEVEL,	0 },
+	{ GIC_EXT_INTR(15), 	X,	X,		X, 		X,		0 },
+	{ GIC_EXT_INTR(16), 	0,	GIC_CPU_INT1,	GIC_POL_POS, GIC_TRIG_EDGE,	1 },
+	{ GIC_EXT_INTR(17), 	0,	GIC_CPU_INT2,	GIC_POL_POS, GIC_TRIG_EDGE,	1 },
+	{ GIC_EXT_INTR(18), 	1,	GIC_CPU_INT1,	GIC_POL_POS, GIC_TRIG_EDGE,	1 },
+	{ GIC_EXT_INTR(19), 	1,	GIC_CPU_INT2,	GIC_POL_POS, GIC_TRIG_EDGE,	1 },
+	{ GIC_EXT_INTR(20), 	2,	GIC_CPU_INT1,	GIC_POL_POS, GIC_TRIG_EDGE,	1 },
+	{ GIC_EXT_INTR(21), 	2,	GIC_CPU_INT2,	GIC_POL_POS, GIC_TRIG_EDGE,	1 },
+	{ GIC_EXT_INTR(22), 	3,	GIC_CPU_INT1,	GIC_POL_POS, GIC_TRIG_EDGE,	1 },
+	{ GIC_EXT_INTR(23), 	3,	GIC_CPU_INT2,	GIC_POL_POS, GIC_TRIG_EDGE,	1 },
+};
+
+/*
+ * GCMP needs to be detected before any SMP initialisation
+ */
+int __init gcmp_probe(unsigned long addr, unsigned long size)
+{
+	if (gcmp_present >= 0)
+		return gcmp_present;
+
+	_gcmp_base = (unsigned long) ioremap_nocache(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ);
+	_msc01_biu_base = (unsigned long) ioremap_nocache(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ);
+	gcmp_present = (GCMPGCB(GCMPB) & GCMP_GCB_GCMPB_GCMPBASE_MSK) == GCMP_BASE_ADDR;
+
+	if (gcmp_present)
+		printk(KERN_DEBUG "GCMP present\n");
+	return gcmp_present;
+}
+
+void __init fill_ipi_map(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(gic_intr_map); i++) {
+		if (gic_intr_map[i].ipiflag && (gic_intr_map[i].cpunum != X))
+			ipi_map[gic_intr_map[i].cpunum] |=
+				(1 << (gic_intr_map[i].pin + 2));
+	}
+}
 
 void __init arch_init_irq(void)
 {
+	int gic_present, gcmp_present;
+
 	init_i8259_irqs();
 
 	if (!cpu_has_veic)
 		mips_cpu_irq_init();
 
+	gcmp_present = gcmp_probe(GCMP_BASE_ADDR, GCMP_ADDRSPACE_SZ);
+	if (gcmp_present)  {
+		GCMPGCB(GICBA) = GIC_BASE_ADDR | GCMP_GCB_GICBA_EN_MSK;
+		gic_present = 1;
+	} else {
+		_msc01_biu_base = (unsigned long) ioremap_nocache(MSC01_BIU_REG_BASE, MSC01_BIU_ADDRSPACE_SZ);
+		gic_present = (REG(_msc01_biu_base, MSC01_SC_CFG) &
+		MSC01_SC_CFG_GICPRES_MSK) >> MSC01_SC_CFG_GICPRES_SHF;
+	}
+	if (gic_present)
+		printk(KERN_DEBUG "GIC present\n");
+
 	switch (mips_revision_sconid) {
 	case MIPS_REVISION_SCON_SOCIT:
 	case MIPS_REVISION_SCON_ROCIT:
@@ -360,4 +504,206 @@ void __init arch_init_irq(void)
 		setup_irq(MIPS_CPU_IRQ_BASE+MIPSCPU_INT_COREHI,
 						&corehi_irqaction);
 	}
+
+#if defined(CONFIG_MIPS_MT_SMP)
+	if (gic_present) {
+		/* FIXME */
+		int i;
+		struct {
+			unsigned int resched;
+			unsigned int call;
+		} ipiirq[] = {
+			{
+				.resched = GIC_IPI_EXT_INTR_RESCHED_VPE0,
+				.call =  GIC_IPI_EXT_INTR_CALLFNC_VPE0},
+			{
+				.resched = GIC_IPI_EXT_INTR_RESCHED_VPE1,
+				.call =  GIC_IPI_EXT_INTR_CALLFNC_VPE1
+			}, {
+				.resched = GIC_IPI_EXT_INTR_RESCHED_VPE2,
+				.call =  GIC_IPI_EXT_INTR_CALLFNC_VPE2
+			}, {
+				.resched = GIC_IPI_EXT_INTR_RESCHED_VPE3,
+				.call =  GIC_IPI_EXT_INTR_CALLFNC_VPE3
+			}
+		};
+#define NIPI (sizeof(ipiirq)/sizeof(ipiirq[0]))
+		fill_ipi_map();
+		gic_init(GIC_BASE_ADDR, GIC_ADDRSPACE_SZ, gic_intr_map, ARRAY_SIZE(gic_intr_map), MIPS_GIC_IRQ_BASE);
+		if (!gcmp_present) {
+			/* Enable the GIC */
+			i = REG(_msc01_biu_base, MSC01_SC_CFG);
+			REG(_msc01_biu_base, MSC01_SC_CFG) =
+				(i | (0x1 << MSC01_SC_CFG_GICENA_SHF));
+			pr_debug("GIC Enabled\n");
+		}
+
+		/* set up ipi interrupts */
+		if (cpu_has_vint) {
+			set_vi_handler(MIPSCPU_INT_IPI0, malta_ipi_irqdispatch);
+			set_vi_handler(MIPSCPU_INT_IPI1, malta_ipi_irqdispatch);
+		}
+		/* Argh.. this really needs sorting out.. */
+		printk("CPU%d: status register was %08x\n", smp_processor_id(), read_c0_status());
+		write_c0_status(read_c0_status() | STATUSF_IP3 | STATUSF_IP4);
+		printk("CPU%d: status register now %08x\n", smp_processor_id(), read_c0_status());
+		write_c0_status(0x1100dc00);
+		printk("CPU%d: status register frc %08x\n", smp_processor_id(), read_c0_status());
+		for (i = 0; i < NIPI; i++) {
+			setup_irq(MIPS_GIC_IRQ_BASE + ipiirq[i].resched, &irq_resched);
+			setup_irq(MIPS_GIC_IRQ_BASE + ipiirq[i].call, &irq_call);
+
+			set_irq_handler(MIPS_GIC_IRQ_BASE + ipiirq[i].resched, handle_percpu_irq);
+			set_irq_handler(MIPS_GIC_IRQ_BASE + ipiirq[i].call, handle_percpu_irq);
+		}
+	} else {
+		/* set up ipi interrupts */
+		if (cpu_has_veic) {
+			set_vi_handler (MSC01E_INT_SW0, ipi_resched_dispatch);
+			set_vi_handler (MSC01E_INT_SW1, ipi_call_dispatch);
+			cpu_ipi_resched_irq = MSC01E_INT_SW0;
+			cpu_ipi_call_irq = MSC01E_INT_SW1;
+		} else {
+			if (cpu_has_vint) {
+				set_vi_handler (MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch);
+				set_vi_handler (MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch);
+			}
+			cpu_ipi_resched_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_RESCHED_IRQ;
+			cpu_ipi_call_irq = MIPS_CPU_IRQ_BASE + MIPS_CPU_IPI_CALL_IRQ;
+		}
+
+		setup_irq(cpu_ipi_resched_irq, &irq_resched);
+		setup_irq(cpu_ipi_call_irq, &irq_call);
+
+		set_irq_handler(cpu_ipi_resched_irq, handle_percpu_irq);
+		set_irq_handler(cpu_ipi_call_irq, handle_percpu_irq);
+	}
+#endif
+}
+
+void malta_be_init(void)
+{
+	if (gcmp_present) {
+		/* Could change CM error mask register */
+	}
+}
+
+
+static char *tr[8] = {
+	"mem",	"gcr",	"gic",	"mmio",
+	"0x04",	"0x05",	"0x06",	"0x07"
+};
+
+static char *mcmd[32] = {
+	[0x00] = "0x00",
+	[0x01] = "Legacy Write",
+	[0x02] = "Legacy Read",
+	[0x03] = "0x03",
+	[0x04] = "0x04",
+	[0x05] = "0x05",
+	[0x06] = "0x06",
+	[0x07] = "0x07",
+	[0x08] = "Coherent Read Own",
+	[0x09] = "Coherent Read Share",
+	[0x0a] = "Coherent Read Discard",
+	[0x0b] = "Coherent Ready Share Always",
+	[0x0c] = "Coherent Upgrade",
+	[0x0d] = "Coherent Writeback",
+	[0x0e] = "0x0e",
+	[0x0f] = "0x0f",
+	[0x10] = "Coherent Copyback",
+	[0x11] = "Coherent Copyback Invalidate",
+	[0x12] = "Coherent Invalidate",
+	[0x13] = "Coherent Write Invalidate",
+	[0x14] = "Coherent Completion Sync",
+	[0x15] = "0x15",
+	[0x16] = "0x16",
+	[0x17] = "0x17",
+	[0x18] = "0x18",
+	[0x19] = "0x19",
+	[0x1a] = "0x1a",
+	[0x1b] = "0x1b",
+	[0x1c] = "0x1c",
+	[0x1d] = "0x1d",
+	[0x1e] = "0x1e",
+	[0x1f] = "0x1f"
+};
+
+static char *core[8] = {
+	"Invalid/OK", 	"Invalid/Data",
+	"Shared/OK",	"Shared/Data",
+	"Modified/OK",	"Modified/Data",
+	"Exclusive/OK",	"Exclusive/Data"
+};
+
+static char *causes[32] = {
+	"None", "GC_WR_ERR", "GC_RD_ERR", "COH_WR_ERR",
+	"COH_RD_ERR", "MMIO_WR_ERR", "MMIO_RD_ERR", "0x07",
+	"0x08", "0x09", "0x0a", "0x0b",
+	"0x0c", "0x0d", "0x0e", "0x0f",
+	"0x10", "0x11", "0x12", "0x13",
+	"0x14", "0x15", "0x16", "INTVN_WR_ERR",
+	"INTVN_RD_ERR", "0x19", "0x1a", "0x1b",
+	"0x1c", "0x1d", "0x1e", "0x1f"
+};
+
+int malta_be_handler(struct pt_regs *regs, int is_fixup)
+{
+	/* This duplicates the handling in do_be which seems wrong */
+	int retval = is_fixup ? MIPS_BE_FIXUP : MIPS_BE_FATAL;
+
+	if (gcmp_present) {
+		unsigned long cm_error = GCMPGCB(GCMEC);
+		unsigned long cm_addr = GCMPGCB(GCMEA);
+		unsigned long cm_other = GCMPGCB(GCMEO);
+		unsigned long cause, ocause;
+		char buf[256];
+
+		cause = (cm_error & GCMP_GCB_GMEC_ERROR_TYPE_MSK);
+		if (cause != 0) {
+			cause >>= GCMP_GCB_GMEC_ERROR_TYPE_SHF;
+			if (cause < 16) {
+				unsigned long cca_bits = (cm_error >> 15) & 7;
+				unsigned long tr_bits = (cm_error >> 12) & 7;
+				unsigned long mcmd_bits = (cm_error >> 7) & 0x1f;
+				unsigned long stag_bits = (cm_error >> 3) & 15;
+				unsigned long sport_bits = (cm_error >> 0) & 7;
+
+				snprintf(buf, sizeof(buf),
+					 "CCA=%lu TR=%s MCmd=%s STag=%lu "
+					 "SPort=%lu\n",
+					 cca_bits, tr[tr_bits], mcmd[mcmd_bits],
+					 stag_bits, sport_bits);
+			} else {
+				/* glob state & sresp together */
+				unsigned long c3_bits = (cm_error >> 18) & 7;
+				unsigned long c2_bits = (cm_error >> 15) & 7;
+				unsigned long c1_bits = (cm_error >> 12) & 7;
+				unsigned long c0_bits = (cm_error >> 9) & 7;
+				unsigned long sc_bit = (cm_error >> 8) & 1;
+				unsigned long mcmd_bits = (cm_error >> 3) & 0x1f;
+				unsigned long sport_bits = (cm_error >> 0) & 7;
+				snprintf(buf, sizeof(buf),
+					 "C3=%s C2=%s C1=%s C0=%s SC=%s "
+					 "MCmd=%s SPort=%lu\n",
+					 core[c3_bits], core[c2_bits],
+					 core[c1_bits], core[c0_bits],
+					 sc_bit ? "True" : "False",
+					 mcmd[mcmd_bits], sport_bits);
+			}
+
+			ocause = (cm_other & GCMP_GCB_GMEO_ERROR_2ND_MSK) >>
+				 GCMP_GCB_GMEO_ERROR_2ND_SHF;
+
+			printk("CM_ERROR=%08lx %s <%s>\n", cm_error,
+			       causes[cause], buf);
+			printk("CM_ADDR =%08lx\n", cm_addr);
+			printk("CM_OTHER=%08lx %s\n", cm_other, causes[ocause]);
+
+			/* reprime cause register */
+			GCMPGCB(GCMEC) = 0;
+		}
+	}
+
+	return retval;
 }
diff --git a/arch/mips/mips-boards/malta/malta_setup.c b/arch/mips/mips-boards/malta/malta_setup.c
index 2cd8f5734b36..e7cad54936ca 100644
--- a/arch/mips/mips-boards/malta/malta_setup.c
+++ b/arch/mips/mips-boards/malta/malta_setup.c
@@ -1,7 +1,7 @@
 /*
  * Carsten Langgaard, carstenl@mips.com
  * Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
- * Copyright (C) Dmitri Vorobiev
+ * Copyright (C) 2008 Dmitri Vorobiev
  *
  *  This program is free software; you can distribute it and/or modify it
  *  under the terms of the GNU General Public License (Version 2) as
@@ -36,7 +36,10 @@
 #include <linux/console.h>
 #endif
 
-struct resource standard_io_resources[] = {
+extern void malta_be_init(void);
+extern int malta_be_handler(struct pt_regs *regs, int is_fixup);
+
+static struct resource standard_io_resources[] = {
 	{
 		.name = "dma1",
 		.start = 0x00,
@@ -220,4 +223,7 @@ void __init plat_mem_setup(void)
 	screen_info_setup();
 #endif
 	mips_reboot_setup();
+
+	board_be_init = malta_be_init;
+	board_be_handler = malta_be_handler;
 }
diff --git a/arch/mips/mipssim/sim_setup.c b/arch/mips/mipssim/sim_setup.c
index d49fe73426b7..7c7148ef2646 100644
--- a/arch/mips/mipssim/sim_setup.c
+++ b/arch/mips/mipssim/sim_setup.c
@@ -39,9 +39,6 @@
 static void __init serial_init(void);
 unsigned int _isbonito = 0;
 
-extern void __init sanitize_tlb_entries(void);
-
-
 const char *get_system_type(void)
 {
 	return "MIPSsim";
@@ -55,9 +52,6 @@ void __init plat_mem_setup(void)
 
 	pr_info("Linux started...\n");
 
-#ifdef CONFIG_MIPS_MT_SMP
-	sanitize_tlb_entries();
-#endif
 }
 
 extern struct plat_smp_ops ssmtc_smp_ops;
diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile
index c6f832e0f41c..48731020ca0e 100644
--- a/arch/mips/mm/Makefile
+++ b/arch/mips/mm/Makefile
@@ -4,30 +4,29 @@
 
 obj-y				+= cache.o dma-default.o extable.o fault.o \
 				   init.o pgtable.o tlbex.o tlbex-fault.o \
-				   uasm.o
+				   uasm.o page.o
 
 obj-$(CONFIG_32BIT)		+= ioremap.o pgtable-32.o
 obj-$(CONFIG_64BIT)		+= pgtable-64.o
 obj-$(CONFIG_HIGHMEM)		+= highmem.o
 
-obj-$(CONFIG_CPU_LOONGSON2)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_MIPS32)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_MIPS64)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_NEVADA)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R10000)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R3000)		+= c-r3k.o tlb-r3k.o pg-r4k.o
-obj-$(CONFIG_CPU_R4300)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R4X00)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R5000)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R5432)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_R8000)		+= c-r4k.o cex-gen.o pg-r4k.o tlb-r8k.o
-obj-$(CONFIG_CPU_RM7000)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_RM9000)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_SB1)		+= c-r4k.o cerr-sb1.o cex-sb1.o pg-sb1.o \
-				   tlb-r4k.o
-obj-$(CONFIG_CPU_TX39XX)	+= c-tx39.o pg-r4k.o tlb-r3k.o
-obj-$(CONFIG_CPU_TX49XX)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
-obj-$(CONFIG_CPU_VR41XX)	+= c-r4k.o cex-gen.o pg-r4k.o tlb-r4k.o
+obj-$(CONFIG_CPU_LOONGSON2)	+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_MIPS32)	+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_MIPS64)	+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_NEVADA)	+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R10000)	+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R3000)		+= c-r3k.o tlb-r3k.o
+obj-$(CONFIG_CPU_R4300)		+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R4X00)		+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R5000)		+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R5432)		+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_R8000)		+= c-r4k.o cex-gen.o tlb-r8k.o
+obj-$(CONFIG_CPU_RM7000)	+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_RM9000)	+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_SB1)		+= c-r4k.o cerr-sb1.o cex-sb1.o tlb-r4k.o
+obj-$(CONFIG_CPU_TX39XX)	+= c-tx39.o tlb-r3k.o
+obj-$(CONFIG_CPU_TX49XX)	+= c-r4k.o cex-gen.o tlb-r4k.o
+obj-$(CONFIG_CPU_VR41XX)	+= c-r4k.o cex-gen.o tlb-r4k.o
 
 obj-$(CONFIG_IP22_CPU_SCACHE)	+= sc-ip22.o
 obj-$(CONFIG_R5000_CPU_SCACHE)  += sc-r5k.o
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 77aefb4ebedd..643c8bcffff3 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -14,6 +14,7 @@
 #include <linux/linkage.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/module.h>
 #include <linux/bitops.h>
 
 #include <asm/bcache.h>
@@ -53,6 +54,12 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info,
 	preempt_enable();
 }
 
+#if defined(CONFIG_MIPS_CMP)
+#define cpu_has_safe_index_cacheops 0
+#else
+#define cpu_has_safe_index_cacheops 1
+#endif
+
 /*
  * Must die.
  */
@@ -481,6 +488,8 @@ static inline void local_r4k_flush_cache_page(void *args)
 
 	if (cpu_has_dc_aliases || (exec && !cpu_has_ic_fills_f_dc)) {
 		r4k_blast_dcache_page(addr);
+		if (exec && !cpu_icache_snoops_remote_store)
+			r4k_blast_scache_page(addr);
 	}
 	if (exec) {
 		if (vaddr && cpu_has_vtag_icache && mm == current->active_mm) {
@@ -583,7 +592,7 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
 	 * subset property so we have to flush the primary caches
 	 * explicitly
 	 */
-	if (size >= dcache_size) {
+	if (cpu_has_safe_index_cacheops && size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
 		R4600_HIT_CACHEOP_WAR_IMPL;
@@ -606,7 +615,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
 		return;
 	}
 
-	if (size >= dcache_size) {
+	if (cpu_has_safe_index_cacheops && size >= dcache_size) {
 		r4k_blast_dcache();
 	} else {
 		R4600_HIT_CACHEOP_WAR_IMPL;
@@ -968,6 +977,7 @@ static void __cpuinit probe_pcache(void)
 	case CPU_24K:
 	case CPU_34K:
 	case CPU_74K:
+	case CPU_1004K:
 		if ((read_c0_config7() & (1 << 16))) {
 			/* effectively physically indexed dcache,
 			   thus no virtual aliases. */
@@ -1216,9 +1226,25 @@ void au1x00_fixup_config_od(void)
 	}
 }
 
+static int __cpuinitdata cca = -1;
+
+static int __init cca_setup(char *str)
+{
+	get_option(&str, &cca);
+
+	return 1;
+}
+
+__setup("cca=", cca_setup);
+
 static void __cpuinit coherency_setup(void)
 {
-	change_c0_config(CONF_CM_CMASK, CONF_CM_DEFAULT);
+	if (cca < 0 || cca > 7)
+		cca = read_c0_config() & CONF_CM_CMASK;
+	_page_cachable_default = cca << _CACHE_SHIFT;
+
+	pr_debug("Using cache attribute %d\n", cca);
+	change_c0_config(CONF_CM_CMASK, cca);
 
 	/*
 	 * c0_status.cu=0 specifies that updates by the sc instruction use
@@ -1248,6 +1274,20 @@ static void __cpuinit coherency_setup(void)
 	}
 }
 
+#if defined(CONFIG_DMA_NONCOHERENT)
+
+static int __cpuinitdata coherentio;
+
+static int __init setcoherentio(char *str)
+{
+	coherentio = 1;
+
+	return 1;
+}
+
+__setup("coherentio", setcoherentio);
+#endif
+
 void __cpuinit r4k_cache_init(void)
 {
 	extern void build_clear_page(void);
@@ -1307,14 +1347,22 @@ void __cpuinit r4k_cache_init(void)
 	flush_data_cache_page	= r4k_flush_data_cache_page;
 	flush_icache_range	= r4k_flush_icache_range;
 
-#ifdef CONFIG_DMA_NONCOHERENT
-	_dma_cache_wback_inv	= r4k_dma_cache_wback_inv;
-	_dma_cache_wback	= r4k_dma_cache_wback_inv;
-	_dma_cache_inv		= r4k_dma_cache_inv;
+#if defined(CONFIG_DMA_NONCOHERENT)
+	if (coherentio) {
+		_dma_cache_wback_inv	= (void *)cache_noop;
+		_dma_cache_wback	= (void *)cache_noop;
+		_dma_cache_inv		= (void *)cache_noop;
+	} else {
+		_dma_cache_wback_inv	= r4k_dma_cache_wback_inv;
+		_dma_cache_wback	= r4k_dma_cache_wback_inv;
+		_dma_cache_inv		= r4k_dma_cache_inv;
+	}
 #endif
 
 	build_clear_page();
 	build_copy_page();
+#if !defined(CONFIG_MIPS_CMP)
 	local_r4k___flush_cache_all(NULL);
+#endif
 	coherency_setup();
 }
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index f5903679ee6a..034e8506f6ea 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -130,8 +130,28 @@ void __update_cache(struct vm_area_struct *vma, unsigned long address,
 	}
 }
 
-static char cache_panic[] __cpuinitdata =
-	"Yeee, unsupported cache architecture.";
+unsigned long _page_cachable_default;
+EXPORT_SYMBOL_GPL(_page_cachable_default);
+
+static inline void setup_protection_map(void)
+{
+	protection_map[0] = PAGE_NONE;
+	protection_map[1] = PAGE_READONLY;
+	protection_map[2] = PAGE_COPY;
+	protection_map[3] = PAGE_COPY;
+	protection_map[4] = PAGE_READONLY;
+	protection_map[5] = PAGE_READONLY;
+	protection_map[6] = PAGE_COPY;
+	protection_map[7] = PAGE_COPY;
+	protection_map[8] = PAGE_NONE;
+	protection_map[9] = PAGE_READONLY;
+	protection_map[10] = PAGE_SHARED;
+	protection_map[11] = PAGE_SHARED;
+	protection_map[12] = PAGE_READONLY;
+	protection_map[13] = PAGE_READONLY;
+	protection_map[14] = PAGE_SHARED;
+	protection_map[15] = PAGE_SHARED;
+}
 
 void __devinit cpu_cache_init(void)
 {
@@ -139,34 +159,29 @@ void __devinit cpu_cache_init(void)
 		extern void __weak r3k_cache_init(void);
 
 		r3k_cache_init();
-		return;
 	}
 	if (cpu_has_6k_cache) {
 		extern void __weak r6k_cache_init(void);
 
 		r6k_cache_init();
-		return;
 	}
 	if (cpu_has_4k_cache) {
 		extern void __weak r4k_cache_init(void);
 
 		r4k_cache_init();
-		return;
 	}
 	if (cpu_has_8k_cache) {
 		extern void __weak r8k_cache_init(void);
 
 		r8k_cache_init();
-		return;
 	}
 	if (cpu_has_tx39_cache) {
 		extern void __weak tx39_cache_init(void);
 
 		tx39_cache_init();
-		return;
 	}
 
-	panic(cache_panic);
+	setup_protection_map();
 }
 
 int __weak __uncached_access(struct file *file, unsigned long addr)
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index c7aed133d11d..ecd562d2c348 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -142,7 +142,7 @@ void *kmap_coherent(struct page *page, unsigned long addr)
 #endif
 	vaddr = __fix_to_virt(FIX_CMAP_END - idx);
 	pte = mk_pte(page, PAGE_KERNEL);
-#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1)
+#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
 	entrylo = pte.pte_high;
 #else
 	entrylo = pte_val(pte) >> 6;
@@ -221,7 +221,7 @@ void copy_user_highpage(struct page *to, struct page *from,
 		copy_page(vto, vfrom);
 		kunmap_atomic(vfrom, KM_USER0);
 	}
-	if (((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc) ||
+	if ((!cpu_has_ic_fills_f_dc) ||
 	    pages_do_alias((unsigned long)vto, vaddr & PAGE_MASK))
 		flush_data_cache_page((unsigned long)vto);
 	kunmap_atomic(vto, KM_USER1);
@@ -229,8 +229,6 @@ void copy_user_highpage(struct page *to, struct page *from,
 	smp_wmb();
 }
 
-EXPORT_SYMBOL(copy_user_highpage);
-
 void copy_to_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len)
@@ -249,8 +247,6 @@ void copy_to_user_page(struct vm_area_struct *vma,
 		flush_cache_page(vma, vaddr, page_to_pfn(page));
 }
 
-EXPORT_SYMBOL(copy_to_user_page);
-
 void copy_from_user_page(struct vm_area_struct *vma,
 	struct page *page, unsigned long vaddr, void *dst, const void *src,
 	unsigned long len)
@@ -267,9 +263,6 @@ void copy_from_user_page(struct vm_area_struct *vma,
 	}
 }
 
-EXPORT_SYMBOL(copy_from_user_page);
-
-
 #ifdef CONFIG_HIGHMEM
 unsigned long highstart_pfn, highend_pfn;
 
diff --git a/arch/mips/mm/page.c b/arch/mips/mm/page.c
new file mode 100644
index 000000000000..d827d6144369
--- /dev/null
+++ b/arch/mips/mm/page.c
@@ -0,0 +1,684 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
+ * Copyright (C) 2007  Maciej W. Rozycki
+ * Copyright (C) 2008  Thiemo Seufer
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+
+#include <asm/bugs.h>
+#include <asm/cacheops.h>
+#include <asm/inst.h>
+#include <asm/io.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/prefetch.h>
+#include <asm/system.h>
+#include <asm/bootinfo.h>
+#include <asm/mipsregs.h>
+#include <asm/mmu_context.h>
+#include <asm/cpu.h>
+#include <asm/war.h>
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+#include <asm/sibyte/sb1250.h>
+#include <asm/sibyte/sb1250_regs.h>
+#include <asm/sibyte/sb1250_dma.h>
+#endif
+
+#include "uasm.h"
+
+/* Registers used in the assembled routines. */
+#define ZERO 0
+#define AT 2
+#define A0 4
+#define A1 5
+#define A2 6
+#define T0 8
+#define T1 9
+#define T2 10
+#define T3 11
+#define T9 25
+#define RA 31
+
+/* Handle labels (which must be positive integers). */
+enum label_id {
+	label_clear_nopref = 1,
+	label_clear_pref,
+	label_copy_nopref,
+	label_copy_pref_both,
+	label_copy_pref_store,
+};
+
+UASM_L_LA(_clear_nopref)
+UASM_L_LA(_clear_pref)
+UASM_L_LA(_copy_nopref)
+UASM_L_LA(_copy_pref_both)
+UASM_L_LA(_copy_pref_store)
+
+/* We need one branch and therefore one relocation per target label. */
+static struct uasm_label __cpuinitdata labels[5];
+static struct uasm_reloc __cpuinitdata relocs[5];
+
+#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
+#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x058 bytes
+ * R4600 v1.7:				0x05c bytes
+ * R4600 v2.0:				0x060 bytes
+ * With prefetching, 16 word strides	0x120 bytes
+ */
+
+static u32 clear_page_array[0x120 / 4];
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
+#else
+void clear_page(void *page) __attribute__((alias("clear_page_array")));
+#endif
+
+EXPORT_SYMBOL(clear_page);
+
+/*
+ * Maximum sizes:
+ *
+ * R4000 128 bytes S-cache:		0x11c bytes
+ * R4600 v1.7:				0x080 bytes
+ * R4600 v2.0:				0x07c bytes
+ * With prefetching, 16 word strides	0x540 bytes
+ */
+static u32 copy_page_array[0x540 / 4];
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+void
+copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
+#else
+void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
+#endif
+
+EXPORT_SYMBOL(copy_page);
+
+
+static int pref_bias_clear_store __cpuinitdata;
+static int pref_bias_copy_load __cpuinitdata;
+static int pref_bias_copy_store __cpuinitdata;
+
+static u32 pref_src_mode __cpuinitdata;
+static u32 pref_dst_mode __cpuinitdata;
+
+static int clear_word_size __cpuinitdata;
+static int copy_word_size __cpuinitdata;
+
+static int half_clear_loop_size __cpuinitdata;
+static int half_copy_loop_size __cpuinitdata;
+
+static int cache_line_size __cpuinitdata;
+#define cache_line_mask() (cache_line_size - 1)
+
+static inline void __cpuinit
+pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
+{
+	if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
+		if (off > 0x7fff) {
+			uasm_i_lui(buf, T9, uasm_rel_hi(off));
+			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+		} else
+			uasm_i_addiu(buf, T9, ZERO, off);
+		uasm_i_daddu(buf, reg1, reg2, T9);
+	} else {
+		if (off > 0x7fff) {
+			uasm_i_lui(buf, T9, uasm_rel_hi(off));
+			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
+			UASM_i_ADDU(buf, reg1, reg2, T9);
+		} else
+			UASM_i_ADDIU(buf, reg1, reg2, off);
+	}
+}
+
+static void __cpuinit set_prefetch_parameters(void)
+{
+	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
+		clear_word_size = 8;
+	else
+		clear_word_size = 4;
+
+	if (cpu_has_64bit_gp_regs)
+		copy_word_size = 8;
+	else
+		copy_word_size = 4;
+
+	/*
+	 * The pref's used here are using "streaming" hints, which cause the
+	 * copied data to be kicked out of the cache sooner.  A page copy often
+	 * ends up copying a lot more data than is commonly used, so this seems
+	 * to make sense in terms of reducing cache pollution, but I've no real
+	 * performance data to back this up.
+	 */
+	if (cpu_has_prefetch) {
+		/*
+		 * XXX: Most prefetch bias values in here are based on
+		 * guesswork.
+		 */
+		cache_line_size = cpu_dcache_line_size();
+		switch (current_cpu_type()) {
+		case CPU_TX49XX:
+			/* TX49 supports only Pref_Load */
+			pref_bias_copy_load = 256;
+			break;
+
+		case CPU_RM9000:
+			/*
+			 * As a workaround for erratum G105 which make the
+			 * PrepareForStore hint unusable we fall back to
+			 * StoreRetained on the RM9000.  Once it is known which
+			 * versions of the RM9000 we'll be able to condition-
+			 * alize this.
+			 */
+
+		case CPU_R10000:
+		case CPU_R12000:
+		case CPU_R14000:
+			/*
+			 * Those values have been experimentally tuned for an
+			 * Origin 200.
+			 */
+			pref_bias_clear_store = 512;
+			pref_bias_copy_load = 256;
+			pref_bias_copy_store = 256;
+			pref_src_mode = Pref_LoadStreamed;
+			pref_dst_mode = Pref_StoreStreamed;
+			break;
+
+		case CPU_SB1:
+		case CPU_SB1A:
+			pref_bias_clear_store = 128;
+			pref_bias_copy_load = 128;
+			pref_bias_copy_store = 128;
+			/*
+			 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
+			 * hints are broken.
+			 */
+			if (current_cpu_type() == CPU_SB1 &&
+			    (current_cpu_data.processor_id & 0xff) < 0x02) {
+				pref_src_mode = Pref_Load;
+				pref_dst_mode = Pref_Store;
+			} else {
+				pref_src_mode = Pref_LoadStreamed;
+				pref_dst_mode = Pref_StoreStreamed;
+			}
+			break;
+
+		default:
+			pref_bias_clear_store = 128;
+			pref_bias_copy_load = 256;
+			pref_bias_copy_store = 128;
+			pref_src_mode = Pref_LoadStreamed;
+			pref_dst_mode = Pref_PrepareForStore;
+			break;
+		}
+	} else {
+		if (cpu_has_cache_cdex_s)
+			cache_line_size = cpu_scache_line_size();
+		else if (cpu_has_cache_cdex_p)
+			cache_line_size = cpu_dcache_line_size();
+	}
+	/*
+	 * Too much unrolling will overflow the available space in
+	 * clear_space_array / copy_page_array. 8 words sounds generous,
+	 * but a R4000 with 128 byte L2 line length can exceed even that.
+	 */
+	half_clear_loop_size = min(8 * clear_word_size,
+				   max(cache_line_size >> 1,
+				       4 * clear_word_size));
+	half_copy_loop_size = min(8 * copy_word_size,
+				  max(cache_line_size >> 1,
+				      4 * copy_word_size));
+}
+
+static void __cpuinit build_clear_store(u32 **buf, int off)
+{
+	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
+		uasm_i_sd(buf, ZERO, off, A0);
+	} else {
+		uasm_i_sw(buf, ZERO, off, A0);
+	}
+}
+
+static inline void __cpuinit build_clear_pref(u32 **buf, int off)
+{
+	if (off & cache_line_mask())
+		return;
+
+	if (pref_bias_clear_store) {
+		uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
+			    A0);
+	} else if (cpu_has_cache_cdex_s) {
+		uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+	} else if (cpu_has_cache_cdex_p) {
+		if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+			uasm_i_nop(buf);
+			uasm_i_nop(buf);
+			uasm_i_nop(buf);
+			uasm_i_nop(buf);
+		}
+
+		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+			uasm_i_lw(buf, ZERO, ZERO, AT);
+
+		uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+	}
+}
+
+void __cpuinit build_clear_page(void)
+{
+	int off;
+	u32 *buf = (u32 *)&clear_page_array;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	int i;
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	set_prefetch_parameters();
+
+	/*
+	 * This algorithm makes the following assumptions:
+	 *   - The prefetch bias is a multiple of 2 words.
+	 *   - The prefetch bias is less than one page.
+	 */
+	BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
+	BUG_ON(PAGE_SIZE < pref_bias_clear_store);
+
+	off = PAGE_SIZE - pref_bias_clear_store;
+	if (off > 0xffff || !pref_bias_clear_store)
+		pg_addiu(&buf, A2, A0, off);
+	else
+		uasm_i_ori(&buf, A2, A0, off);
+
+	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+		uasm_i_lui(&buf, AT, 0xa000);
+
+	off = min(8, pref_bias_clear_store / cache_line_size) *
+	      cache_line_size;
+	while (off) {
+		build_clear_pref(&buf, -off);
+		off -= cache_line_size;
+	}
+	uasm_l_clear_pref(&l, buf);
+	do {
+		build_clear_pref(&buf, off);
+		build_clear_store(&buf, off);
+		off += clear_word_size;
+	} while (off < half_clear_loop_size);
+	pg_addiu(&buf, A0, A0, 2 * off);
+	off = -off;
+	do {
+		build_clear_pref(&buf, off);
+		if (off == -clear_word_size)
+			uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
+		build_clear_store(&buf, off);
+		off += clear_word_size;
+	} while (off < 0);
+
+	if (pref_bias_clear_store) {
+		pg_addiu(&buf, A2, A0, pref_bias_clear_store);
+		uasm_l_clear_nopref(&l, buf);
+		off = 0;
+		do {
+			build_clear_store(&buf, off);
+			off += clear_word_size;
+		} while (off < half_clear_loop_size);
+		pg_addiu(&buf, A0, A0, 2 * off);
+		off = -off;
+		do {
+			if (off == -clear_word_size)
+				uasm_il_bne(&buf, &r, A0, A2,
+					    label_clear_nopref);
+			build_clear_store(&buf, off);
+			off += clear_word_size;
+		} while (off < 0);
+	}
+
+	uasm_i_jr(&buf, RA);
+	uasm_i_nop(&buf);
+
+	BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));
+
+	uasm_resolve_relocs(relocs, labels);
+
+	pr_debug("Synthesized clear page handler (%u instructions).\n",
+		 (u32)(buf - clear_page_array));
+
+	pr_debug("\t.set push\n");
+	pr_debug("\t.set noreorder\n");
+	for (i = 0; i < (buf - clear_page_array); i++)
+		pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
+	pr_debug("\t.set pop\n");
+}
+
+static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
+{
+	if (cpu_has_64bit_gp_regs) {
+		uasm_i_ld(buf, reg, off, A1);
+	} else {
+		uasm_i_lw(buf, reg, off, A1);
+	}
+}
+
+static void __cpuinit build_copy_store(u32 **buf, int reg, int off)
+{
+	if (cpu_has_64bit_gp_regs) {
+		uasm_i_sd(buf, reg, off, A0);
+	} else {
+		uasm_i_sw(buf, reg, off, A0);
+	}
+}
+
+static inline void build_copy_load_pref(u32 **buf, int off)
+{
+	if (off & cache_line_mask())
+		return;
+
+	if (pref_bias_copy_load)
+		uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
+}
+
+static inline void build_copy_store_pref(u32 **buf, int off)
+{
+	if (off & cache_line_mask())
+		return;
+
+	if (pref_bias_copy_store) {
+		uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
+			    A0);
+	} else if (cpu_has_cache_cdex_s) {
+		uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
+	} else if (cpu_has_cache_cdex_p) {
+		if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
+			uasm_i_nop(buf);
+			uasm_i_nop(buf);
+			uasm_i_nop(buf);
+			uasm_i_nop(buf);
+		}
+
+		if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+			uasm_i_lw(buf, ZERO, ZERO, AT);
+
+		uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
+	}
+}
+
+void __cpuinit build_copy_page(void)
+{
+	int off;
+	u32 *buf = (u32 *)&copy_page_array;
+	struct uasm_label *l = labels;
+	struct uasm_reloc *r = relocs;
+	int i;
+
+	memset(labels, 0, sizeof(labels));
+	memset(relocs, 0, sizeof(relocs));
+
+	set_prefetch_parameters();
+
+	/*
+	 * This algorithm makes the following assumptions:
+	 *   - All prefetch biases are multiples of 8 words.
+	 *   - The prefetch biases are less than one page.
+	 *   - The store prefetch bias isn't greater than the load
+	 *     prefetch bias.
+	 */
+	BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
+	BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
+	BUG_ON(PAGE_SIZE < pref_bias_copy_load);
+	BUG_ON(pref_bias_copy_store > pref_bias_copy_load);
+
+	off = PAGE_SIZE - pref_bias_copy_load;
+	if (off > 0xffff || !pref_bias_copy_load)
+		pg_addiu(&buf, A2, A0, off);
+	else
+		uasm_i_ori(&buf, A2, A0, off);
+
+	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
+		uasm_i_lui(&buf, AT, 0xa000);
+
+	off = min(8, pref_bias_copy_load / cache_line_size) * cache_line_size;
+	while (off) {
+		build_copy_load_pref(&buf, -off);
+		off -= cache_line_size;
+	}
+	off = min(8, pref_bias_copy_store / cache_line_size) * cache_line_size;
+	while (off) {
+		build_copy_store_pref(&buf, -off);
+		off -= cache_line_size;
+	}
+	uasm_l_copy_pref_both(&l, buf);
+	do {
+		build_copy_load_pref(&buf, off);
+		build_copy_load(&buf, T0, off);
+		build_copy_load_pref(&buf, off + copy_word_size);
+		build_copy_load(&buf, T1, off + copy_word_size);
+		build_copy_load_pref(&buf, off + 2 * copy_word_size);
+		build_copy_load(&buf, T2, off + 2 * copy_word_size);
+		build_copy_load_pref(&buf, off + 3 * copy_word_size);
+		build_copy_load(&buf, T3, off + 3 * copy_word_size);
+		build_copy_store_pref(&buf, off);
+		build_copy_store(&buf, T0, off);
+		build_copy_store_pref(&buf, off + copy_word_size);
+		build_copy_store(&buf, T1, off + copy_word_size);
+		build_copy_store_pref(&buf, off + 2 * copy_word_size);
+		build_copy_store(&buf, T2, off + 2 * copy_word_size);
+		build_copy_store_pref(&buf, off + 3 * copy_word_size);
+		build_copy_store(&buf, T3, off + 3 * copy_word_size);
+		off += 4 * copy_word_size;
+	} while (off < half_copy_loop_size);
+	pg_addiu(&buf, A1, A1, 2 * off);
+	pg_addiu(&buf, A0, A0, 2 * off);
+	off = -off;
+	do {
+		build_copy_load_pref(&buf, off);
+		build_copy_load(&buf, T0, off);
+		build_copy_load_pref(&buf, off + copy_word_size);
+		build_copy_load(&buf, T1, off + copy_word_size);
+		build_copy_load_pref(&buf, off + 2 * copy_word_size);
+		build_copy_load(&buf, T2, off + 2 * copy_word_size);
+		build_copy_load_pref(&buf, off + 3 * copy_word_size);
+		build_copy_load(&buf, T3, off + 3 * copy_word_size);
+		build_copy_store_pref(&buf, off);
+		build_copy_store(&buf, T0, off);
+		build_copy_store_pref(&buf, off + copy_word_size);
+		build_copy_store(&buf, T1, off + copy_word_size);
+		build_copy_store_pref(&buf, off + 2 * copy_word_size);
+		build_copy_store(&buf, T2, off + 2 * copy_word_size);
+		build_copy_store_pref(&buf, off + 3 * copy_word_size);
+		if (off == -(4 * copy_word_size))
+			uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
+		build_copy_store(&buf, T3, off + 3 * copy_word_size);
+		off += 4 * copy_word_size;
+	} while (off < 0);
+
+	if (pref_bias_copy_load - pref_bias_copy_store) {
+		pg_addiu(&buf, A2, A0,
+			 pref_bias_copy_load - pref_bias_copy_store);
+		uasm_l_copy_pref_store(&l, buf);
+		off = 0;
+		do {
+			build_copy_load(&buf, T0, off);
+			build_copy_load(&buf, T1, off + copy_word_size);
+			build_copy_load(&buf, T2, off + 2 * copy_word_size);
+			build_copy_load(&buf, T3, off + 3 * copy_word_size);
+			build_copy_store_pref(&buf, off);
+			build_copy_store(&buf, T0, off);
+			build_copy_store_pref(&buf, off + copy_word_size);
+			build_copy_store(&buf, T1, off + copy_word_size);
+			build_copy_store_pref(&buf, off + 2 * copy_word_size);
+			build_copy_store(&buf, T2, off + 2 * copy_word_size);
+			build_copy_store_pref(&buf, off + 3 * copy_word_size);
+			build_copy_store(&buf, T3, off + 3 * copy_word_size);
+			off += 4 * copy_word_size;
+		} while (off < half_copy_loop_size);
+		pg_addiu(&buf, A1, A1, 2 * off);
+		pg_addiu(&buf, A0, A0, 2 * off);
+		off = -off;
+		do {
+			build_copy_load(&buf, T0, off);
+			build_copy_load(&buf, T1, off + copy_word_size);
+			build_copy_load(&buf, T2, off + 2 * copy_word_size);
+			build_copy_load(&buf, T3, off + 3 * copy_word_size);
+			build_copy_store_pref(&buf, off);
+			build_copy_store(&buf, T0, off);
+			build_copy_store_pref(&buf, off + copy_word_size);
+			build_copy_store(&buf, T1, off + copy_word_size);
+			build_copy_store_pref(&buf, off + 2 * copy_word_size);
+			build_copy_store(&buf, T2, off + 2 * copy_word_size);
+			build_copy_store_pref(&buf, off + 3 * copy_word_size);
+			if (off == -(4 * copy_word_size))
+				uasm_il_bne(&buf, &r, A2, A0,
+					    label_copy_pref_store);
+			build_copy_store(&buf, T3, off + 3 * copy_word_size);
+			off += 4 * copy_word_size;
+		} while (off < 0);
+	}
+
+	if (pref_bias_copy_store) {
+		pg_addiu(&buf, A2, A0, pref_bias_copy_store);
+		uasm_l_copy_nopref(&l, buf);
+		off = 0;
+		do {
+			build_copy_load(&buf, T0, off);
+			build_copy_load(&buf, T1, off + copy_word_size);
+			build_copy_load(&buf, T2, off + 2 * copy_word_size);
+			build_copy_load(&buf, T3, off + 3 * copy_word_size);
+			build_copy_store(&buf, T0, off);
+			build_copy_store(&buf, T1, off + copy_word_size);
+			build_copy_store(&buf, T2, off + 2 * copy_word_size);
+			build_copy_store(&buf, T3, off + 3 * copy_word_size);
+			off += 4 * copy_word_size;
+		} while (off < half_copy_loop_size);
+		pg_addiu(&buf, A1, A1, 2 * off);
+		pg_addiu(&buf, A0, A0, 2 * off);
+		off = -off;
+		do {
+			build_copy_load(&buf, T0, off);
+			build_copy_load(&buf, T1, off + copy_word_size);
+			build_copy_load(&buf, T2, off + 2 * copy_word_size);
+			build_copy_load(&buf, T3, off + 3 * copy_word_size);
+			build_copy_store(&buf, T0, off);
+			build_copy_store(&buf, T1, off + copy_word_size);
+			build_copy_store(&buf, T2, off + 2 * copy_word_size);
+			if (off == -(4 * copy_word_size))
+				uasm_il_bne(&buf, &r, A2, A0,
+					    label_copy_nopref);
+			build_copy_store(&buf, T3, off + 3 * copy_word_size);
+			off += 4 * copy_word_size;
+		} while (off < 0);
+	}
+
+	uasm_i_jr(&buf, RA);
+	uasm_i_nop(&buf);
+
+	BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));
+
+	uasm_resolve_relocs(relocs, labels);
+
+	pr_debug("Synthesized copy page handler (%u instructions).\n",
+		 (u32)(buf - copy_page_array));
+
+	pr_debug("\t.set push\n");
+	pr_debug("\t.set noreorder\n");
+	for (i = 0; i < (buf - copy_page_array); i++)
+		pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
+	pr_debug("\t.set pop\n");
+}
+
+#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
+
+/*
+ * Pad descriptors to cacheline, since each is exclusively owned by a
+ * particular CPU.
+ */
+struct dmadscr {
+	u64 dscr_a;
+	u64 dscr_b;
+	u64 pad_a;
+	u64 pad_b;
+} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];
+
+void sb1_dma_init(void)
+{
+	int i;
+
+	for (i = 0; i < DM_NUM_CHANNELS; i++) {
+		const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
+				     V_DM_DSCR_BASE_RINGSZ(1);
+		void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));
+
+		__raw_writeq(base_val, base_reg);
+		__raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
+		__raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
+	}
+}
+
+void clear_page(void *page)
+{
+	u64 to_phys = CPHYSADDR((unsigned long)page);
+	unsigned int cpu = smp_processor_id();
+
+	/* if the page is not in KSEG0, use old way */
+	if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
+		return clear_page_cpu(page);
+
+	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
+				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
+	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
+	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
+
+	/*
+	 * Don't really want to do it this way, but there's no
+	 * reliable way to delay completion detection.
+	 */
+	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
+		 & M_DM_DSCR_BASE_INTERRUPT))
+		;
+	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+
+void copy_page(void *to, void *from)
+{
+	u64 from_phys = CPHYSADDR((unsigned long)from);
+	u64 to_phys = CPHYSADDR((unsigned long)to);
+	unsigned int cpu = smp_processor_id();
+
+	/* if any page is not in KSEG0, use old way */
+	if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
+	    || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
+		return copy_page_cpu(to, from);
+
+	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
+				 M_DM_DSCRA_INTERRUPT;
+	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
+	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
+
+	/*
+	 * Don't really want to do it this way, but there's no
+	 * reliable way to delay completion detection.
+	 */
+	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
+		 & M_DM_DSCR_BASE_INTERRUPT))
+		;
+	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
+}
+
+#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */
diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c
deleted file mode 100644
index 455dedb5b39e..000000000000
--- a/arch/mips/mm/pg-r4k.c
+++ /dev/null
@@ -1,534 +0,0 @@
-/*
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
- * Copyright (C) 2007  Maciej W. Rozycki
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-
-#include <asm/bugs.h>
-#include <asm/cacheops.h>
-#include <asm/inst.h>
-#include <asm/io.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/prefetch.h>
-#include <asm/system.h>
-#include <asm/bootinfo.h>
-#include <asm/mipsregs.h>
-#include <asm/mmu_context.h>
-#include <asm/cpu.h>
-#include <asm/war.h>
-
-#define half_scache_line_size()	(cpu_scache_line_size() >> 1)
-#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
-#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)
-
-
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache:		0x58 bytes
- * R4600 v1.7:				0x5c bytes
- * R4600 v2.0:				0x60 bytes
- * With prefetching, 16 byte strides	0xa0 bytes
- */
-
-static unsigned int clear_page_array[0x130 / 4];
-
-void clear_page(void * page) __attribute__((alias("clear_page_array")));
-
-EXPORT_SYMBOL(clear_page);
-
-/*
- * Maximum sizes:
- *
- * R4000 128 bytes S-cache:		0x11c bytes
- * R4600 v1.7:				0x080 bytes
- * R4600 v2.0:				0x07c bytes
- * With prefetching, 16 byte strides	0x0b8 bytes
- */
-static unsigned int copy_page_array[0x148 / 4];
-
-void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
-
-EXPORT_SYMBOL(copy_page);
-
-/*
- * This is suboptimal for 32-bit kernels; we assume that R10000 is only used
- * with 64-bit kernels.  The prefetch offsets have been experimentally tuned
- * an Origin 200.
- */
-static int pref_offset_clear __cpuinitdata = 512;
-static int pref_offset_copy  __cpuinitdata = 256;
-
-static unsigned int pref_src_mode __cpuinitdata;
-static unsigned int pref_dst_mode __cpuinitdata;
-
-static int load_offset __cpuinitdata;
-static int store_offset __cpuinitdata;
-
-static unsigned int __cpuinitdata *dest, *epc;
-
-static unsigned int instruction_pending;
-static union mips_instruction delayed_mi;
-
-static void __cpuinit emit_instruction(union mips_instruction mi)
-{
-	if (instruction_pending)
-		*epc++ = delayed_mi.word;
-
-	instruction_pending = 1;
-	delayed_mi = mi;
-}
-
-static inline void flush_delay_slot_or_nop(void)
-{
-	if (instruction_pending) {
-		*epc++ = delayed_mi.word;
-		instruction_pending = 0;
-		return;
-	}
-
-	*epc++ = 0;
-}
-
-static inline unsigned int *label(void)
-{
-	if (instruction_pending) {
-		*epc++ = delayed_mi.word;
-		instruction_pending = 0;
-	}
-
-	return epc;
-}
-
-static inline void build_insn_word(unsigned int word)
-{
-	union mips_instruction mi;
-
-	mi.word		 = word;
-
-	emit_instruction(mi);
-}
-
-static inline void build_nop(void)
-{
-	build_insn_word(0);			/* nop */
-}
-
-static inline void build_src_pref(int advance)
-{
-	if (!(load_offset & (cpu_dcache_line_size() - 1)) && advance) {
-		union mips_instruction mi;
-
-		mi.i_format.opcode     = pref_op;
-		mi.i_format.rs         = 5;		/* $a1 */
-		mi.i_format.rt         = pref_src_mode;
-		mi.i_format.simmediate = load_offset + advance;
-
-		emit_instruction(mi);
-	}
-}
-
-static inline void __build_load_reg(int reg)
-{
-	union mips_instruction mi;
-	unsigned int width;
-
-	if (cpu_has_64bit_gp_regs) {
-		mi.i_format.opcode     = ld_op;
-		width = 8;
-	} else {
-		mi.i_format.opcode     = lw_op;
-		width = 4;
-	}
-	mi.i_format.rs         = 5;		/* $a1 */
-	mi.i_format.rt         = reg;		/* $reg */
-	mi.i_format.simmediate = load_offset;
-
-	load_offset += width;
-	emit_instruction(mi);
-}
-
-static inline void build_load_reg(int reg)
-{
-	if (cpu_has_prefetch)
-		build_src_pref(pref_offset_copy);
-
-	__build_load_reg(reg);
-}
-
-static inline void build_dst_pref(int advance)
-{
-	if (!(store_offset & (cpu_dcache_line_size() - 1)) && advance) {
-		union mips_instruction mi;
-
-		mi.i_format.opcode     = pref_op;
-		mi.i_format.rs         = 4;		/* $a0 */
-		mi.i_format.rt         = pref_dst_mode;
-		mi.i_format.simmediate = store_offset + advance;
-
-		emit_instruction(mi);
-	}
-}
-
-static inline void build_cdex_s(void)
-{
-	union mips_instruction mi;
-
-	if ((store_offset & (cpu_scache_line_size() - 1)))
-		return;
-
-	mi.c_format.opcode     = cache_op;
-	mi.c_format.rs         = 4;		/* $a0 */
-	mi.c_format.c_op       = 3;		/* Create Dirty Exclusive */
-	mi.c_format.cache      = 3;		/* Secondary Data Cache */
-	mi.c_format.simmediate = store_offset;
-
-	emit_instruction(mi);
-}
-
-static inline void build_cdex_p(void)
-{
-	union mips_instruction mi;
-
-	if (store_offset & (cpu_dcache_line_size() - 1))
-		return;
-
-	if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
-		build_nop();
-		build_nop();
-		build_nop();
-		build_nop();
-	}
-
-	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-		build_insn_word(0x8c200000);	/* lw      $zero, ($at) */
-
-	mi.c_format.opcode     = cache_op;
-	mi.c_format.rs         = 4;		/* $a0 */
-	mi.c_format.c_op       = 3;		/* Create Dirty Exclusive */
-	mi.c_format.cache      = 1;		/* Data Cache */
-	mi.c_format.simmediate = store_offset;
-
-	emit_instruction(mi);
-}
-
-static void __cpuinit __build_store_reg(int reg)
-{
-	union mips_instruction mi;
-	unsigned int width;
-
-	if (cpu_has_64bit_gp_regs ||
-	    (cpu_has_64bit_zero_reg && reg == 0)) {
-		mi.i_format.opcode     = sd_op;
-		width = 8;
-	} else {
-		mi.i_format.opcode     = sw_op;
-		width = 4;
-	}
-	mi.i_format.rs         = 4;		/* $a0 */
-	mi.i_format.rt         = reg;		/* $reg */
-	mi.i_format.simmediate = store_offset;
-
-	store_offset += width;
-	emit_instruction(mi);
-}
-
-static inline void build_store_reg(int reg)
-{
-	int pref_off = cpu_has_prefetch ?
-		(reg ? pref_offset_copy : pref_offset_clear) : 0;
-	if (pref_off)
-		build_dst_pref(pref_off);
-	else if (cpu_has_cache_cdex_s)
-		build_cdex_s();
-	else if (cpu_has_cache_cdex_p)
-		build_cdex_p();
-
-	__build_store_reg(reg);
-}
-
-static inline void build_addiu_rt_rs(unsigned int rt, unsigned int rs,
-				     unsigned long offset)
-{
-	union mips_instruction mi;
-
-	BUG_ON(offset > 0x7fff);
-
-	if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
-		mi.i_format.opcode     = addiu_op;
-		mi.i_format.rs         = 0;	/* $zero */
-		mi.i_format.rt         = 25;	/* $t9 */
-		mi.i_format.simmediate = offset;
-		emit_instruction(mi);
-
-		mi.r_format.opcode     = spec_op;
-		mi.r_format.rs         = rs;
-		mi.r_format.rt         = 25;	/* $t9 */
-		mi.r_format.rd         = rt;
-		mi.r_format.re         = 0;
-		mi.r_format.func       = daddu_op;
-	} else {
-		mi.i_format.opcode     = cpu_has_64bit_gp_regs ?
-					 daddiu_op : addiu_op;
-		mi.i_format.rs         = rs;
-		mi.i_format.rt         = rt;
-		mi.i_format.simmediate = offset;
-	}
-	emit_instruction(mi);
-}
-
-static inline void build_addiu_a2_a0(unsigned long offset)
-{
-	build_addiu_rt_rs(6, 4, offset);	/* $a2, $a0, offset */
-}
-
-static inline void build_addiu_a2(unsigned long offset)
-{
-	build_addiu_rt_rs(6, 6, offset);	/* $a2, $a2, offset */
-}
-
-static inline void build_addiu_a1(unsigned long offset)
-{
-	build_addiu_rt_rs(5, 5, offset);	/* $a1, $a1, offset */
-
-	load_offset -= offset;
-}
-
-static inline void build_addiu_a0(unsigned long offset)
-{
-	build_addiu_rt_rs(4, 4, offset);	/* $a0, $a0, offset */
-
-	store_offset -= offset;
-}
-
-static inline void build_bne(unsigned int *dest)
-{
-	union mips_instruction mi;
-
-	mi.i_format.opcode = bne_op;
-	mi.i_format.rs     = 6;			/* $a2 */
-	mi.i_format.rt     = 4;			/* $a0 */
-	mi.i_format.simmediate = dest - epc - 1;
-
-	*epc++ = mi.word;
-	flush_delay_slot_or_nop();
-}
-
-static inline void build_jr_ra(void)
-{
-	union mips_instruction mi;
-
-	mi.r_format.opcode = spec_op;
-	mi.r_format.rs     = 31;
-	mi.r_format.rt     = 0;
-	mi.r_format.rd     = 0;
-	mi.r_format.re     = 0;
-	mi.r_format.func   = jr_op;
-
-	*epc++ = mi.word;
-	flush_delay_slot_or_nop();
-}
-
-void __cpuinit build_clear_page(void)
-{
-	unsigned int loop_start;
-	unsigned long off;
-	int i;
-
-	epc = (unsigned int *) &clear_page_array;
-	instruction_pending = 0;
-	store_offset = 0;
-
-	if (cpu_has_prefetch) {
-		switch (current_cpu_type()) {
-		case CPU_TX49XX:
-			/* TX49 supports only Pref_Load */
-			pref_offset_clear = 0;
-			pref_offset_copy = 0;
-			break;
-
-		case CPU_RM9000:
-			/*
-			 * As a workaround for erratum G105 which make the
-			 * PrepareForStore hint unusable we fall back to
-			 * StoreRetained on the RM9000.  Once it is known which
-			 * versions of the RM9000 we'll be able to condition-
-			 * alize this.
-			 */
-
-		case CPU_R10000:
-		case CPU_R12000:
-		case CPU_R14000:
-			pref_src_mode = Pref_LoadStreamed;
-			pref_dst_mode = Pref_StoreStreamed;
-			break;
-
-		default:
-			pref_src_mode = Pref_LoadStreamed;
-			pref_dst_mode = Pref_PrepareForStore;
-			break;
-		}
-	}
-
-        off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0);
-	if (off > 0x7fff) {
-		build_addiu_a2_a0(off >> 1);
-		build_addiu_a2(off >> 1);
-	} else
-		build_addiu_a2_a0(off);
-
-	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-		build_insn_word(0x3c01a000);	/* lui     $at, 0xa000  */
-
-dest = label();
-	do {
-		build_store_reg(0);
-		build_store_reg(0);
-		build_store_reg(0);
-		build_store_reg(0);
-	} while (store_offset < half_scache_line_size());
-	build_addiu_a0(2 * store_offset);
-	loop_start = store_offset;
-	do {
-		build_store_reg(0);
-		build_store_reg(0);
-		build_store_reg(0);
-		build_store_reg(0);
-	} while ((store_offset - loop_start) < half_scache_line_size());
-	build_bne(dest);
-
-	if (cpu_has_prefetch && pref_offset_clear) {
-		build_addiu_a2_a0(pref_offset_clear);
-	dest = label();
-		loop_start = store_offset;
-		do {
-			__build_store_reg(0);
-			__build_store_reg(0);
-			__build_store_reg(0);
-			__build_store_reg(0);
-		} while ((store_offset - loop_start) < half_scache_line_size());
-		build_addiu_a0(2 * store_offset);
-		loop_start = store_offset;
-		do {
-			__build_store_reg(0);
-			__build_store_reg(0);
-			__build_store_reg(0);
-			__build_store_reg(0);
-		} while ((store_offset - loop_start) < half_scache_line_size());
-		build_bne(dest);
-	}
-
-	build_jr_ra();
-
-	BUG_ON(epc > clear_page_array + ARRAY_SIZE(clear_page_array));
-
-	pr_info("Synthesized clear page handler (%u instructions).\n",
-		(unsigned int)(epc - clear_page_array));
-
-	pr_debug("\t.set push\n");
-	pr_debug("\t.set noreorder\n");
-	for (i = 0; i < (epc - clear_page_array); i++)
-		pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
-	pr_debug("\t.set pop\n");
-}
-
-void __cpuinit build_copy_page(void)
-{
-	unsigned int loop_start;
-	unsigned long off;
-	int i;
-
-	epc = (unsigned int *) &copy_page_array;
-	store_offset = load_offset = 0;
-	instruction_pending = 0;
-
-	off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0);
-	if (off > 0x7fff) {
-		build_addiu_a2_a0(off >> 1);
-		build_addiu_a2(off >> 1);
-	} else
-		build_addiu_a2_a0(off);
-
-	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
-		build_insn_word(0x3c01a000);	/* lui     $at, 0xa000  */
-
-dest = label();
-	loop_start = store_offset;
-	do {
-		build_load_reg( 8);
-		build_load_reg( 9);
-		build_load_reg(10);
-		build_load_reg(11);
-		build_store_reg( 8);
-		build_store_reg( 9);
-		build_store_reg(10);
-		build_store_reg(11);
-	} while ((store_offset - loop_start) < half_scache_line_size());
-	build_addiu_a0(2 * store_offset);
-	build_addiu_a1(2 * load_offset);
-	loop_start = store_offset;
-	do {
-		build_load_reg( 8);
-		build_load_reg( 9);
-		build_load_reg(10);
-		build_load_reg(11);
-		build_store_reg( 8);
-		build_store_reg( 9);
-		build_store_reg(10);
-		build_store_reg(11);
-	} while ((store_offset - loop_start) < half_scache_line_size());
-	build_bne(dest);
-
-	if (cpu_has_prefetch && pref_offset_copy) {
-		build_addiu_a2_a0(pref_offset_copy);
-	dest = label();
-		loop_start = store_offset;
-		do {
-			__build_load_reg( 8);
-			__build_load_reg( 9);
-			__build_load_reg(10);
-			__build_load_reg(11);
-			__build_store_reg( 8);
-			__build_store_reg( 9);
-			__build_store_reg(10);
-			__build_store_reg(11);
-		} while ((store_offset - loop_start) < half_scache_line_size());
-		build_addiu_a0(2 * store_offset);
-		build_addiu_a1(2 * load_offset);
-		loop_start = store_offset;
-		do {
-			__build_load_reg( 8);
-			__build_load_reg( 9);
-			__build_load_reg(10);
-			__build_load_reg(11);
-			__build_store_reg( 8);
-			__build_store_reg( 9);
-			__build_store_reg(10);
-			__build_store_reg(11);
-		} while ((store_offset - loop_start) < half_scache_line_size());
-		build_bne(dest);
-	}
-
-	build_jr_ra();
-
-	BUG_ON(epc > copy_page_array + ARRAY_SIZE(copy_page_array));
-
-	pr_info("Synthesized copy page handler (%u instructions).\n",
-		(unsigned int)(epc - copy_page_array));
-
-	pr_debug("\t.set push\n");
-	pr_debug("\t.set noreorder\n");
-	for (i = 0; i < (epc - copy_page_array); i++)
-		pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
-	pr_debug("\t.set pop\n");
-}
diff --git a/arch/mips/mm/pg-sb1.c b/arch/mips/mm/pg-sb1.c
deleted file mode 100644
index 49e289d05414..000000000000
--- a/arch/mips/mm/pg-sb1.c
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
- * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
- * Copyright (C) 2000 SiByte, Inc.
- * Copyright (C) 2005 Thiemo Seufer
- *
- * Written by Justin Carlson of SiByte, Inc.
- *         and Kip Walker of Broadcom Corp.
- *
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
- */
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-
-#include <asm/io.h>
-#include <asm/sibyte/sb1250.h>
-#include <asm/sibyte/sb1250_regs.h>
-#include <asm/sibyte/sb1250_dma.h>
-
-#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
-#define SB1_PREF_LOAD_STREAMED_HINT "0"
-#define SB1_PREF_STORE_STREAMED_HINT "1"
-#else
-#define SB1_PREF_LOAD_STREAMED_HINT "4"
-#define SB1_PREF_STORE_STREAMED_HINT "5"
-#endif
-
-static inline void clear_page_cpu(void *page)
-{
-	unsigned char *addr = (unsigned char *) page;
-	unsigned char *end = addr + PAGE_SIZE;
-
-	/*
-	 * JDCXXX - This should be bottlenecked by the write buffer, but these
-	 * things tend to be mildly unpredictable...should check this on the
-	 * performance model
-	 *
-	 * We prefetch 4 lines ahead.  We're also "cheating" slightly here...
-	 * since we know we're on an SB1, we force the assembler to take
-	 * 64-bit operands to speed things up
-	 */
-	__asm__ __volatile__(
-	"	.set	push		\n"
-	"	.set	mips4		\n"
-	"	.set	noreorder	\n"
-#ifdef CONFIG_CPU_HAS_PREFETCH
-	"	daddiu	%0, %0, 128	\n"
-	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%0)  \n"
-					     /* Prefetch the first 4 lines */
-	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -96(%0)  \n"
-	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -64(%0)  \n"
-	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n"
-	"1:	sd	$0, -128(%0)	\n"  /* Throw out a cacheline of 0's */
-	"	sd	$0, -120(%0)	\n"
-	"	sd	$0, -112(%0)	\n"
-	"	sd	$0, -104(%0)	\n"
-	"	daddiu	%0, %0, 32	\n"
-	"	bnel	%0, %1, 1b	\n"
-	"	 pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%0)  \n"
-	"	daddiu	%0, %0, -128	\n"
-#endif
-	"	sd	$0, 0(%0)	\n"  /* Throw out a cacheline of 0's */
-	"1:	sd	$0, 8(%0)	\n"
-	"	sd	$0, 16(%0)	\n"
-	"	sd	$0, 24(%0)	\n"
-	"	daddiu	%0, %0, 32	\n"
-	"	bnel	%0, %1, 1b	\n"
-	"	 sd	$0, 0(%0)	\n"
-	"	.set	pop		\n"
-	: "+r" (addr)
-	: "r" (end)
-	: "memory");
-}
-
-static inline void copy_page_cpu(void *to, void *from)
-{
-	unsigned char *src = (unsigned char *)from;
-	unsigned char *dst = (unsigned char *)to;
-	unsigned char *end = src + PAGE_SIZE;
-
-	/*
-	 * The pref's used here are using "streaming" hints, which cause the
-	 * copied data to be kicked out of the cache sooner.  A page copy often
-	 * ends up copying a lot more data than is commonly used, so this seems
-	 * to make sense in terms of reducing cache pollution, but I've no real
-	 * performance data to back this up
-	 */
-	__asm__ __volatile__(
-	"	.set	push		\n"
-	"	.set	mips4		\n"
-	"	.set	noreorder	\n"
-#ifdef CONFIG_CPU_HAS_PREFETCH
-	"	daddiu	%0, %0, 128	\n"
-	"	daddiu	%1, %1, 128	\n"
-	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", -128(%0)\n"
-					     /* Prefetch the first 4 lines */
-	"	pref	" SB1_PREF_STORE_STREAMED_HINT ", -128(%1)\n"
-	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -96(%0)\n"
-	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -96(%1)\n"
-	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -64(%0)\n"
-	"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -64(%1)\n"
-	"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n"
-	"1:	pref	" SB1_PREF_STORE_STREAMED_HINT ",  -32(%1)\n"
-# ifdef CONFIG_64BIT
-	"	ld	$8, -128(%0)	\n"  /* Block copy a cacheline */
-	"	ld	$9, -120(%0)	\n"
-	"	ld	$10, -112(%0)	\n"
-	"	ld	$11, -104(%0)	\n"
-	"	sd	$8, -128(%1)	\n"
-	"	sd	$9, -120(%1)	\n"
-	"	sd	$10, -112(%1)	\n"
-	"	sd	$11, -104(%1)	\n"
-# else
-	"	lw	$2, -128(%0)	\n"  /* Block copy a cacheline */
-	"	lw	$3, -124(%0)	\n"
-	"	lw	$6, -120(%0)	\n"
-	"	lw	$7, -116(%0)	\n"
-	"	lw	$8, -112(%0)	\n"
-	"	lw	$9, -108(%0)	\n"
-	"	lw	$10, -104(%0)	\n"
-	"	lw	$11, -100(%0)	\n"
-	"	sw	$2, -128(%1)	\n"
-	"	sw	$3, -124(%1)	\n"
-	"	sw	$6, -120(%1)	\n"
-	"	sw	$7, -116(%1)	\n"
-	"	sw	$8, -112(%1)	\n"
-	"	sw	$9, -108(%1)	\n"
-	"	sw	$10, -104(%1)	\n"
-	"	sw	$11, -100(%1)	\n"
-# endif
-	"	daddiu	%0, %0, 32	\n"
-	"	daddiu	%1, %1, 32	\n"
-	"	bnel	%0, %2, 1b	\n"
-	"	 pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  -32(%0)\n"
-	"	daddiu	%0, %0, -128	\n"
-	"	daddiu	%1, %1, -128	\n"
-#endif
-#ifdef CONFIG_64BIT
-	"	ld	$8, 0(%0)	\n"  /* Block copy a cacheline */
-	"1:	ld	$9, 8(%0)	\n"
-	"	ld	$10, 16(%0)	\n"
-	"	ld	$11, 24(%0)	\n"
-	"	sd	$8, 0(%1)	\n"
-	"	sd	$9, 8(%1)	\n"
-	"	sd	$10, 16(%1)	\n"
-	"	sd	$11, 24(%1)	\n"
-#else
-	"	lw	$2, 0(%0)	\n"  /* Block copy a cacheline */
-	"1:	lw	$3, 4(%0)	\n"
-	"	lw	$6, 8(%0)	\n"
-	"	lw	$7, 12(%0)	\n"
-	"	lw	$8, 16(%0)	\n"
-	"	lw	$9, 20(%0)	\n"
-	"	lw	$10, 24(%0)	\n"
-	"	lw	$11, 28(%0)	\n"
-	"	sw	$2, 0(%1)	\n"
-	"	sw	$3, 4(%1)	\n"
-	"	sw	$6, 8(%1)	\n"
-	"	sw	$7, 12(%1)	\n"
-	"	sw	$8, 16(%1)	\n"
-	"	sw	$9, 20(%1)	\n"
-	"	sw	$10, 24(%1)	\n"
-	"	sw	$11, 28(%1)	\n"
-#endif
-	"	daddiu	%0, %0, 32	\n"
-	"	daddiu	%1, %1, 32	\n"
-	"	bnel	%0, %2, 1b	\n"
-#ifdef CONFIG_64BIT
-	"	 ld	$8, 0(%0)	\n"
-#else
-	"	 lw	$2, 0(%0)	\n"
-#endif
-	"	.set	pop		\n"
-	: "+r" (src), "+r" (dst)
-	: "r" (end)
-#ifdef CONFIG_64BIT
-	: "$8", "$9", "$10", "$11", "memory");
-#else
-	: "$2", "$3", "$6", "$7", "$8", "$9", "$10", "$11", "memory");
-#endif
-}
-
-
-#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
-
-/*
- * Pad descriptors to cacheline, since each is exclusively owned by a
- * particular CPU.
- */
-typedef struct dmadscr_s {
-	u64 dscr_a;
-	u64 dscr_b;
-	u64 pad_a;
-	u64 pad_b;
-} dmadscr_t;
-
-static dmadscr_t page_descr[DM_NUM_CHANNELS]
-	__attribute__((aligned(SMP_CACHE_BYTES)));
-
-void sb1_dma_init(void)
-{
-	int i;
-
-	for (i = 0; i < DM_NUM_CHANNELS; i++) {
-		const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
-				     V_DM_DSCR_BASE_RINGSZ(1);
-		void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));
-
-		__raw_writeq(base_val, base_reg);
-		__raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
-		__raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
-	}
-}
-
-void clear_page(void *page)
-{
-	u64 to_phys = CPHYSADDR((unsigned long)page);
-	unsigned int cpu = smp_processor_id();
-
-	/* if the page is not in KSEG0, use old way */
-	if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
-		return clear_page_cpu(page);
-
-	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
-				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
-	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
-	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
-
-	/*
-	 * Don't really want to do it this way, but there's no
-	 * reliable way to delay completion detection.
-	 */
-	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
-		 & M_DM_DSCR_BASE_INTERRUPT))
-		;
-	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
-}
-
-void copy_page(void *to, void *from)
-{
-	u64 from_phys = CPHYSADDR((unsigned long)from);
-	u64 to_phys = CPHYSADDR((unsigned long)to);
-	unsigned int cpu = smp_processor_id();
-
-	/* if any page is not in KSEG0, use old way */
-	if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
-	    || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
-		return copy_page_cpu(to, from);
-
-	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
-				 M_DM_DSCRA_INTERRUPT;
-	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
-	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));
-
-	/*
-	 * Don't really want to do it this way, but there's no
-	 * reliable way to delay completion detection.
-	 */
-	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
-		 & M_DM_DSCR_BASE_INTERRUPT))
-		;
-	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
-}
-
-#else /* !CONFIG_SIBYTE_DMA_PAGEOPS */
-
-void clear_page(void *page)
-{
-	return clear_page_cpu(page);
-}
-
-void copy_page(void *to, void *from)
-{
-	return copy_page_cpu(to, from);
-}
-
-#endif /* !CONFIG_SIBYTE_DMA_PAGEOPS */
-
-EXPORT_SYMBOL(clear_page);
-EXPORT_SYMBOL(copy_page);
-
-void __cpuinit build_clear_page(void)
-{
-}
-
-void __cpuinit build_copy_page(void)
-{
-}
diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c
index 57df1c38e303..7dfa579ab24c 100644
--- a/arch/mips/mm/pgtable.c
+++ b/arch/mips/mm/pgtable.c
@@ -12,7 +12,6 @@ void show_mem(void)
 
 	printk("Mem-info:\n");
 	show_free_areas();
-	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
 	pfn = max_mapnr;
 	while (pfn-- > 0) {
 		if (!pfn_valid(pfn))
diff --git a/arch/mips/mm/tlb-r4k.c b/arch/mips/mm/tlb-r4k.c
index 63065d6e8063..5ce2fa745626 100644
--- a/arch/mips/mm/tlb-r4k.c
+++ b/arch/mips/mm/tlb-r4k.c
@@ -299,7 +299,7 @@ void __update_tlb(struct vm_area_struct * vma, unsigned long address, pte_t pte)
 	idx = read_c0_index();
 	ptep = pte_offset_map(pmdp, address);
 
-#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1)
+#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32)
 	write_c0_entrylo0(ptep->pte_high);
 	ptep++;
 	write_c0_entrylo1(ptep->pte_high);
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 1a6f7704cc89..1655aa69e133 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -58,13 +58,13 @@ enum opcode {
 	insn_invalid,
 	insn_addu, insn_addiu, insn_and, insn_andi, insn_beq,
 	insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl,
-	insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0,
-	insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32,
-	insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld,
-	insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0,
-	insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll,
-	insn_sra, insn_srl, insn_subu, insn_sw, insn_tlbp, insn_tlbwi,
-	insn_tlbwr, insn_xor, insn_xori
+	insn_bne, insn_cache, insn_daddu, insn_daddiu, insn_dmfc0,
+	insn_dmtc0, insn_dsll, insn_dsll32, insn_dsra, insn_dsrl,
+	insn_dsrl32, insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr,
+	insn_ld, insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0,
+	insn_mtc0, insn_ori, insn_pref, insn_rfe, insn_sc, insn_scd,
+	insn_sd, insn_sll, insn_sra, insn_srl, insn_subu, insn_sw,
+	insn_tlbp, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori
 };
 
 struct insn {
@@ -94,6 +94,7 @@ static struct insn insn_table[] __cpuinitdata = {
 	{ insn_bltz, M(bcond_op, 0, bltz_op, 0, 0, 0), RS | BIMM },
 	{ insn_bltzl, M(bcond_op, 0, bltzl_op, 0, 0, 0), RS | BIMM },
 	{ insn_bne, M(bne_op, 0, 0, 0, 0, 0), RS | RT | BIMM },
+	{ insn_cache,  M(cache_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_daddiu, M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM },
 	{ insn_daddu, M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD },
 	{ insn_dmfc0, M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
@@ -116,6 +117,7 @@ static struct insn insn_table[] __cpuinitdata = {
 	{ insn_mfc0,  M(cop0_op, mfc_op, 0, 0, 0, 0),  RT | RD | SET},
 	{ insn_mtc0,  M(cop0_op, mtc_op, 0, 0, 0, 0),  RT | RD | SET},
 	{ insn_ori,  M(ori_op, 0, 0, 0, 0, 0),  RS | RT | UIMM },
+	{ insn_pref,  M(pref_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_rfe,  M(cop0_op, cop_op, 0, 0, 0, rfe_op),  0 },
 	{ insn_sc,  M(sc_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
 	{ insn_scd,  M(scd_op, 0, 0, 0, 0, 0),  RS | RT | SIMM },
@@ -337,6 +339,7 @@ I_u1s2(_bgezl)
 I_u1s2(_bltz)
 I_u1s2(_bltzl)
 I_u1u2s3(_bne)
+I_u2s3u1(_cache)
 I_u1u2u3(_dmfc0)
 I_u1u2u3(_dmtc0)
 I_u2u1s3(_daddiu)
@@ -359,6 +362,7 @@ I_u2s3u1(_lw)
 I_u1u2u3(_mfc0)
 I_u1u2u3(_mtc0)
 I_u2u1u3(_ori)
+I_u2s3u1(_pref)
 I_0(_rfe)
 I_u2s3u1(_sc)
 I_u2s3u1(_scd)
@@ -555,6 +559,14 @@ uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid)
 }
 
 void __cpuinit
+uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
+	unsigned int reg2, int lid)
+{
+	uasm_r_mips_pc16(r, *p, lid);
+	uasm_i_bne(p, reg1, reg2, 0);
+}
+
+void __cpuinit
 uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid)
 {
 	uasm_r_mips_pc16(r, *p, lid);
diff --git a/arch/mips/mm/uasm.h b/arch/mips/mm/uasm.h
index fe0574f6e77d..0d6a66f32030 100644
--- a/arch/mips/mm/uasm.h
+++ b/arch/mips/mm/uasm.h
@@ -55,6 +55,7 @@ Ip_u1s2(_bgezl);
 Ip_u1s2(_bltz);
 Ip_u1s2(_bltzl);
 Ip_u1u2s3(_bne);
+Ip_u2s3u1(_cache);
 Ip_u1u2u3(_dmfc0);
 Ip_u1u2u3(_dmtc0);
 Ip_u2u1s3(_daddiu);
@@ -77,6 +78,7 @@ Ip_u2s3u1(_lw);
 Ip_u1u2u3(_mfc0);
 Ip_u1u2u3(_mtc0);
 Ip_u2u1u3(_ori);
+Ip_u2s3u1(_pref);
 Ip_0(_rfe);
 Ip_u2s3u1(_sc);
 Ip_u2s3u1(_scd);
@@ -177,6 +179,8 @@ void uasm_il_bltz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
 void uasm_il_b(u32 **p, struct uasm_reloc **r, int lid);
 void uasm_il_beqz(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
 void uasm_il_beqzl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
+void uasm_il_bne(u32 **p, struct uasm_reloc **r, unsigned int reg1,
+		 unsigned int reg2, int lid);
 void uasm_il_bnez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
 void uasm_il_bgezl(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
 void uasm_il_bgez(u32 **p, struct uasm_reloc **r, unsigned int reg, int lid);
diff --git a/arch/mips/philips/pnx8550/common/Makefile b/arch/mips/nxp/pnx8550/common/Makefile
index 31cc1a5cec3b..31cc1a5cec3b 100644
--- a/arch/mips/philips/pnx8550/common/Makefile
+++ b/arch/mips/nxp/pnx8550/common/Makefile
diff --git a/arch/mips/philips/pnx8550/common/gdb_hook.c b/arch/mips/nxp/pnx8550/common/gdb_hook.c
index ad4624f6d9bc..ad4624f6d9bc 100644
--- a/arch/mips/philips/pnx8550/common/gdb_hook.c
+++ b/arch/mips/nxp/pnx8550/common/gdb_hook.c
diff --git a/arch/mips/philips/pnx8550/common/int.c b/arch/mips/nxp/pnx8550/common/int.c
index aad03429a5e3..aad03429a5e3 100644
--- a/arch/mips/philips/pnx8550/common/int.c
+++ b/arch/mips/nxp/pnx8550/common/int.c
diff --git a/arch/mips/philips/pnx8550/common/pci.c b/arch/mips/nxp/pnx8550/common/pci.c
index eee4f3dfc410..eee4f3dfc410 100644
--- a/arch/mips/philips/pnx8550/common/pci.c
+++ b/arch/mips/nxp/pnx8550/common/pci.c
diff --git a/arch/mips/philips/pnx8550/common/platform.c b/arch/mips/nxp/pnx8550/common/platform.c
index c839436bd012..c7c763dbe588 100644
--- a/arch/mips/philips/pnx8550/common/platform.c
+++ b/arch/mips/nxp/pnx8550/common/platform.c
@@ -1,5 +1,5 @@
 /*
- * Platform device support for Philips PNX8550 SoCs
+ * Platform device support for NXP PNX8550 SoCs
  *
  * Copyright 2005, Embedded Alley Solutions, Inc
  *
diff --git a/arch/mips/philips/pnx8550/common/proc.c b/arch/mips/nxp/pnx8550/common/proc.c
index 18b125e3b65d..18b125e3b65d 100644
--- a/arch/mips/philips/pnx8550/common/proc.c
+++ b/arch/mips/nxp/pnx8550/common/proc.c
diff --git a/arch/mips/philips/pnx8550/common/prom.c b/arch/mips/nxp/pnx8550/common/prom.c
index 2f567452e7ac..2f567452e7ac 100644
--- a/arch/mips/philips/pnx8550/common/prom.c
+++ b/arch/mips/nxp/pnx8550/common/prom.c
diff --git a/arch/mips/philips/pnx8550/common/reset.c b/arch/mips/nxp/pnx8550/common/reset.c
index 7b2cbc5b2c7c..7b2cbc5b2c7c 100644
--- a/arch/mips/philips/pnx8550/common/reset.c
+++ b/arch/mips/nxp/pnx8550/common/reset.c
diff --git a/arch/mips/philips/pnx8550/common/setup.c b/arch/mips/nxp/pnx8550/common/setup.c
index 92d764c97701..92d764c97701 100644
--- a/arch/mips/philips/pnx8550/common/setup.c
+++ b/arch/mips/nxp/pnx8550/common/setup.c
diff --git a/arch/mips/philips/pnx8550/common/time.c b/arch/mips/nxp/pnx8550/common/time.c
index 62f495b57f93..62f495b57f93 100644
--- a/arch/mips/philips/pnx8550/common/time.c
+++ b/arch/mips/nxp/pnx8550/common/time.c
diff --git a/arch/mips/philips/pnx8550/jbs/Makefile b/arch/mips/nxp/pnx8550/jbs/Makefile
index e8228dbca8f6..ad6a8ca7d8ce 100644
--- a/arch/mips/philips/pnx8550/jbs/Makefile
+++ b/arch/mips/nxp/pnx8550/jbs/Makefile
@@ -1,4 +1,4 @@
 
-# Makefile for the Philips JBS Board.
+# Makefile for the NXP JBS Board.
 
 lib-y := init.o board_setup.o irqmap.o
diff --git a/arch/mips/philips/pnx8550/jbs/board_setup.c b/arch/mips/nxp/pnx8550/jbs/board_setup.c
index f92826e0096d..f92826e0096d 100644
--- a/arch/mips/philips/pnx8550/jbs/board_setup.c
+++ b/arch/mips/nxp/pnx8550/jbs/board_setup.c
diff --git a/arch/mips/philips/pnx8550/jbs/init.c b/arch/mips/nxp/pnx8550/jbs/init.c
index 90b4d35f3ece..d59b4a4e5e8b 100644
--- a/arch/mips/philips/pnx8550/jbs/init.c
+++ b/arch/mips/nxp/pnx8550/jbs/init.c
@@ -40,7 +40,7 @@ extern char *prom_getenv(char *envname);
 
 const char *get_system_type(void)
 {
-	return "Philips PNX8550/JBS";
+	return "NXP PNX8550/JBS";
 }
 
 void __init prom_init(void)
diff --git a/arch/mips/philips/pnx8550/jbs/irqmap.c b/arch/mips/nxp/pnx8550/jbs/irqmap.c
index 98c3429e6e50..7fc89842002c 100644
--- a/arch/mips/philips/pnx8550/jbs/irqmap.c
+++ b/arch/mips/nxp/pnx8550/jbs/irqmap.c
@@ -1,5 +1,5 @@
 /*
- *  Philips JBS board irqmap.
+ *  NXP JBS board irqmap.
  *
  *  Copyright 2005 Embedded Alley Solutions, Inc
  *  source@embeddealley.com
@@ -33,4 +33,3 @@ char pnx8550_irq_tab[][5] __initdata = {
 	[9]	= { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff},
 	[17]	= { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff},
 };
-
diff --git a/arch/mips/philips/pnx8550/stb810/Makefile b/arch/mips/nxp/pnx8550/stb810/Makefile
index f14b592af398..ab91d72c5664 100644
--- a/arch/mips/philips/pnx8550/stb810/Makefile
+++ b/arch/mips/nxp/pnx8550/stb810/Makefile
@@ -1,4 +1,4 @@
 
-# Makefile for the Philips STB810 Board.
+# Makefile for the NXP STB810 Board.
 
 lib-y := prom_init.o board_setup.o irqmap.o
diff --git a/arch/mips/philips/pnx8550/stb810/board_setup.c b/arch/mips/nxp/pnx8550/stb810/board_setup.c
index 345d71e53cf2..1282c27cfcb7 100644
--- a/arch/mips/philips/pnx8550/stb810/board_setup.c
+++ b/arch/mips/nxp/pnx8550/stb810/board_setup.c
@@ -1,7 +1,7 @@
 /*
  *  STB810 specific board startup routines.
  *
- *  Based on the arch/mips/philips/pnx8550/jbs/board_setup.c
+ *  Based on the arch/mips/nxp/pnx8550/jbs/board_setup.c
  *
  *  Author: MontaVista Software, Inc.
  *          source@mvista.com
diff --git a/arch/mips/philips/pnx8550/stb810/irqmap.c b/arch/mips/nxp/pnx8550/stb810/irqmap.c
index 5ee11e19975e..8c034963ddcd 100644
--- a/arch/mips/philips/pnx8550/stb810/irqmap.c
+++ b/arch/mips/nxp/pnx8550/stb810/irqmap.c
@@ -1,5 +1,5 @@
 /*
- *  Philips STB810 board irqmap.
+ *  NXP STB810 board irqmap.
  *
  *  Author: MontaVista Software, Inc.
  *          source@mvista.com
@@ -20,4 +20,3 @@ char pnx8550_irq_tab[][5] __initdata = {
 	[9]	= { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff},
 	[10]	= { -1, PNX8550_INT_PCI_INTA, 0xff, 0xff, 0xff},
 };
-
diff --git a/arch/mips/philips/pnx8550/stb810/prom_init.c b/arch/mips/nxp/pnx8550/stb810/prom_init.c
index 832dd60b0a7a..ca7f4ada0640 100644
--- a/arch/mips/philips/pnx8550/stb810/prom_init.c
+++ b/arch/mips/nxp/pnx8550/stb810/prom_init.c
@@ -28,7 +28,7 @@ extern char *prom_getenv(char *envname);
 
 const char *get_system_type(void)
 {
-	return "Philips PNX8550/STB810";
+	return "NXP PNX8950/STB810";
 }
 
 void __init prom_init(void)
diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c
index aa52aa146cea..b5f6f71b27bc 100644
--- a/arch/mips/oprofile/common.c
+++ b/arch/mips/oprofile/common.c
@@ -80,6 +80,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	case CPU_24K:
 	case CPU_25KF:
 	case CPU_34K:
+	case CPU_1004K:
 	case CPU_74K:
 	case CPU_SB1:
 	case CPU_SB1A:
diff --git a/arch/mips/oprofile/op_impl.h b/arch/mips/oprofile/op_impl.h
index fa6b4aae7523..2bfc17c30106 100644
--- a/arch/mips/oprofile/op_impl.h
+++ b/arch/mips/oprofile/op_impl.h
@@ -10,7 +10,6 @@
 #ifndef OP_IMPL_H
 #define OP_IMPL_H 1
 
-extern int null_perf_irq(void);
 extern int (*perf_irq)(void);
 
 /* Per-counter configuration as set via oprofilefs.  */
diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c
index ccbea229a0e6..da8cbb6899dc 100644
--- a/arch/mips/oprofile/op_model_mipsxx.c
+++ b/arch/mips/oprofile/op_model_mipsxx.c
@@ -31,9 +31,14 @@
 
 #define M_COUNTER_OVERFLOW		(1UL      << 31)
 
+static int (*save_perf_irq)(void);
+
 #ifdef CONFIG_MIPS_MT_SMP
-#define WHAT		(M_TC_EN_VPE | M_PERFCTL_VPEID(smp_processor_id()))
-#define vpe_id()	smp_processor_id()
+static int cpu_has_mipsmt_pertccounters;
+#define WHAT		(M_TC_EN_VPE | \
+			 M_PERFCTL_VPEID(cpu_data[smp_processor_id()].vpe_id))
+#define vpe_id()	(cpu_has_mipsmt_pertccounters ? \
+			0 : cpu_data[smp_processor_id()].vpe_id)
 
 /*
  * The number of bits to shift to convert between counters per core and
@@ -243,11 +248,11 @@ static inline int __n_counters(void)
 {
 	if (!(read_c0_config1() & M_CONFIG1_PC))
 		return 0;
-	if (!(r_c0_perfctrl0() & M_PERFCTL_MORE))
+	if (!(read_c0_perfctrl0() & M_PERFCTL_MORE))
 		return 1;
-	if (!(r_c0_perfctrl1() & M_PERFCTL_MORE))
+	if (!(read_c0_perfctrl1() & M_PERFCTL_MORE))
 		return 2;
-	if (!(r_c0_perfctrl2() & M_PERFCTL_MORE))
+	if (!(read_c0_perfctrl2() & M_PERFCTL_MORE))
 		return 3;
 
 	return 4;
@@ -274,8 +279,9 @@ static inline int n_counters(void)
 	return counters;
 }
 
-static inline void reset_counters(int counters)
+static void reset_counters(void *arg)
 {
+	int counters = (int)arg;
 	switch (counters) {
 	case 4:
 		w_c0_perfctrl3(0);
@@ -302,9 +308,12 @@ static int __init mipsxx_init(void)
 		return -ENODEV;
 	}
 
-	reset_counters(counters);
-
-	counters = counters_total_to_per_cpu(counters);
+#ifdef CONFIG_MIPS_MT_SMP
+	cpu_has_mipsmt_pertccounters = read_c0_config7() & (1<<19);
+	if (!cpu_has_mipsmt_pertccounters)
+		counters = counters_total_to_per_cpu(counters);
+#endif
+	on_each_cpu(reset_counters, (void *)counters, 0, 1);
 
 	op_model_mipsxx_ops.num_counters = counters;
 	switch (current_cpu_type()) {
@@ -320,6 +329,13 @@ static int __init mipsxx_init(void)
 		op_model_mipsxx_ops.cpu_type = "mips/25K";
 		break;
 
+	case CPU_1004K:
+#if 0
+		/* FIXME: report as 34K for now */
+		op_model_mipsxx_ops.cpu_type = "mips/1004K";
+		break;
+#endif
+
 	case CPU_34K:
 		op_model_mipsxx_ops.cpu_type = "mips/34K";
 		break;
@@ -355,6 +371,7 @@ static int __init mipsxx_init(void)
 		return -ENODEV;
 	}
 
+	save_perf_irq = perf_irq;
 	perf_irq = mipsxx_perfcount_handler;
 
 	return 0;
@@ -365,9 +382,9 @@ static void mipsxx_exit(void)
 	int counters = op_model_mipsxx_ops.num_counters;
 
 	counters = counters_per_cpu_to_total(counters);
-	reset_counters(counters);
+	on_each_cpu(reset_counters, (void *)counters, 0, 1);
 
-	perf_irq = null_perf_irq;
+	perf_irq = save_perf_irq;
 }
 
 struct op_mips_model op_model_mipsxx_ops = {
diff --git a/arch/mips/pci/fixup-au1000.c b/arch/mips/pci/fixup-au1000.c
index ca0276c8070a..00c36c9dbe0e 100644
--- a/arch/mips/pci/fixup-au1000.c
+++ b/arch/mips/pci/fixup-au1000.c
@@ -26,13 +26,10 @@
  *  with this program; if not, write  to the Free Software Foundation, Inc.,
  *  675 Mass Ave, Cambridge, MA 02139, USA.
  */
-#include <linux/types.h>
+
 #include <linux/pci.h>
-#include <linux/kernel.h>
 #include <linux/init.h>
 
-#include <asm/mach-au1x00/au1000.h>
-
 extern char irq_tab_alchemy[][5];
 
 int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
diff --git a/arch/mips/pci/ops-pnx8550.c b/arch/mips/pci/ops-pnx8550.c
index d61064652498..0e160d9f07c3 100644
--- a/arch/mips/pci/ops-pnx8550.c
+++ b/arch/mips/pci/ops-pnx8550.c
@@ -90,14 +90,14 @@ config_access(unsigned int pci_cmd, struct pci_bus *bus, unsigned int devfn, int
 
 		loops--;
 		if (loops == 0) {
-			printk("%s : Arbiter Locked.\n", __FUNCTION__);
+			printk("%s : Arbiter Locked.\n", __func__);
 		}
 	}
 
 	clear_status();
 	if ((pci_cmd == PCI_CMD_IOR) || (pci_cmd == PCI_CMD_IOW)) {
 		printk("%s timeout (GPPM_CTRL=%X) ioaddr %lX pci_cmd %X\n",
-		       __FUNCTION__, inl(PCI_BASE | PCI_GPPM_CTRL), ioaddr,
+		       __func__, inl(PCI_BASE | PCI_GPPM_CTRL), ioaddr,
 		       pci_cmd);
 	}
 
diff --git a/arch/mips/sgi-ip32/ip32-reset.c b/arch/mips/sgi-ip32/ip32-reset.c
index 624bbdbff2a8..b6cab089561e 100644
--- a/arch/mips/sgi-ip32/ip32-reset.c
+++ b/arch/mips/sgi-ip32/ip32-reset.c
@@ -142,7 +142,7 @@ static irqreturn_t ip32_rtc_int(int irq, void *dev_id)
 	reg_c = CMOS_READ(RTC_INTR_FLAGS);
 	if (!(reg_c & RTC_IRQF)) {
 		printk(KERN_WARNING
-			"%s: RTC IRQ without RTC_IRQF\n", __FUNCTION__);
+			"%s: RTC IRQ without RTC_IRQF\n", __func__);
 	}
 	/* Wait until interrupt goes away */
 	disable_irq(MACEISA_RTC_IRQ);
diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c
index 3f808b629242..6d31f2a98abf 100644
--- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c
+++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_irq.c
@@ -173,7 +173,7 @@ static const u32 toshiba_rbtx4927_irq_debug_flag =
         { \
            char tmp[100]; \
            sprintf( tmp, str ); \
-           printk( "%s(%s:%u)::%s", __FUNCTION__, __FILE__, __LINE__, tmp ); \
+           printk( "%s(%s:%u)::%s", __func__, __FILE__, __LINE__, tmp ); \
         }
 #else
 #define TOSHIBA_RBTX4927_IRQ_DPRINTK(flag, str...)
diff --git a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
index e466e5e711d8..2203c77b2ce2 100644
--- a/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
+++ b/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
@@ -93,7 +93,7 @@ static const u32 toshiba_rbtx4927_setup_debug_flag =
         { \
            char tmp[100]; \
            sprintf( tmp, str ); \
-           printk( "%s(%s:%u)::%s", __FUNCTION__, __FILE__, __LINE__, tmp ); \
+           printk( "%s(%s:%u)::%s", __func__, __FILE__, __LINE__, tmp ); \
         }
 #else
 #define TOSHIBA_RBTX4927_SETUP_DPRINTK(flag, str...)
diff --git a/arch/mips/tx4938/common/dbgio.c b/arch/mips/tx4938/common/dbgio.c
index bea59ff1842a..33b9c672a322 100644
--- a/arch/mips/tx4938/common/dbgio.c
+++ b/arch/mips/tx4938/common/dbgio.c
@@ -31,9 +31,7 @@
  * Support for TX4938 in 2.6 - Hiroshi DOYU <Hiroshi_DOYU@montavista.co.jp>
  */
 
-#include <asm/mipsregs.h>
-#include <asm/system.h>
-#include <asm/tx4938/tx4938_mips.h>
+#include <linux/types>
 
 extern u8 txx9_sio_kdbg_rd(void);
 extern int txx9_sio_kdbg_wr( u8 ch );
diff --git a/arch/mips/tx4938/common/prom.c b/arch/mips/tx4938/common/prom.c
index 3189a65f7d7e..20baeaeba4cd 100644
--- a/arch/mips/tx4938/common/prom.c
+++ b/arch/mips/tx4938/common/prom.c
@@ -13,13 +13,8 @@
  */
 
 #include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/bootmem.h>
-
-#include <asm/addrspace.h>
-#include <asm/bootinfo.h>
-#include <asm/tx4938/tx4938.h>
+#include <linux/types.h>
+#include <linux/io.h>
 
 static unsigned int __init
 tx4938_process_sdccr(u64 * addr)
@@ -35,7 +30,7 @@ tx4938_process_sdccr(u64 * addr)
 	unsigned int bc = 4;
 	unsigned int msize = 0;
 
-	val = (*((vu64 *) (addr)));
+	val = ____raw_readq((void __iomem *)addr);
 
 	/* MVMCP -- need #defs for these bits masks */
 	sdccr_ce = ((val & (1 << 10)) >> 10);
diff --git a/arch/mips/tx4938/toshiba_rbtx4938/irq.c b/arch/mips/tx4938/toshiba_rbtx4938/irq.c
index f00185017e80..4d6a8dc46c76 100644
--- a/arch/mips/tx4938/toshiba_rbtx4938/irq.c
+++ b/arch/mips/tx4938/toshiba_rbtx4938/irq.c
@@ -67,24 +67,7 @@ IRQ  Device
 63 RBTX4938-IOC/07 SWINT
 */
 #include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/ioport.h>
-#include <linux/sched.h>
 #include <linux/interrupt.h>
-#include <linux/pci.h>
-#include <linux/timex.h>
-#include <asm/bootinfo.h>
-#include <asm/page.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <asm/processor.h>
-#include <asm/reboot.h>
-#include <asm/time.h>
-#include <asm/wbflush.h>
-#include <linux/bootmem.h>
 #include <asm/tx4938/rbtx4938.h>
 
 static void toshiba_rbtx4938_irq_ioc_enable(unsigned int irq);
@@ -99,21 +82,16 @@ static struct irq_chip toshiba_rbtx4938_irq_ioc_type = {
 	.unmask = toshiba_rbtx4938_irq_ioc_enable,
 };
 
-#define TOSHIBA_RBTX4938_IOC_INTR_ENAB 0xb7f02000
-#define TOSHIBA_RBTX4938_IOC_INTR_STAT 0xb7f0200a
-
 int
 toshiba_rbtx4938_irq_nested(int sw_irq)
 {
 	u8 level3;
 
-	level3 = reg_rd08(TOSHIBA_RBTX4938_IOC_INTR_STAT) & 0xff;
-	if (level3) {
+	level3 = readb(rbtx4938_imstat_addr);
+	if (level3)
 		/* must use fls so onboard ATA has priority */
 		sw_irq = TOSHIBA_RBTX4938_IRQ_IOC_BEG + fls(level3) - 1;
-	}
 
-	wbflush();
 	return sw_irq;
 }
 
@@ -144,25 +122,23 @@ toshiba_rbtx4938_irq_ioc_init(void)
 static void
 toshiba_rbtx4938_irq_ioc_enable(unsigned int irq)
 {
-	volatile unsigned char v;
+	unsigned char v;
 
-	v = TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB);
+	v = readb(rbtx4938_imask_addr);
 	v |= (1 << (irq - TOSHIBA_RBTX4938_IRQ_IOC_BEG));
-	TX4938_WR08(TOSHIBA_RBTX4938_IOC_INTR_ENAB, v);
+	writeb(v, rbtx4938_imask_addr);
 	mmiowb();
-	TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB);
 }
 
 static void
 toshiba_rbtx4938_irq_ioc_disable(unsigned int irq)
 {
-	volatile unsigned char v;
+	unsigned char v;
 
-	v = TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB);
+	v = readb(rbtx4938_imask_addr);
 	v &= ~(1 << (irq - TOSHIBA_RBTX4938_IRQ_IOC_BEG));
-	TX4938_WR08(TOSHIBA_RBTX4938_IOC_INTR_ENAB, v);
+	writeb(v, rbtx4938_imask_addr);
 	mmiowb();
-	TX4938_RD08(TOSHIBA_RBTX4938_IOC_INTR_ENAB);
 }
 
 void __init arch_init_irq(void)
@@ -174,14 +150,12 @@ void __init arch_init_irq(void)
 	/* all IRC interrupt mode are Low Active. */
 
 	/* mask all IOC interrupts */
-	*rbtx4938_imask_ptr = 0;
+	writeb(0, rbtx4938_imask_addr);
 
 	/* clear SoftInt interrupts */
-	*rbtx4938_softint_ptr = 0;
+	writeb(0, rbtx4938_softint_addr);
 	tx4938_irq_init();
 	toshiba_rbtx4938_irq_ioc_init();
 	/* Onboard 10M Ether: High Active */
 	set_irq_type(RBTX4938_IRQ_ETHER, IRQF_TRIGGER_HIGH);
-
-	wbflush();
 }
diff --git a/arch/mips/tx4938/toshiba_rbtx4938/setup.c b/arch/mips/tx4938/toshiba_rbtx4938/setup.c
index 61249f049cd6..3a3659e8633a 100644
--- a/arch/mips/tx4938/toshiba_rbtx4938/setup.c
+++ b/arch/mips/tx4938/toshiba_rbtx4938/setup.c
@@ -21,8 +21,8 @@
 #include <linux/pm.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>
+#include <linux/gpio.h>
 
-#include <asm/wbflush.h>
 #include <asm/reboot.h>
 #include <asm/time.h>
 #include <asm/txx9tmr.h>
@@ -34,7 +34,7 @@
 #endif
 #include <linux/spi/spi.h>
 #include <asm/tx4938/spi.h>
-#include <asm/gpio.h>
+#include <asm/txx9pio.h>
 
 extern char * __init prom_getcmdline(void);
 static inline void tx4938_report_pcic_status1(struct tx4938_pcic_reg *pcicptr);
@@ -90,12 +90,11 @@ void rbtx4938_machine_restart(char *command)
 	local_irq_disable();
 
 	printk("Rebooting...");
-	*rbtx4938_softresetlock_ptr = 1;
-	*rbtx4938_sfvol_ptr = 1;
-	*rbtx4938_softreset_ptr = 1;
-	wbflush();
-
-	while(1);
+	writeb(1, rbtx4938_softresetlock_addr);
+	writeb(1, rbtx4938_sfvol_addr);
+	writeb(1, rbtx4938_softreset_addr);
+	while(1)
+		;
 }
 
 void __init
@@ -487,7 +486,7 @@ static int __init tx4938_pcibios_init(void)
 	}
 
 	/* Reset PCI Bus */
-	*rbtx4938_pcireset_ptr = 0;
+	writeb(0, rbtx4938_pcireset_addr);
 	/* Reset PCIC */
 	tx4938_ccfgptr->clkctr |= TX4938_CLKCTR_PCIRST;
 	if (txboard_pci66_mode > 0)
@@ -495,8 +494,8 @@ static int __init tx4938_pcibios_init(void)
 	mdelay(10);
 	/* clear PCIC reset */
 	tx4938_ccfgptr->clkctr &= ~TX4938_CLKCTR_PCIRST;
-	*rbtx4938_pcireset_ptr = 1;
-	wbflush();
+	writeb(1, rbtx4938_pcireset_addr);
+	mmiowb();
 	tx4938_report_pcic_status1(tx4938_pcicptr);
 
 	tx4938_report_pciclk();
@@ -504,15 +503,15 @@ static int __init tx4938_pcibios_init(void)
 	if (txboard_pci66_mode == 0 &&
 	    txboard_pci66_check(&tx4938_pci_controller[0], 0, 0)) {
 		/* Reset PCI Bus */
-		*rbtx4938_pcireset_ptr = 0;
+		writeb(0, rbtx4938_pcireset_addr);
 		/* Reset PCIC */
 		tx4938_ccfgptr->clkctr |= TX4938_CLKCTR_PCIRST;
 		tx4938_pciclk66_setup();
 		mdelay(10);
 		/* clear PCIC reset */
 		tx4938_ccfgptr->clkctr &= ~TX4938_CLKCTR_PCIRST;
-		*rbtx4938_pcireset_ptr = 1;
-		wbflush();
+		writeb(1, rbtx4938_pcireset_addr);
+		mmiowb();
 		/* Reinitialize PCIC */
 		tx4938_report_pciclk();
 		tx4938_pcic_setup(tx4938_pcicptr, &tx4938_pci_controller[0], io_base[0], extarb);
@@ -615,9 +614,6 @@ static void __init rbtx4938_spi_setup(void)
 {
 	/* set SPI_SEL */
 	tx4938_ccfgptr->pcfg |= TX4938_PCFG_SPI_SEL;
-	/* chip selects for SPI devices */
-	tx4938_pioptr->dout |= (1 << SEEPROM1_CS);
-	tx4938_pioptr->dir |= (1 << SEEPROM1_CS);
 }
 
 static struct resource rbtx4938_fpga_resource;
@@ -776,12 +772,13 @@ void __init tx4938_board_setup(void)
 		txx9_tmr_init(TX4938_TMR_REG(i) & 0xfffffffffULL);
 
 	/* enable DMA */
-	TX4938_WR64(0xff1fb150, TX4938_DMA_MCR_MSTEN);
-	TX4938_WR64(0xff1fb950, TX4938_DMA_MCR_MSTEN);
+	for (i = 0; i < 2; i++)
+		____raw_writeq(TX4938_DMA_MCR_MSTEN,
+			       (void __iomem *)(TX4938_DMA_REG(i) + 0x50));
 
 	/* PIO */
-	tx4938_pioptr->maskcpu = 0;
-	tx4938_pioptr->maskext = 0;
+	__raw_writel(0, &tx4938_pioptr->maskcpu);
+	__raw_writel(0, &tx4938_pioptr->maskext);
 
 	/* TX4938 internal registers */
 	if (request_resource(&iomem_resource, &tx4938_reg_resource))
@@ -863,10 +860,6 @@ void __init plat_mem_setup(void)
 	if (txx9_master_clock == 0)
 		txx9_master_clock = 25000000; /* 25MHz */
 	tx4938_board_setup();
-	/* setup serial stuff */
-	TX4938_WR(0xff1ff314, 0x00000000);	/* h/w flow control off */
-	TX4938_WR(0xff1ff414, 0x00000000);	/* h/w flow control off */
-
 #ifndef CONFIG_PCI
 	set_io_port_base(RBTX4938_ETHER_BASE);
 #endif
@@ -932,16 +925,16 @@ void __init plat_mem_setup(void)
 	pcfg = tx4938_ccfgptr->pcfg;	/* updated */
 	/* fixup piosel */
 	if ((pcfg & (TX4938_PCFG_ATA_SEL | TX4938_PCFG_NDF_SEL)) ==
-	    TX4938_PCFG_ATA_SEL) {
-		*rbtx4938_piosel_ptr = (*rbtx4938_piosel_ptr & 0x03) | 0x04;
-	}
+	    TX4938_PCFG_ATA_SEL)
+		writeb((readb(rbtx4938_piosel_addr) & 0x03) | 0x04,
+		       rbtx4938_piosel_addr);
 	else if ((pcfg & (TX4938_PCFG_ATA_SEL | TX4938_PCFG_NDF_SEL)) ==
-	    TX4938_PCFG_NDF_SEL) {
-		*rbtx4938_piosel_ptr = (*rbtx4938_piosel_ptr & 0x03) | 0x08;
-	}
-	else {
-		*rbtx4938_piosel_ptr &= ~(0x08 | 0x04);
-	}
+		 TX4938_PCFG_NDF_SEL)
+		writeb((readb(rbtx4938_piosel_addr) & 0x03) | 0x08,
+		       rbtx4938_piosel_addr);
+	else
+		writeb(readb(rbtx4938_piosel_addr) & ~(0x08 | 0x04),
+		       rbtx4938_piosel_addr);
 
 	rbtx4938_fpga_resource.name = "FPGA Registers";
 	rbtx4938_fpga_resource.start = CPHYSADDR(RBTX4938_FPGA_REG_ADDR);
@@ -950,17 +943,14 @@ void __init plat_mem_setup(void)
 	if (request_resource(&iomem_resource, &rbtx4938_fpga_resource))
 		printk("request resource for fpga failed\n");
 
-	/* disable all OnBoard I/O interrupts */
-	*rbtx4938_imask_ptr = 0;
-
 	_machine_restart = rbtx4938_machine_restart;
 	_machine_halt = rbtx4938_machine_halt;
 	pm_power_off = rbtx4938_machine_power_off;
 
-	*rbtx4938_led_ptr = 0xff;
-	printk("RBTX4938 --- FPGA(Rev %02x)", *rbtx4938_fpga_rev_ptr);
-	printk(" DIPSW:%02x,%02x\n",
-	       *rbtx4938_dipsw_ptr, *rbtx4938_bdipsw_ptr);
+	writeb(0xff, rbtx4938_led_addr);
+	printk(KERN_INFO "RBTX4938 --- FPGA(Rev %02x) DIPSW:%02x,%02x\n",
+	       readb(rbtx4938_fpga_rev_addr),
+	       readb(rbtx4938_dipsw_addr), readb(rbtx4938_bdipsw_addr));
 }
 
 static int __init rbtx4938_ne_init(void)
@@ -984,106 +974,48 @@ device_initcall(rbtx4938_ne_init);
 
 /* GPIO support */
 
-static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock);
-
-static void rbtx4938_spi_gpio_set(unsigned gpio, int value)
+int gpio_to_irq(unsigned gpio)
 {
-	u8 val;
-	unsigned long flags;
-	gpio -= 16;
-	spin_lock_irqsave(&rbtx4938_spi_gpio_lock, flags);
-	val = *rbtx4938_spics_ptr;
-	if (value)
-		val |= 1 << gpio;
-	else
-		val &= ~(1 << gpio);
-	*rbtx4938_spics_ptr = val;
-	mmiowb();
-	spin_unlock_irqrestore(&rbtx4938_spi_gpio_lock, flags);
+	return -EINVAL;
 }
 
-static int rbtx4938_spi_gpio_dir_out(unsigned gpio, int value)
+int irq_to_gpio(unsigned irq)
 {
-	rbtx4938_spi_gpio_set(gpio, value);
-	return 0;
+	return -EINVAL;
 }
 
-static DEFINE_SPINLOCK(tx4938_gpio_lock);
-
-static int tx4938_gpio_get(unsigned gpio)
-{
-	return tx4938_pioptr->din & (1 << gpio);
-}
+static DEFINE_SPINLOCK(rbtx4938_spi_gpio_lock);
 
-static void tx4938_gpio_set_raw(unsigned gpio, int value)
+static void rbtx4938_spi_gpio_set(struct gpio_chip *chip, unsigned int offset,
+				  int value)
 {
-	u32 val;
-	val = tx4938_pioptr->dout;
+	u8 val;
+	unsigned long flags;
+	spin_lock_irqsave(&rbtx4938_spi_gpio_lock, flags);
+	val = readb(rbtx4938_spics_addr);
 	if (value)
-		val |= 1 << gpio;
+		val |= 1 << offset;
 	else
-		val &= ~(1 << gpio);
-	tx4938_pioptr->dout = val;
-}
-
-static void tx4938_gpio_set(unsigned gpio, int value)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&tx4938_gpio_lock, flags);
-	tx4938_gpio_set_raw(gpio, value);
-	mmiowb();
-	spin_unlock_irqrestore(&tx4938_gpio_lock, flags);
-}
-
-static int tx4938_gpio_dir_in(unsigned gpio)
-{
-	spin_lock_irq(&tx4938_gpio_lock);
-	tx4938_pioptr->dir &= ~(1 << gpio);
+		val &= ~(1 << offset);
+	writeb(val, rbtx4938_spics_addr);
 	mmiowb();
-	spin_unlock_irq(&tx4938_gpio_lock);
-	return 0;
-}
-
-static int tx4938_gpio_dir_out(unsigned int gpio, int value)
-{
-	spin_lock_irq(&tx4938_gpio_lock);
-	tx4938_gpio_set_raw(gpio, value);
-	tx4938_pioptr->dir |= 1 << gpio;
-	mmiowb();
-	spin_unlock_irq(&tx4938_gpio_lock);
-	return 0;
-}
-
-int gpio_direction_input(unsigned gpio)
-{
-	if (gpio < 16)
-		return tx4938_gpio_dir_in(gpio);
-	return -EINVAL;
-}
-
-int gpio_direction_output(unsigned gpio, int value)
-{
-	if (gpio < 16)
-		return tx4938_gpio_dir_out(gpio, value);
-	if (gpio < 16 + 3)
-		return rbtx4938_spi_gpio_dir_out(gpio, value);
-	return -EINVAL;
+	spin_unlock_irqrestore(&rbtx4938_spi_gpio_lock, flags);
 }
 
-int gpio_get_value(unsigned gpio)
+static int rbtx4938_spi_gpio_dir_out(struct gpio_chip *chip,
+				     unsigned int offset, int value)
 {
-	if (gpio < 16)
-		return tx4938_gpio_get(gpio);
+	rbtx4938_spi_gpio_set(chip, offset, value);
 	return 0;
 }
 
-void gpio_set_value(unsigned gpio, int value)
-{
-	if (gpio < 16)
-		tx4938_gpio_set(gpio, value);
-	else
-		rbtx4938_spi_gpio_set(gpio, value);
-}
+static struct gpio_chip rbtx4938_spi_gpio_chip = {
+	.set = rbtx4938_spi_gpio_set,
+	.direction_output = rbtx4938_spi_gpio_dir_out,
+	.label = "RBTX4938-SPICS",
+	.base = 16,
+	.ngpio = 3,
+};
 
 /* SPI support */
 
@@ -1094,7 +1026,6 @@ static void __init txx9_spi_init(unsigned long base, int irq)
 			.start	= base,
 			.end	= base + 0x20 - 1,
 			.flags	= IORESOURCE_MEM,
-			.parent	= &tx4938_reg_resource,
 		}, {
 			.start	= irq,
 			.flags	= IORESOURCE_IRQ,
@@ -1118,10 +1049,25 @@ static int __init rbtx4938_spi_init(void)
 	spi_eeprom_register(SEEPROM1_CS);
 	spi_eeprom_register(16 + SEEPROM2_CS);
 	spi_eeprom_register(16 + SEEPROM3_CS);
+	gpio_request(16 + SRTC_CS, "rtc-rs5c348");
+	gpio_direction_output(16 + SRTC_CS, 0);
+	gpio_request(SEEPROM1_CS, "seeprom1");
+	gpio_direction_output(SEEPROM1_CS, 1);
+	gpio_request(16 + SEEPROM2_CS, "seeprom2");
+	gpio_direction_output(16 + SEEPROM2_CS, 1);
+	gpio_request(16 + SEEPROM3_CS, "seeprom3");
+	gpio_direction_output(16 + SEEPROM3_CS, 1);
 	txx9_spi_init(TX4938_SPI_REG & 0xfffffffffULL, RBTX4938_IRQ_IRC_SPI);
 	return 0;
 }
-arch_initcall(rbtx4938_spi_init);
+
+static int __init rbtx4938_arch_init(void)
+{
+	txx9_gpio_init(TX4938_PIO_REG & 0xfffffffffULL, 0, 16);
+	gpiochip_add(&rbtx4938_spi_gpio_chip);
+	return rbtx4938_spi_init();
+}
+arch_initcall(rbtx4938_arch_init);
 
 /* Watchdog support */
 
@@ -1131,7 +1077,6 @@ static int __init txx9_wdt_init(unsigned long base)
 		.start	= base,
 		.end	= base + 0x100 - 1,
 		.flags	= IORESOURCE_MEM,
-		.parent	= &tx4938_reg_resource,
 	};
 	struct platform_device *dev =
 		platform_device_register_simple("txx9wdt", -1, &res, 1);
diff --git a/arch/mips/vr41xx/common/init.c b/arch/mips/vr41xx/common/init.c
index 76d4b5ed3fc0..c64995342ba8 100644
--- a/arch/mips/vr41xx/common/init.c
+++ b/arch/mips/vr41xx/common/init.c
@@ -1,7 +1,7 @@
 /*
  *  init.c, Common initialization routines for NEC VR4100 series.
  *
- *  Copyright (C) 2003-2005  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ *  Copyright (C) 2003-2008  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -53,6 +53,8 @@ void __init plat_time_init(void)
 void __init plat_mem_setup(void)
 {
 	iomem_resource_init();
+
+	vr41xx_siu_setup();
 }
 
 void __init prom_init(void)
diff --git a/arch/mips/vr41xx/common/siu.c b/arch/mips/vr41xx/common/siu.c
index b735f45b25f0..654dee6208be 100644
--- a/arch/mips/vr41xx/common/siu.c
+++ b/arch/mips/vr41xx/common/siu.c
@@ -1,7 +1,7 @@
 /*
  *  NEC VR4100 series SIU platform device.
  *
- *  Copyright (C) 2007  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
+ *  Copyright (C) 2007-2008  Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp>
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -118,3 +118,37 @@ err_free_device:
 	return retval;
 }
 device_initcall(vr41xx_siu_add);
+
+void __init vr41xx_siu_setup(void)
+{
+	struct uart_port port;
+	struct resource *res;
+	unsigned int *type;
+	int i;
+
+	switch (current_cpu_type()) {
+	case CPU_VR4111:
+	case CPU_VR4121:
+		type = siu_type1_ports;
+		res = siu_type1_resource;
+		break;
+	case CPU_VR4122:
+	case CPU_VR4131:
+	case CPU_VR4133:
+		type = siu_type2_ports;
+		res = siu_type2_resource;
+		break;
+	default:
+		return;
+	}
+
+	for (i = 0; i < SIU_PORTS_MAX; i++) {
+		port.line = i;
+		port.type = type[i];
+		if (port.type == PORT_UNKNOWN)
+			break;
+		port.mapbase = res[i].start;
+		port.membase = (unsigned char __iomem *)KSEG1ADDR(res[i].start);
+		vr41xx_siu_early_setup(&port);
+	}
+}
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index eb80f5e33d7d..1f012843150f 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -603,15 +603,18 @@ void show_mem(void)
 #ifdef CONFIG_DISCONTIGMEM
 	{
 		struct zonelist *zl;
-		int i, j, k;
+		int i, j;
 
 		for (i = 0; i < npmem_ranges; i++) {
+			zl = node_zonelist(i);
 			for (j = 0; j < MAX_NR_ZONES; j++) {
-				zl = NODE_DATA(i)->node_zonelists + j;
+				struct zoneref *z;
+				struct zone *zone;
 
 				printk("Zone list for zone %d on node %d: ", j, i);
-				for (k = 0; zl->zones[k] != NULL; k++) 
-					printk("[%d/%s] ", zone_to_nid(zl->zones[k]), zl->zones[k]->name);
+				for_each_zone_zonelist(zone, z, zl, j)
+					printk("[%d/%s] ", zone_to_nid(zone),
+								zone->name);
 				printk("\n");
 			}
 		}
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4bb2e9310a56..4e40c122bf26 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -626,20 +626,6 @@ config ADVANCED_OPTIONS
 comment "Default settings for advanced configuration options are used"
 	depends on !ADVANCED_OPTIONS
 
-config HIGHMEM_START_BOOL
-	bool "Set high memory pool address"
-	depends on ADVANCED_OPTIONS && HIGHMEM
-	help
-	  This option allows you to set the base address of the kernel virtual
-	  area used to map high memory pages.  This can be useful in
-	  optimizing the layout of kernel virtual memory.
-
-	  Say N here unless you know what you are doing.
-
-config HIGHMEM_START
-	hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL
-	default "0xfe000000"
-
 config LOWMEM_SIZE_BOOL
 	bool "Set maximum low memory"
 	depends on ADVANCED_OPTIONS
@@ -656,21 +642,76 @@ config LOWMEM_SIZE
 	hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL
 	default "0x30000000"
 
+config RELOCATABLE
+	bool "Build a relocatable kernel (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE
+	help
+	  This builds a kernel image that is capable of running at the
+	  location the kernel is loaded at (some alignment restrictions may
+	  exist).
+
+	  One use is for the kexec on panic case where the recovery kernel
+	  must live at a different physical address than the primary
+	  kernel.
+
+	  Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
+	  it has been loaded at and the compile time physical addresses
+	  CONFIG_PHYSICAL_START is ignored.  However CONFIG_PHYSICAL_START
+	  setting can still be useful to bootwrappers that need to know the
+	  load location of the kernel (eg. u-boot/mkimage).
+
+config PAGE_OFFSET_BOOL
+	bool "Set custom page offset address"
+	depends on ADVANCED_OPTIONS
+	help
+	  This option allows you to set the kernel virtual address at which
+	  the kernel will map low memory.  This can be useful in optimizing
+	  the virtual memory layout of the system.
+
+	  Say N here unless you know what you are doing.
+
+config PAGE_OFFSET
+	hex "Virtual address of memory base" if PAGE_OFFSET_BOOL
+	default "0xc0000000"
+
 config KERNEL_START_BOOL
 	bool "Set custom kernel base address"
 	depends on ADVANCED_OPTIONS
 	help
 	  This option allows you to set the kernel virtual address at which
-	  the kernel will map low memory (the kernel image will be linked at
-	  this address).  This can be useful in optimizing the virtual memory
-	  layout of the system.
+	  the kernel will be loaded.  Normally this should match PAGE_OFFSET
+	  however there are times (like kdump) that one might not want them
+	  to be the same.
 
 	  Say N here unless you know what you are doing.
 
 config KERNEL_START
 	hex "Virtual address of kernel base" if KERNEL_START_BOOL
+	default PAGE_OFFSET if PAGE_OFFSET_BOOL
+	default "0xc2000000" if CRASH_DUMP
 	default "0xc0000000"
 
+config PHYSICAL_START_BOOL
+	bool "Set physical address where the kernel is loaded"
+	depends on ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE
+	help
+	  This gives the physical address where the kernel is loaded.
+
+	  Say N here unless you know what you are doing.
+
+config PHYSICAL_START
+	hex "Physical address where the kernel is loaded" if PHYSICAL_START_BOOL
+	default "0x02000000" if PPC_STD_MMU && CRASH_DUMP
+	default "0x00000000"
+
+config PHYSICAL_ALIGN
+	hex
+	default "0x10000000" if FSL_BOOKE
+	help
+	  This value puts the alignment restrictions on physical address
+	  where kernel is loaded and run from. Kernel is compiled for an
+	  address which meets above alignment restriction.
+
 config TASK_SIZE_BOOL
 	bool "Set custom user task size"
 	depends on ADVANCED_OPTIONS
@@ -717,9 +758,17 @@ config PIN_TLB
 endmenu
 
 if PPC64
+config PAGE_OFFSET
+	hex
+	default "0xc000000000000000"
 config KERNEL_START
 	hex
+	default "0xc000000002000000" if CRASH_DUMP
 	default "0xc000000000000000"
+config PHYSICAL_START
+	hex
+	default "0x02000000" if CRASH_DUMP
+	default "0x00000000"
 endif
 
 source "net/Kconfig"
@@ -754,3 +803,4 @@ config PPC_CLOCK
 config PPC_LIB_RHEAP
 	bool
 
+source "arch/powerpc/kvm/Kconfig"
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index a86d8d853214..807a2dce6263 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -151,6 +151,9 @@ config BOOTX_TEXT
 
 config PPC_EARLY_DEBUG
 	bool "Early debugging (dangerous)"
+	# PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water
+	# mark, which doesn't work with current 440 KVM.
+	depends on !KVM
 	help
 	  Say Y to enable some early debugging facilities that may be available
 	  for your processor/board combination. Those facilities are hacks
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index e2ec4a91ccef..9dcdc036cdf7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -145,6 +145,7 @@ core-y				+= arch/powerpc/kernel/ \
 				   arch/powerpc/platforms/
 core-$(CONFIG_MATH_EMULATION)	+= arch/powerpc/math-emu/
 core-$(CONFIG_XMON)		+= arch/powerpc/xmon/
+core-$(CONFIG_KVM) 		+= arch/powerpc/kvm/
 
 drivers-$(CONFIG_OPROFILE)	+= arch/powerpc/oprofile/
 
diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore
index 5ef2bdf8d189..2347294ff35b 100644
--- a/arch/powerpc/boot/.gitignore
+++ b/arch/powerpc/boot/.gitignore
@@ -27,6 +27,7 @@ zImage.chrp
 zImage.coff
 zImage.coff.lds
 zImage.ep*
+zImage.iseries
 zImage.*lds
 zImage.miboot
 zImage.pmac
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 5ba50c673390..7822d25c9d31 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -40,7 +40,7 @@ $(obj)/ebony.o: BOOTCFLAGS += -mcpu=405
 $(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405
 $(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405
 $(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405
-$(obj)/virtex405-head.o: BOOTCFLAGS += -mcpu=405
+$(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405
 
 
 zlib       := inffast.c inflate.c inftrees.c
diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts
index 6f3d38a1554f..39634124929b 100644
--- a/arch/powerpc/boot/dts/canyonlands.dts
+++ b/arch/powerpc/boot/dts/canyonlands.dts
@@ -142,8 +142,45 @@
 				#address-cells = <2>;
 				#size-cells = <1>;
 				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
 				interrupts = <6 4>;
 				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0 000000 4000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0 1e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <1e0000 20000>;
+					};
+					partition@200000 {
+						label = "ramdisk";
+						reg = <200000 1400000>;
+					};
+					partition@1600000 {
+						label = "jffs2";
+						reg = <1600000 400000>;
+					};
+					partition@1a00000 {
+						label = "user";
+						reg = <1a00000 2560000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <3f60000 40000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <3fa0000 60000>;
+					};
+				};
 			};
 
 			UART0: serial@ef600300 {
diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts
index 958a5ca53d35..0f2fc077d8db 100644
--- a/arch/powerpc/boot/dts/glacier.dts
+++ b/arch/powerpc/boot/dts/glacier.dts
@@ -145,8 +145,45 @@
 				#address-cells = <2>;
 				#size-cells = <1>;
 				clock-frequency = <0>; /* Filled in by U-Boot */
+				/* ranges property is supplied by U-Boot */
 				interrupts = <6 4>;
 				interrupt-parent = <&UIC1>;
+
+				nor_flash@0,0 {
+					compatible = "amd,s29gl512n", "cfi-flash";
+					bank-width = <2>;
+					reg = <0 000000 4000000>;
+					#address-cells = <1>;
+					#size-cells = <1>;
+					partition@0 {
+						label = "kernel";
+						reg = <0 1e0000>;
+					};
+					partition@1e0000 {
+						label = "dtb";
+						reg = <1e0000 20000>;
+					};
+					partition@200000 {
+						label = "ramdisk";
+						reg = <200000 1400000>;
+					};
+					partition@1600000 {
+						label = "jffs2";
+						reg = <1600000 400000>;
+					};
+					partition@1a00000 {
+						label = "user";
+						reg = <1a00000 2560000>;
+					};
+					partition@3f60000 {
+						label = "env";
+						reg = <3f60000 40000>;
+					};
+					partition@3fa0000 {
+						label = "u-boot";
+						reg = <3fa0000 60000>;
+					};
+				};
 			};
 
 			UART0: serial@ef600300 {
diff --git a/arch/powerpc/boot/dts/mpc8610_hpcd.dts b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
index 16c947b8a721..1f2f1e0a5571 100644
--- a/arch/powerpc/boot/dts/mpc8610_hpcd.dts
+++ b/arch/powerpc/boot/dts/mpc8610_hpcd.dts
@@ -45,6 +45,11 @@
 		reg = <0x00000000 0x20000000>;	// 512M at 0x0
 	};
 
+	board-control@e8000000 {
+		compatible = "fsl,fpga-pixis";
+		reg = <0xe8000000 32>;		// pixis at 0xe8000000
+	};
+
 	soc@e0000000 {
 		#address-cells = <1>;
 		#size-cells = <1>;
@@ -104,6 +109,13 @@
 			interrupt-parent = <&mpic>;
 		};
 
+		display@2c000 {
+			compatible = "fsl,diu";
+			reg = <0x2c000 100>;
+			interrupts = <72 2>;
+			interrupt-parent = <&mpic>;
+		};
+
 		mpic: interrupt-controller@40000 {
 			clock-frequency = <0>;
 			interrupt-controller;
diff --git a/arch/powerpc/boot/ns16550.c b/arch/powerpc/boot/ns16550.c
index aef3bdc89160..8c9ead94be06 100644
--- a/arch/powerpc/boot/ns16550.c
+++ b/arch/powerpc/boot/ns16550.c
@@ -55,10 +55,15 @@ static u8 ns16550_tstc(void)
 int ns16550_console_init(void *devp, struct serial_console_data *scdp)
 {
 	int n;
+	u32 reg_offset;
 
 	if (dt_get_virtual_reg(devp, (void **)&reg_base, 1) < 1)
 		return -1;
 
+	n = getprop(devp, "reg-offset", &reg_offset, sizeof(reg_offset));
+	if (n == sizeof(reg_offset))
+		reg_base += reg_offset;
+
 	n = getprop(devp, "reg-shift", &reg_shift, sizeof(reg_shift));
 	if (n != sizeof(reg_shift))
 		reg_shift = 0;
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index ce1e8d24e747..9177b21b1a95 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -106,4 +106,13 @@ PHONY += systbl_chk
 systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i
 	$(call cmd,systbl_chk)
 
+$(obj)/built-in.o:		prom_init_check
+
+quiet_cmd_prom_init_check = CALL    $<
+      cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+
+PHONY += prom_init_check
+prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
+	$(call cmd,prom_init_check)
+
 clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 292c6d8db0e1..62134845af08 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -23,6 +23,9 @@
 #include <linux/mm.h>
 #include <linux/suspend.h>
 #include <linux/hrtimer.h>
+#ifdef CONFIG_KVM
+#include <linux/kvm_host.h>
+#endif
 #ifdef CONFIG_PPC64
 #include <linux/time.h>
 #include <linux/hardirq.h>
@@ -93,10 +96,7 @@ int main(void)
 	DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags));
 	DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
 	DEFINE(TI_TASK, offsetof(struct thread_info, task));
-#ifdef CONFIG_PPC32
-	DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
-#endif /* CONFIG_PPC32 */
 
 #ifdef CONFIG_PPC64
 	DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size));
@@ -165,13 +165,9 @@ int main(void)
 
 	/* Interrupt register frame */
 	DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD);
-#ifndef CONFIG_PPC64
-	DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-#else /* CONFIG_PPC64 */
+	DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
+#ifdef CONFIG_PPC64
 	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-	/* 288 = # of volatile regs, int & fp, for leaf routines */
-	/* which do not stack a frame.  See the PPC64 ABI.       */
-	DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288);
 	/* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
 	DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
 	DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
@@ -331,5 +327,30 @@ int main(void)
 
 	DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE);
 
+#ifdef CONFIG_KVM
+	DEFINE(TLBE_BYTES, sizeof(struct tlbe));
+
+	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
+	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
+	DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb));
+	DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb));
+	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
+	DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
+	DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
+	DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
+	DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
+	DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
+	DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
+	DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4));
+	DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5));
+	DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6));
+	DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7));
+	DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid));
+
+	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
+	DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear));
+	DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr));
+#endif
+
 	return 0;
 }
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S
index 5465e8de0e61..e3623e3e3451 100644
--- a/arch/powerpc/kernel/cpu_setup_44x.S
+++ b/arch/powerpc/kernel/cpu_setup_44x.S
@@ -33,7 +33,6 @@ _GLOBAL(__setup_cpu_440grx)
 	mtlr	r4
 	blr
 _GLOBAL(__setup_cpu_460ex)
-_GLOBAL(__setup_cpu_460gt)
 	b	__init_fpu_44x
 _GLOBAL(__setup_cpu_440gx)
 _GLOBAL(__setup_cpu_440spe)
diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index f1ee0b3f78f2..72d1d7395254 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -17,7 +17,13 @@
 #include <asm/cache.h>
 
 _GLOBAL(__setup_cpu_603)
-	b	setup_common_caches
+	mflr	r4
+BEGIN_FTR_SECTION
+	bl	__init_fpu_registers
+END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE)
+	bl	setup_common_caches
+	mtlr	r4
+	blr
 _GLOBAL(__setup_cpu_604)
 	mflr	r4
 	bl	setup_common_caches
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 26ffb44e2701..36080d4d1922 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -37,7 +37,6 @@ extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec);
-extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec);
 extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec);
@@ -1416,10 +1415,9 @@ static struct cpu_spec __initdata cpu_specs[] = {
 		.pvr_value		= 0x13020000,
 		.cpu_name		= "460GT",
 		.cpu_features		= CPU_FTRS_44X,
-		.cpu_user_features	= COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU,
+		.cpu_user_features	= COMMON_USER_BOOKE,
 		.icache_bsize		= 32,
 		.dcache_bsize		= 32,
-		.cpu_setup		= __setup_cpu_460gt,
 		.machine_check		= machine_check_440A,
 		.platform		= "ppc440",
 	},
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 4ff744143566..e581524d85bc 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -371,6 +371,17 @@ skpinv:	addi	r6,r6,1				/* Increment */
 
 	bl	early_init
 
+#ifdef CONFIG_RELOCATABLE
+	lis	r3,kernstart_addr@ha
+	la	r3,kernstart_addr@l(r3)
+#ifdef CONFIG_PHYS_64BIT
+	stw	r23,0(r3)
+	stw	r25,4(r3)
+#else
+	stw	r25,0(r3)
+#endif
+#endif
+
 	mfspr	r3,SPRN_TLB1CFG
 	andi.	r3,r3,0xfff
 	lis	r4,num_tlbcam_entries@ha
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 9d2c56621f1e..92ccc6fcc5b0 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -152,7 +152,7 @@ _GLOBAL(low_choose_750fx_pll)
 	mtspr	SPRN_HID1,r4
 
 	/* Store new HID1 image */
-	rlwinm	r6,r1,0,0,18
+	rlwinm	r6,r1,0,0,(31-THREAD_SHIFT)
 	lwz	r6,TI_CPU(r6)
 	slwi	r6,r6,2
 	addis	r6,r6,nap_save_hid1@ha
@@ -281,7 +281,7 @@ _GLOBAL(_tlbia)
 #endif /* CONFIG_SMP */
 #else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
 #if defined(CONFIG_SMP)
-	rlwinm	r8,r1,0,0,18
+	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
 	lwz	r8,TI_CPU(r8)
 	oris	r8,r8,10
 	mfmsr	r10
@@ -377,7 +377,7 @@ _GLOBAL(_tlbie)
 #endif /* CONFIG_SMP */
 #else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */
 #if defined(CONFIG_SMP)
-	rlwinm	r8,r1,0,0,18
+	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
 	lwz	r8,TI_CPU(r8)
 	oris	r8,r8,11
 	mfmsr	r10
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index a3c491e88a72..942951e76586 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -27,23 +27,11 @@
 
 	.text
 
-_GLOBAL(get_msr)
-	mfmsr	r3
-	blr
-
-_GLOBAL(get_srr0)
-	mfsrr0  r3
-	blr
-
-_GLOBAL(get_srr1)
-	mfsrr1  r3
-	blr
-
 #ifdef CONFIG_IRQSTACKS
 _GLOBAL(call_do_softirq)
 	mflr	r0
 	std	r0,16(r1)
-	stdu	r1,THREAD_SIZE-112(r3)
+	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
 	mr	r1,r3
 	bl	.__do_softirq
 	ld	r1,0(r1)
@@ -56,7 +44,7 @@ _GLOBAL(call_handle_irq)
 	mflr	r0
 	std	r0,16(r1)
 	mtctr	r8
-	stdu	r1,THREAD_SIZE-112(r5)
+	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
 	mr	r1,r5
 	bctrl
 	ld	r1,0(r1)
@@ -599,7 +587,7 @@ _GLOBAL(kexec_sequence)
 	std	r0,16(r1)
 
 	/* switch stacks to newstack -- &kexec_stack.stack */
-	stdu	r1,THREAD_SIZE-112(r3)
+	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
 	mr	r1,r3
 
 	li	r0,0
@@ -616,7 +604,7 @@ _GLOBAL(kexec_sequence)
 	std	r26,-48(r1)
 	std	r25,-56(r1)
 
-	stdu	r1,-112-64(r1)
+	stdu	r1,-STACK_FRAME_OVERHEAD-64(r1)
 
 	/* save args into preserved regs */
 	mr	r31,r3			/* newstack (both) */
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index fb698d47082d..e79ad8afda07 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -275,6 +275,8 @@ static int __devinit of_pci_phb_probe(struct of_device *dev,
 
 	/* Scan the bus */
 	scan_phb(phb);
+	if (phb->bus == NULL)
+		return -ENXIO;
 
 	/* Claim resources. This might need some rework as well depending
 	 * wether we are doing probe-only or not, like assigning unassigned
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index ac163bd46cfd..c9bf17eec31b 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -7,17 +7,11 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#include <linux/types.h>
 #include <linux/threads.h>
 #include <linux/module.h>
 
-#include <asm/processor.h>
-#include <asm/ptrace.h>
-#include <asm/page.h>
 #include <asm/lppaca.h>
 #include <asm/paca.h>
-#include <asm/mmu.h>
-
 
 /* This symbol is provided by the linker - let it fill in the paca
  * field correctly */
@@ -65,60 +59,29 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = {
  * processors.  The processor VPD array needs one entry per physical
  * processor (not thread).
  */
-#define PACA_INIT(number)						    \
-{									    \
-	.lppaca_ptr = &lppaca[number],					    \
-	.lock_token = 0x8000,						    \
-	.paca_index = (number),		/* Paca Index */		    \
-	.kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL,		    \
-	.hw_cpu_id = 0xffff,						    \
-	.slb_shadow_ptr = &slb_shadow[number],				    \
-	.__current = &init_task,					    \
-}
-
-struct paca_struct paca[] = {
-	PACA_INIT(0),
-#if NR_CPUS > 1
-	PACA_INIT(  1), PACA_INIT(  2), PACA_INIT(  3),
-#if NR_CPUS > 4
-	PACA_INIT(  4), PACA_INIT(  5), PACA_INIT(  6), PACA_INIT(  7),
-#if NR_CPUS > 8
-	PACA_INIT(  8), PACA_INIT(  9), PACA_INIT( 10), PACA_INIT( 11),
-	PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15),
-	PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19),
-	PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23),
-	PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27),
-	PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31),
-#if NR_CPUS > 32
-	PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35),
-	PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39),
-	PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43),
-	PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47),
-	PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51),
-	PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55),
-	PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59),
-	PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63),
-#if NR_CPUS > 64
-	PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67),
-	PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71),
-	PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75),
-	PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79),
-	PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83),
-	PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87),
-	PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91),
-	PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95),
-	PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99),
-	PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103),
-	PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107),
-	PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111),
-	PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115),
-	PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119),
-	PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123),
-	PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127),
-#endif
-#endif
-#endif
-#endif
-#endif
-};
+struct paca_struct paca[NR_CPUS];
 EXPORT_SYMBOL(paca);
+
+void __init initialise_pacas(void)
+{
+	int cpu;
+
+	/* The TOC register (GPR2) points 32kB into the TOC, so that 64kB
+	 * of the TOC can be addressed using a single machine instruction.
+	 */
+	unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL;
+
+	/* Can't use for_each_*_cpu, as they aren't functional yet */
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+		struct paca_struct *new_paca = &paca[cpu];
+
+		new_paca->lppaca_ptr = &lppaca[cpu];
+		new_paca->lock_token = 0x8000;
+		new_paca->paca_index = cpu;
+		new_paca->kernel_toc = kernel_toc;
+		new_paca->hw_cpu_id = 0xffff;
+		new_paca->slb_shadow_ptr = &slb_shadow[cpu];
+		new_paca->__current = &init_task;
+
+	}
+}
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
index fda05e2211d6..90e562771791 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/kernel/ppc32.h
@@ -135,6 +135,4 @@ struct ucontext32 {
 	struct mcontext32	uc_mcontext;
 };
 
-extern int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s);
-
 #endif  /* _PPC64_PPC32_H */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 703100d5e458..6caad17ea72e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1033,3 +1033,34 @@ void ppc64_runlatch_off(void)
 	}
 }
 #endif
+
+#if THREAD_SHIFT < PAGE_SHIFT
+
+static struct kmem_cache *thread_info_cache;
+
+struct thread_info *alloc_thread_info(struct task_struct *tsk)
+{
+	struct thread_info *ti;
+
+	ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL);
+	if (unlikely(ti == NULL))
+		return NULL;
+#ifdef CONFIG_DEBUG_STACK_USAGE
+	memset(ti, 0, THREAD_SIZE);
+#endif
+	return ti;
+}
+
+void free_thread_info(struct thread_info *ti)
+{
+	kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
+	thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+					      THREAD_SIZE, 0, NULL);
+	BUG_ON(thread_info_cache == NULL);
+}
+
+#endif /* THREAD_SHIFT < PAGE_SHIFT */
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 3bfe7837e820..2aefe2a4129a 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -53,6 +53,7 @@
 #include <asm/pci-bridge.h>
 #include <asm/phyp_dump.h>
 #include <asm/kexec.h>
+#include <mm/mmu_decl.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) printk(KERN_ERR fmt)
@@ -978,7 +979,10 @@ static int __init early_init_dt_scan_memory(unsigned long node,
 		}
 #endif
 		lmb_add(base, size);
+
+		memstart_addr = min((u64)memstart_addr, base);
 	}
+
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
new file mode 100644
index 000000000000..8e24fc1821e8
--- /dev/null
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -0,0 +1,58 @@
+#!/bin/sh
+#
+# Copyright © 2008 IBM Corporation
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version
+# 2 of the License, or (at your option) any later version.
+
+# This script checks prom_init.o to see what external symbols it
+# is using, if it finds symbols not in the whitelist it returns
+# an error. The point of this is to discourage people from
+# intentionally or accidentally adding new code to prom_init.c
+# which has side effects on other parts of the kernel.
+
+# If you really need to reference something from prom_init.o add
+# it to the list below:
+
+WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
+_end enter_prom memcpy memset reloc_offset __secondary_hold
+__secondary_hold_acknowledge __secondary_hold_spinloop __start
+strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
+reloc_got2"
+
+NM="$1"
+OBJ="$2"
+
+ERROR=0
+
+for UNDEF in $($NM -u $OBJ | awk '{print $2}')
+do
+	# On 64-bit nm gives us the function descriptors, which have
+	# a leading . on the name, so strip it off here.
+	UNDEF="${UNDEF#.}"
+
+	if [ $KBUILD_VERBOSE ]; then
+		if [ $KBUILD_VERBOSE -ne 0 ]; then
+			echo "Checking prom_init.o symbol '$UNDEF'"
+		fi
+	fi
+
+	OK=0
+	for WHITE in $WHITELIST
+	do
+		if [ "$UNDEF" = "$WHITE" ]; then
+			OK=1
+			break
+		fi
+	done
+
+	if [ $OK -eq 0 ]; then
+		ERROR=1
+		echo "Error: External symbol '$UNDEF' referenced" \
+		     "from prom_init.c" >&2
+	fi
+done
+
+exit $ERROR
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index 9d30e10970ac..4c1de6af4c09 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -29,15 +29,12 @@
 #include <linux/security.h>
 #include <linux/signal.h>
 #include <linux/compat.h>
-#include <linux/elf.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
 
-#include "ppc32.h"
-
 /*
  * does not yet catch signals sent when the child dies.
  * in exit.c or in signal.c.
@@ -67,27 +64,6 @@ static long compat_ptrace_old(struct task_struct *child, long request,
 	return -EPERM;
 }
 
-static int compat_ptrace_getsiginfo(struct task_struct *child, compat_siginfo_t __user *data)
-{
-	siginfo_t lastinfo;
-	int error = -ESRCH;
-
-	read_lock(&tasklist_lock);
-	if (likely(child->sighand != NULL)) {
-		error = -EINVAL;
-		spin_lock_irq(&child->sighand->siglock);
-		if (likely(child->last_siginfo != NULL)) {
-			lastinfo = *child->last_siginfo;
-			error = 0;
-		}
-		spin_unlock_irq(&child->sighand->siglock);
-	}
-	read_unlock(&tasklist_lock);
-	if (!error)
-		return copy_siginfo_to_user32(data, &lastinfo);
-	return error;
-}
-
 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			compat_ulong_t caddr, compat_ulong_t cdata)
 {
@@ -306,9 +282,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			0, PT_REGS_COUNT * sizeof(compat_long_t),
 			compat_ptr(data));
 
-	case PTRACE_GETSIGINFO:
-		return compat_ptrace_getsiginfo(child, compat_ptr(data));
-
 	case PTRACE_GETFPREGS:
 	case PTRACE_SETFPREGS:
 	case PTRACE_GETVRREGS:
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 31ada9fdfc5c..dff6308d1b5e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -170,6 +170,9 @@ void __init setup_paca(int cpu)
 
 void __init early_setup(unsigned long dt_ptr)
 {
+	/* Fill in any unititialised pacas */
+	initialise_pacas();
+
 	/* Identify CPU type */
 	identify_cpu(0, mfspr(SPRN_PVR));
 
@@ -435,7 +438,7 @@ void __init setup_system(void)
 		printk("htab_address                  = 0x%p\n", htab_address);
 	printk("htab_hash_mask                = 0x%lx\n", htab_hash_mask);
 #if PHYSICAL_START > 0
-	printk("physical_start                = 0x%x\n", PHYSICAL_START);
+	printk("physical_start                = 0x%lx\n", PHYSICAL_START);
 #endif
 	printk("-----------------------------------------------------\n");
 
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index e3638eeaaae7..962944038430 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -13,7 +13,6 @@
 #include <linux/sched.h>
 #include <linux/stacktrace.h>
 #include <asm/ptrace.h>
-#include <asm/asm-offsets.h>
 
 /*
  * Save stack-backtrace addresses into a stack_trace buffer.
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index 7aad6203e411..7d6c9bb8c77f 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -154,8 +154,8 @@ static void udbg_console_write(struct console *con, const char *s,
 static struct console udbg_console = {
 	.name	= "udbg",
 	.write	= udbg_console_write,
-	.flags	= CON_PRINTBUFFER | CON_ENABLED | CON_BOOT,
-	.index	= -1,
+	.flags	= CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME,
+	.index	= 0,
 };
 
 static int early_console_initialized;
diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
new file mode 100644
index 000000000000..f5d7a5eab96e
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -0,0 +1,224 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm_host.h>
+#include <linux/highmem.h>
+#include <asm/mmu-44x.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW)
+#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW)
+
+static unsigned int kvmppc_tlb_44x_pos;
+
+static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode)
+{
+	/* Mask off reserved bits. */
+	attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK;
+
+	if (!usermode) {
+		/* Guest is in supervisor mode, so we need to translate guest
+		 * supervisor permissions into user permissions. */
+		attrib &= ~PPC44x_TLB_USER_PERM_MASK;
+		attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3;
+	}
+
+	/* Make sure host can always access this memory. */
+	attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW;
+
+	return attrib;
+}
+
+/* Search the guest TLB for a matching entry. */
+int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid,
+                         unsigned int as)
+{
+	int i;
+
+	/* XXX Replace loop with fancy data structures. */
+	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+		struct tlbe *tlbe = &vcpu->arch.guest_tlb[i];
+		unsigned int tid;
+
+		if (eaddr < get_tlb_eaddr(tlbe))
+			continue;
+
+		if (eaddr > get_tlb_end(tlbe))
+			continue;
+
+		tid = get_tlb_tid(tlbe);
+		if (tid && (tid != pid))
+			continue;
+
+		if (!get_tlb_v(tlbe))
+			continue;
+
+		if (get_tlb_ts(tlbe) != as)
+			continue;
+
+		return i;
+	}
+
+	return -1;
+}
+
+struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+	unsigned int as = !!(vcpu->arch.msr & MSR_IS);
+	unsigned int index;
+
+	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+	if (index == -1)
+		return NULL;
+	return &vcpu->arch.guest_tlb[index];
+}
+
+struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr)
+{
+	unsigned int as = !!(vcpu->arch.msr & MSR_DS);
+	unsigned int index;
+
+	index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as);
+	if (index == -1)
+		return NULL;
+	return &vcpu->arch.guest_tlb[index];
+}
+
+static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
+{
+	return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
+}
+
+/* Must be called with mmap_sem locked for writing. */
+static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
+                                      unsigned int index)
+{
+	struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index];
+	struct page *page = vcpu->arch.shadow_pages[index];
+
+	kunmap(vcpu->arch.shadow_pages[index]);
+
+	if (get_tlb_v(stlbe)) {
+		if (kvmppc_44x_tlbe_is_writable(stlbe))
+			kvm_release_page_dirty(page);
+		else
+			kvm_release_page_clean(page);
+	}
+}
+
+/* Caller must ensure that the specified guest TLB entry is safe to insert into
+ * the shadow TLB. */
+void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid,
+                    u32 flags)
+{
+	struct page *new_page;
+	struct tlbe *stlbe;
+	hpa_t hpaddr;
+	unsigned int victim;
+
+	/* Future optimization: don't overwrite the TLB entry containing the
+	 * current PC (or stack?). */
+	victim = kvmppc_tlb_44x_pos++;
+	if (kvmppc_tlb_44x_pos > tlb_44x_hwater)
+		kvmppc_tlb_44x_pos = 0;
+	stlbe = &vcpu->arch.shadow_tlb[victim];
+
+	/* Get reference to new page. */
+	down_write(&current->mm->mmap_sem);
+	new_page = gfn_to_page(vcpu->kvm, gfn);
+	if (is_error_page(new_page)) {
+		printk(KERN_ERR "Couldn't get guest page!\n");
+		kvm_release_page_clean(new_page);
+		return;
+	}
+	hpaddr = page_to_phys(new_page);
+
+	/* Drop reference to old page. */
+	kvmppc_44x_shadow_release(vcpu, victim);
+	up_write(&current->mm->mmap_sem);
+
+	vcpu->arch.shadow_pages[victim] = new_page;
+
+	/* XXX Make sure (va, size) doesn't overlap any other
+	 * entries. 440x6 user manual says the result would be
+	 * "undefined." */
+
+	/* XXX what about AS? */
+
+	stlbe->tid = asid & 0xff;
+
+	/* Force TS=1 for all guest mappings. */
+	/* For now we hardcode 4KB mappings, but it will be important to
+	 * use host large pages in the future. */
+	stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS
+	               | PPC44x_TLB_4K;
+
+	stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf);
+	stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags,
+	                                            vcpu->arch.msr & MSR_PR);
+}
+
+void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid)
+{
+	unsigned int pid = asid & 0xff;
+	int i;
+
+	/* XXX Replace loop with fancy data structures. */
+	down_write(&current->mm->mmap_sem);
+	for (i = 0; i <= tlb_44x_hwater; i++) {
+		struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
+		unsigned int tid;
+
+		if (!get_tlb_v(stlbe))
+			continue;
+
+		if (eaddr < get_tlb_eaddr(stlbe))
+			continue;
+
+		if (eaddr > get_tlb_end(stlbe))
+			continue;
+
+		tid = get_tlb_tid(stlbe);
+		if (tid && (tid != pid))
+			continue;
+
+		kvmppc_44x_shadow_release(vcpu, i);
+		stlbe->word0 = 0;
+	}
+	up_write(&current->mm->mmap_sem);
+}
+
+/* Invalidate all mappings, so that when they fault back in they will get the
+ * proper permission bits. */
+void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode)
+{
+	int i;
+
+	/* XXX Replace loop with fancy data structures. */
+	down_write(&current->mm->mmap_sem);
+	for (i = 0; i <= tlb_44x_hwater; i++) {
+		kvmppc_44x_shadow_release(vcpu, i);
+		vcpu->arch.shadow_tlb[i].word0 = 0;
+	}
+	up_write(&current->mm->mmap_sem);
+}
diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h
new file mode 100644
index 000000000000..2ccd46b6f6b7
--- /dev/null
+++ b/arch/powerpc/kvm/44x_tlb.h
@@ -0,0 +1,91 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __KVM_POWERPC_TLB_H__
+#define __KVM_POWERPC_TLB_H__
+
+#include <linux/kvm_host.h>
+#include <asm/mmu-44x.h>
+
+extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
+                                unsigned int pid, unsigned int as);
+extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr);
+
+/* TLB helper functions */
+static inline unsigned int get_tlb_size(const struct tlbe *tlbe)
+{
+	return (tlbe->word0 >> 4) & 0xf;
+}
+
+static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe)
+{
+	return tlbe->word0 & 0xfffffc00;
+}
+
+static inline gva_t get_tlb_bytes(const struct tlbe *tlbe)
+{
+	unsigned int pgsize = get_tlb_size(tlbe);
+	return 1 << 10 << (pgsize << 1);
+}
+
+static inline gva_t get_tlb_end(const struct tlbe *tlbe)
+{
+	return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1;
+}
+
+static inline u64 get_tlb_raddr(const struct tlbe *tlbe)
+{
+	u64 word1 = tlbe->word1;
+	return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00);
+}
+
+static inline unsigned int get_tlb_tid(const struct tlbe *tlbe)
+{
+	return tlbe->tid & 0xff;
+}
+
+static inline unsigned int get_tlb_ts(const struct tlbe *tlbe)
+{
+	return (tlbe->word0 >> 8) & 0x1;
+}
+
+static inline unsigned int get_tlb_v(const struct tlbe *tlbe)
+{
+	return (tlbe->word0 >> 9) & 0x1;
+}
+
+static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.mmucr & 0xff;
+}
+
+static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu)
+{
+	return (vcpu->arch.mmucr >> 16) & 0x1;
+}
+
+static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr)
+{
+	unsigned int pgmask = get_tlb_bytes(tlbe) - 1;
+
+	return get_tlb_raddr(tlbe) | (eaddr & pgmask);
+}
+
+#endif /* __KVM_POWERPC_TLB_H__ */
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
new file mode 100644
index 000000000000..6b076010213b
--- /dev/null
+++ b/arch/powerpc/kvm/Kconfig
@@ -0,0 +1,42 @@
+#
+# KVM configuration
+#
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	---help---
+	  Say Y here to get to see options for using your Linux host to run
+	  other operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and
+	  disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	bool "Kernel-based Virtual Machine (KVM) support"
+	depends on 44x && EXPERIMENTAL
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	# We can only run on Book E hosts so far
+	select KVM_BOOKE_HOST
+	---help---
+	  Support hosting virtualized guest machines. You will also
+	  need to select one or more of the processor modules below.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  If unsure, say N.
+
+config KVM_BOOKE_HOST
+	bool "KVM host support for Book E PowerPC processors"
+	depends on KVM && 44x
+	---help---
+	  Provides host support for KVM on Book E PowerPC processors. Currently
+	  this works on 440 processors only.
+
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
new file mode 100644
index 000000000000..d0d358d367ec
--- /dev/null
+++ b/arch/powerpc/kvm/Makefile
@@ -0,0 +1,15 @@
+#
+# Makefile for Kernel-based Virtual Machine module
+#
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+
+kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o
+obj-$(CONFIG_KVM) += kvm.o
+
+AFLAGS_booke_interrupts.o := -I$(obj)
+
+kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o
+obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o
diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
new file mode 100644
index 000000000000..6d9884a6884a
--- /dev/null
+++ b/arch/powerpc/kvm/booke_guest.c
@@ -0,0 +1,615 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputable.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ "exits",      VCPU_STAT(sum_exits) },
+	{ "mmio",       VCPU_STAT(mmio_exits) },
+	{ "dcr",        VCPU_STAT(dcr_exits) },
+	{ "sig",        VCPU_STAT(signal_exits) },
+	{ "light",      VCPU_STAT(light_exits) },
+	{ "itlb_r",     VCPU_STAT(itlb_real_miss_exits) },
+	{ "itlb_v",     VCPU_STAT(itlb_virt_miss_exits) },
+	{ "dtlb_r",     VCPU_STAT(dtlb_real_miss_exits) },
+	{ "dtlb_v",     VCPU_STAT(dtlb_virt_miss_exits) },
+	{ "sysc",       VCPU_STAT(syscall_exits) },
+	{ "isi",        VCPU_STAT(isi_exits) },
+	{ "dsi",        VCPU_STAT(dsi_exits) },
+	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
+	{ "dec",        VCPU_STAT(dec_exits) },
+	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
+	{ NULL }
+};
+
+static const u32 interrupt_msr_mask[16] = {
+	[BOOKE_INTERRUPT_CRITICAL]      = MSR_ME,
+	[BOOKE_INTERRUPT_MACHINE_CHECK] = 0,
+	[BOOKE_INTERRUPT_DATA_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_INST_STORAGE]  = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_EXTERNAL]      = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_ALIGNMENT]     = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_PROGRAM]       = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_FP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_SYSCALL]       = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_AP_UNAVAIL]    = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_DECREMENTER]   = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_FIT]           = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_WATCHDOG]      = MSR_ME,
+	[BOOKE_INTERRUPT_DTLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_ITLB_MISS]     = MSR_CE|MSR_ME|MSR_DE,
+	[BOOKE_INTERRUPT_DEBUG]         = MSR_ME,
+};
+
+const unsigned char exception_priority[] = {
+	[BOOKE_INTERRUPT_DATA_STORAGE] = 0,
+	[BOOKE_INTERRUPT_INST_STORAGE] = 1,
+	[BOOKE_INTERRUPT_ALIGNMENT] = 2,
+	[BOOKE_INTERRUPT_PROGRAM] = 3,
+	[BOOKE_INTERRUPT_FP_UNAVAIL] = 4,
+	[BOOKE_INTERRUPT_SYSCALL] = 5,
+	[BOOKE_INTERRUPT_AP_UNAVAIL] = 6,
+	[BOOKE_INTERRUPT_DTLB_MISS] = 7,
+	[BOOKE_INTERRUPT_ITLB_MISS] = 8,
+	[BOOKE_INTERRUPT_MACHINE_CHECK] = 9,
+	[BOOKE_INTERRUPT_DEBUG] = 10,
+	[BOOKE_INTERRUPT_CRITICAL] = 11,
+	[BOOKE_INTERRUPT_WATCHDOG] = 12,
+	[BOOKE_INTERRUPT_EXTERNAL] = 13,
+	[BOOKE_INTERRUPT_FIT] = 14,
+	[BOOKE_INTERRUPT_DECREMENTER] = 15,
+};
+
+const unsigned char priority_exception[] = {
+	BOOKE_INTERRUPT_DATA_STORAGE,
+	BOOKE_INTERRUPT_INST_STORAGE,
+	BOOKE_INTERRUPT_ALIGNMENT,
+	BOOKE_INTERRUPT_PROGRAM,
+	BOOKE_INTERRUPT_FP_UNAVAIL,
+	BOOKE_INTERRUPT_SYSCALL,
+	BOOKE_INTERRUPT_AP_UNAVAIL,
+	BOOKE_INTERRUPT_DTLB_MISS,
+	BOOKE_INTERRUPT_ITLB_MISS,
+	BOOKE_INTERRUPT_MACHINE_CHECK,
+	BOOKE_INTERRUPT_DEBUG,
+	BOOKE_INTERRUPT_CRITICAL,
+	BOOKE_INTERRUPT_WATCHDOG,
+	BOOKE_INTERRUPT_EXTERNAL,
+	BOOKE_INTERRUPT_FIT,
+	BOOKE_INTERRUPT_DECREMENTER,
+};
+
+
+void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu)
+{
+	struct tlbe *tlbe;
+	int i;
+
+	printk("vcpu %d TLB dump:\n", vcpu->vcpu_id);
+	printk("| %2s | %3s | %8s | %8s | %8s |\n",
+			"nr", "tid", "word0", "word1", "word2");
+
+	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+		tlbe = &vcpu->arch.guest_tlb[i];
+		if (tlbe->word0 & PPC44x_TLB_VALID)
+			printk(" G%2d |  %02X | %08X | %08X | %08X |\n",
+			       i, tlbe->tid, tlbe->word0, tlbe->word1,
+			       tlbe->word2);
+	}
+
+	for (i = 0; i < PPC44x_TLB_SIZE; i++) {
+		tlbe = &vcpu->arch.shadow_tlb[i];
+		if (tlbe->word0 & PPC44x_TLB_VALID)
+			printk(" S%2d | %02X | %08X | %08X | %08X |\n",
+			       i, tlbe->tid, tlbe->word0, tlbe->word1,
+			       tlbe->word2);
+	}
+}
+
+/* TODO: use vcpu_printf() */
+void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
+{
+	int i;
+
+	printk("pc:   %08x msr:  %08x\n", vcpu->arch.pc, vcpu->arch.msr);
+	printk("lr:   %08x ctr:  %08x\n", vcpu->arch.lr, vcpu->arch.ctr);
+	printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1);
+
+	printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions);
+
+	for (i = 0; i < 32; i += 4) {
+		printk("gpr%02d: %08x %08x %08x %08x\n", i,
+		       vcpu->arch.gpr[i],
+		       vcpu->arch.gpr[i+1],
+		       vcpu->arch.gpr[i+2],
+		       vcpu->arch.gpr[i+3]);
+	}
+}
+
+/* Check if we are ready to deliver the interrupt */
+static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+{
+	int r;
+
+	switch (interrupt) {
+	case BOOKE_INTERRUPT_CRITICAL:
+		r = vcpu->arch.msr & MSR_CE;
+		break;
+	case BOOKE_INTERRUPT_MACHINE_CHECK:
+		r = vcpu->arch.msr & MSR_ME;
+		break;
+	case BOOKE_INTERRUPT_EXTERNAL:
+		r = vcpu->arch.msr & MSR_EE;
+		break;
+	case BOOKE_INTERRUPT_DECREMENTER:
+		r = vcpu->arch.msr & MSR_EE;
+		break;
+	case BOOKE_INTERRUPT_FIT:
+		r = vcpu->arch.msr & MSR_EE;
+		break;
+	case BOOKE_INTERRUPT_WATCHDOG:
+		r = vcpu->arch.msr & MSR_CE;
+		break;
+	case BOOKE_INTERRUPT_DEBUG:
+		r = vcpu->arch.msr & MSR_DE;
+		break;
+	default:
+		r = 1;
+	}
+
+	return r;
+}
+
+static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt)
+{
+	switch (interrupt) {
+	case BOOKE_INTERRUPT_DECREMENTER:
+		vcpu->arch.tsr |= TSR_DIS;
+		break;
+	}
+
+	vcpu->arch.srr0 = vcpu->arch.pc;
+	vcpu->arch.srr1 = vcpu->arch.msr;
+	vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt];
+	kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]);
+}
+
+/* Check pending exceptions and deliver one, if possible. */
+void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu)
+{
+	unsigned long *pending = &vcpu->arch.pending_exceptions;
+	unsigned int exception;
+	unsigned int priority;
+
+	priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending));
+	while (priority <= BOOKE_MAX_INTERRUPT) {
+		exception = priority_exception[priority];
+		if (kvmppc_can_deliver_interrupt(vcpu, exception)) {
+			kvmppc_clear_exception(vcpu, exception);
+			kvmppc_deliver_interrupt(vcpu, exception);
+			break;
+		}
+
+		priority = find_next_bit(pending,
+		                         BITS_PER_BYTE * sizeof(*pending),
+		                         priority + 1);
+	}
+}
+
+static int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	enum emulation_result er;
+	int r;
+
+	er = kvmppc_emulate_instruction(run, vcpu);
+	switch (er) {
+	case EMULATE_DONE:
+		/* Future optimization: only reload non-volatiles if they were
+		 * actually modified. */
+		r = RESUME_GUEST_NV;
+		break;
+	case EMULATE_DO_MMIO:
+		run->exit_reason = KVM_EXIT_MMIO;
+		/* We must reload nonvolatiles because "update" load/store
+		 * instructions modify register state. */
+		/* Future optimization: only reload non-volatiles if they were
+		 * actually modified. */
+		r = RESUME_HOST_NV;
+		break;
+	case EMULATE_FAIL:
+		/* XXX Deliver Program interrupt to guest. */
+		printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
+		       vcpu->arch.last_inst);
+		r = RESUME_HOST;
+		break;
+	default:
+		BUG();
+	}
+
+	return r;
+}
+
+/**
+ * kvmppc_handle_exit
+ *
+ * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV)
+ */
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                       unsigned int exit_nr)
+{
+	enum emulation_result er;
+	int r = RESUME_HOST;
+
+	local_irq_enable();
+
+	run->exit_reason = KVM_EXIT_UNKNOWN;
+	run->ready_for_interrupt_injection = 1;
+
+	switch (exit_nr) {
+	case BOOKE_INTERRUPT_MACHINE_CHECK:
+		printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR));
+		kvmppc_dump_vcpu(vcpu);
+		r = RESUME_HOST;
+		break;
+
+	case BOOKE_INTERRUPT_EXTERNAL:
+	case BOOKE_INTERRUPT_DECREMENTER:
+		/* Since we switched IVPR back to the host's value, the host
+		 * handled this interrupt the moment we enabled interrupts.
+		 * Now we just offer it a chance to reschedule the guest. */
+
+		/* XXX At this point the TLB still holds our shadow TLB, so if
+		 * we do reschedule the host will fault over it. Perhaps we
+		 * should politely restore the host's entries to minimize
+		 * misses before ceding control. */
+		if (need_resched())
+			cond_resched();
+		if (exit_nr == BOOKE_INTERRUPT_DECREMENTER)
+			vcpu->stat.dec_exits++;
+		else
+			vcpu->stat.ext_intr_exits++;
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_PROGRAM:
+		if (vcpu->arch.msr & MSR_PR) {
+			/* Program traps generated by user-level software must be handled
+			 * by the guest kernel. */
+			vcpu->arch.esr = vcpu->arch.fault_esr;
+			kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+			r = RESUME_GUEST;
+			break;
+		}
+
+		er = kvmppc_emulate_instruction(run, vcpu);
+		switch (er) {
+		case EMULATE_DONE:
+			/* Future optimization: only reload non-volatiles if
+			 * they were actually modified by emulation. */
+			vcpu->stat.emulated_inst_exits++;
+			r = RESUME_GUEST_NV;
+			break;
+		case EMULATE_DO_DCR:
+			run->exit_reason = KVM_EXIT_DCR;
+			r = RESUME_HOST;
+			break;
+		case EMULATE_FAIL:
+			/* XXX Deliver Program interrupt to guest. */
+			printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n",
+			       __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+			/* For debugging, encode the failing instruction and
+			 * report it to userspace. */
+			run->hw.hardware_exit_reason = ~0ULL << 32;
+			run->hw.hardware_exit_reason |= vcpu->arch.last_inst;
+			r = RESUME_HOST;
+			break;
+		default:
+			BUG();
+		}
+		break;
+
+	case BOOKE_INTERRUPT_DATA_STORAGE:
+		vcpu->arch.dear = vcpu->arch.fault_dear;
+		vcpu->arch.esr = vcpu->arch.fault_esr;
+		kvmppc_queue_exception(vcpu, exit_nr);
+		vcpu->stat.dsi_exits++;
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_INST_STORAGE:
+		vcpu->arch.esr = vcpu->arch.fault_esr;
+		kvmppc_queue_exception(vcpu, exit_nr);
+		vcpu->stat.isi_exits++;
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_SYSCALL:
+		kvmppc_queue_exception(vcpu, exit_nr);
+		vcpu->stat.syscall_exits++;
+		r = RESUME_GUEST;
+		break;
+
+	case BOOKE_INTERRUPT_DTLB_MISS: {
+		struct tlbe *gtlbe;
+		unsigned long eaddr = vcpu->arch.fault_dear;
+		gfn_t gfn;
+
+		/* Check the guest TLB. */
+		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
+		if (!gtlbe) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_queue_exception(vcpu, exit_nr);
+			vcpu->arch.dear = vcpu->arch.fault_dear;
+			vcpu->arch.esr = vcpu->arch.fault_esr;
+			vcpu->stat.dtlb_real_miss_exits++;
+			r = RESUME_GUEST;
+			break;
+		}
+
+		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
+		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't, and it is RAM. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
+			               gtlbe->word2);
+			vcpu->stat.dtlb_virt_miss_exits++;
+			r = RESUME_GUEST;
+		} else {
+			/* Guest has mapped and accessed a page which is not
+			 * actually RAM. */
+			r = kvmppc_emulate_mmio(run, vcpu);
+		}
+
+		break;
+	}
+
+	case BOOKE_INTERRUPT_ITLB_MISS: {
+		struct tlbe *gtlbe;
+		unsigned long eaddr = vcpu->arch.pc;
+		gfn_t gfn;
+
+		r = RESUME_GUEST;
+
+		/* Check the guest TLB. */
+		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
+		if (!gtlbe) {
+			/* The guest didn't have a mapping for it. */
+			kvmppc_queue_exception(vcpu, exit_nr);
+			vcpu->stat.itlb_real_miss_exits++;
+			break;
+		}
+
+		vcpu->stat.itlb_virt_miss_exits++;
+
+		gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT;
+
+		if (kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+			/* The guest TLB had a mapping, but the shadow TLB
+			 * didn't. This could be because:
+			 * a) the entry is mapping the host kernel, or
+			 * b) the guest used a large mapping which we're faking
+			 * Either way, we need to satisfy the fault without
+			 * invoking the guest. */
+			kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid,
+			               gtlbe->word2);
+		} else {
+			/* Guest mapped and leaped at non-RAM! */
+			kvmppc_queue_exception(vcpu,
+			                       BOOKE_INTERRUPT_MACHINE_CHECK);
+		}
+
+		break;
+	}
+
+	default:
+		printk(KERN_EMERG "exit_nr %d\n", exit_nr);
+		BUG();
+	}
+
+	local_irq_disable();
+
+	kvmppc_check_and_deliver_interrupts(vcpu);
+
+	/* Do some exit accounting. */
+	vcpu->stat.sum_exits++;
+	if (!(r & RESUME_HOST)) {
+		/* To avoid clobbering exit_reason, only check for signals if
+		 * we aren't already exiting to userspace for some other
+		 * reason. */
+		if (signal_pending(current)) {
+			run->exit_reason = KVM_EXIT_INTR;
+			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
+
+			vcpu->stat.signal_exits++;
+		} else {
+			vcpu->stat.light_exits++;
+		}
+	} else {
+		switch (run->exit_reason) {
+		case KVM_EXIT_MMIO:
+			vcpu->stat.mmio_exits++;
+			break;
+		case KVM_EXIT_DCR:
+			vcpu->stat.dcr_exits++;
+			break;
+		case KVM_EXIT_INTR:
+			vcpu->stat.signal_exits++;
+			break;
+		}
+	}
+
+	return r;
+}
+
+/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	struct tlbe *tlbe = &vcpu->arch.guest_tlb[0];
+
+	tlbe->tid = 0;
+	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
+	tlbe->word1 = 0;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR;
+
+	tlbe++;
+	tlbe->tid = 0;
+	tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID;
+	tlbe->word1 = 0xef600000;
+	tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR
+	              | PPC44x_TLB_I | PPC44x_TLB_G;
+
+	vcpu->arch.pc = 0;
+	vcpu->arch.msr = 0;
+	vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
+
+	/* Eye-catching number so we know if the guest takes an interrupt
+	 * before it's programmed its own IVPR. */
+	vcpu->arch.ivpr = 0x55550000;
+
+	/* Since the guest can directly access the timebase, it must know the
+	 * real timebase frequency. Accordingly, it must see the state of
+	 * CCR1[TCS]. */
+	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	regs->pc = vcpu->arch.pc;
+	regs->cr = vcpu->arch.cr;
+	regs->ctr = vcpu->arch.ctr;
+	regs->lr = vcpu->arch.lr;
+	regs->xer = vcpu->arch.xer;
+	regs->msr = vcpu->arch.msr;
+	regs->srr0 = vcpu->arch.srr0;
+	regs->srr1 = vcpu->arch.srr1;
+	regs->pid = vcpu->arch.pid;
+	regs->sprg0 = vcpu->arch.sprg0;
+	regs->sprg1 = vcpu->arch.sprg1;
+	regs->sprg2 = vcpu->arch.sprg2;
+	regs->sprg3 = vcpu->arch.sprg3;
+	regs->sprg5 = vcpu->arch.sprg4;
+	regs->sprg6 = vcpu->arch.sprg5;
+	regs->sprg7 = vcpu->arch.sprg6;
+
+	for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+		regs->gpr[i] = vcpu->arch.gpr[i];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	int i;
+
+	vcpu->arch.pc = regs->pc;
+	vcpu->arch.cr = regs->cr;
+	vcpu->arch.ctr = regs->ctr;
+	vcpu->arch.lr = regs->lr;
+	vcpu->arch.xer = regs->xer;
+	vcpu->arch.msr = regs->msr;
+	vcpu->arch.srr0 = regs->srr0;
+	vcpu->arch.srr1 = regs->srr1;
+	vcpu->arch.sprg0 = regs->sprg0;
+	vcpu->arch.sprg1 = regs->sprg1;
+	vcpu->arch.sprg2 = regs->sprg2;
+	vcpu->arch.sprg3 = regs->sprg3;
+	vcpu->arch.sprg5 = regs->sprg4;
+	vcpu->arch.sprg6 = regs->sprg5;
+	vcpu->arch.sprg7 = regs->sprg6;
+
+	for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
+		vcpu->arch.gpr[i] = regs->gpr[i];
+
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -ENOTSUPP;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	return -ENOTSUPP;
+}
+
+/* 'linear_address' is actually an encoding of AS|PID|EADDR . */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+                                  struct kvm_translation *tr)
+{
+	struct tlbe *gtlbe;
+	int index;
+	gva_t eaddr;
+	u8 pid;
+	u8 as;
+
+	eaddr = tr->linear_address;
+	pid = (tr->linear_address >> 32) & 0xff;
+	as = (tr->linear_address >> 40) & 0x1;
+
+	index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as);
+	if (index == -1) {
+		tr->valid = 0;
+		return 0;
+	}
+
+	gtlbe = &vcpu->arch.guest_tlb[index];
+
+	tr->physical_address = tlb_xlate(gtlbe, eaddr);
+	/* XXX what does "writeable" and "usermode" even mean? */
+	tr->valid = 1;
+
+	return 0;
+}
diff --git a/arch/powerpc/kvm/booke_host.c b/arch/powerpc/kvm/booke_host.c
new file mode 100644
index 000000000000..b480341bc31e
--- /dev/null
+++ b/arch/powerpc/kvm/booke_host.c
@@ -0,0 +1,83 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <asm/cacheflush.h>
+#include <asm/kvm_ppc.h>
+
+unsigned long kvmppc_booke_handlers;
+
+static int kvmppc_booke_init(void)
+{
+	unsigned long ivor[16];
+	unsigned long max_ivor = 0;
+	int i;
+
+	/* We install our own exception handlers by hijacking IVPR. IVPR must
+	 * be 16-bit aligned, so we need a 64KB allocation. */
+	kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+	                                         VCPU_SIZE_ORDER);
+	if (!kvmppc_booke_handlers)
+		return -ENOMEM;
+
+	/* XXX make sure our handlers are smaller than Linux's */
+
+	/* Copy our interrupt handlers to match host IVORs. That way we don't
+	 * have to swap the IVORs on every guest/host transition. */
+	ivor[0] = mfspr(SPRN_IVOR0);
+	ivor[1] = mfspr(SPRN_IVOR1);
+	ivor[2] = mfspr(SPRN_IVOR2);
+	ivor[3] = mfspr(SPRN_IVOR3);
+	ivor[4] = mfspr(SPRN_IVOR4);
+	ivor[5] = mfspr(SPRN_IVOR5);
+	ivor[6] = mfspr(SPRN_IVOR6);
+	ivor[7] = mfspr(SPRN_IVOR7);
+	ivor[8] = mfspr(SPRN_IVOR8);
+	ivor[9] = mfspr(SPRN_IVOR9);
+	ivor[10] = mfspr(SPRN_IVOR10);
+	ivor[11] = mfspr(SPRN_IVOR11);
+	ivor[12] = mfspr(SPRN_IVOR12);
+	ivor[13] = mfspr(SPRN_IVOR13);
+	ivor[14] = mfspr(SPRN_IVOR14);
+	ivor[15] = mfspr(SPRN_IVOR15);
+
+	for (i = 0; i < 16; i++) {
+		if (ivor[i] > max_ivor)
+			max_ivor = ivor[i];
+
+		memcpy((void *)kvmppc_booke_handlers + ivor[i],
+		       kvmppc_handlers_start + i * kvmppc_handler_len,
+		       kvmppc_handler_len);
+	}
+	flush_icache_range(kvmppc_booke_handlers,
+	                   kvmppc_booke_handlers + max_ivor + kvmppc_handler_len);
+
+	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+static void __exit kvmppc_booke_exit(void)
+{
+	free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER);
+	kvm_exit();
+}
+
+module_init(kvmppc_booke_init)
+module_exit(kvmppc_booke_exit)
diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S
new file mode 100644
index 000000000000..3b653b5309b8
--- /dev/null
+++ b/arch/powerpc/kvm/booke_interrupts.S
@@ -0,0 +1,436 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <asm/ppc_asm.h>
+#include <asm/kvm_asm.h>
+#include <asm/reg.h>
+#include <asm/mmu-44x.h>
+#include <asm/page.h>
+#include <asm/asm-offsets.h>
+
+#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS)
+
+#define VCPU_GPR(n)     (VCPU_GPRS + (n * 4))
+
+/* The host stack layout: */
+#define HOST_R1         0 /* Implied by stwu. */
+#define HOST_CALLEE_LR  4
+#define HOST_RUN        8
+/* r2 is special: it holds 'current', and it made nonvolatile in the
+ * kernel with the -ffixed-r2 gcc option. */
+#define HOST_R2         12
+#define HOST_NV_GPRS    16
+#define HOST_NV_GPR(n)  (HOST_NV_GPRS + ((n - 14) * 4))
+#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4)
+#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */
+#define HOST_STACK_LR   (HOST_STACK_SIZE + 4) /* In caller stack frame. */
+
+#define NEED_INST_MASK ((1<<BOOKE_INTERRUPT_PROGRAM) | \
+                        (1<<BOOKE_INTERRUPT_DTLB_MISS))
+
+#define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
+                        (1<<BOOKE_INTERRUPT_DTLB_MISS))
+
+#define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \
+                       (1<<BOOKE_INTERRUPT_INST_STORAGE) | \
+                       (1<<BOOKE_INTERRUPT_PROGRAM) | \
+                       (1<<BOOKE_INTERRUPT_DTLB_MISS))
+
+.macro KVM_HANDLER ivor_nr
+_GLOBAL(kvmppc_handler_\ivor_nr)
+	/* Get pointer to vcpu and record exit number. */
+	mtspr	SPRN_SPRG0, r4
+	mfspr	r4, SPRN_SPRG1
+	stw	r5, VCPU_GPR(r5)(r4)
+	stw	r6, VCPU_GPR(r6)(r4)
+	mfctr	r5
+	lis	r6, kvmppc_resume_host@h
+	stw	r5, VCPU_CTR(r4)
+	li	r5, \ivor_nr
+	ori	r6, r6, kvmppc_resume_host@l
+	mtctr	r6
+	bctr
+.endm
+
+_GLOBAL(kvmppc_handlers_start)
+KVM_HANDLER BOOKE_INTERRUPT_CRITICAL
+KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK
+KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE
+KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE
+KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL
+KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT
+KVM_HANDLER BOOKE_INTERRUPT_PROGRAM
+KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_SYSCALL
+KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL
+KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER
+KVM_HANDLER BOOKE_INTERRUPT_FIT
+KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
+KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
+KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
+KVM_HANDLER BOOKE_INTERRUPT_DEBUG
+
+_GLOBAL(kvmppc_handler_len)
+	.long kvmppc_handler_1 - kvmppc_handler_0
+
+
+/* Registers:
+ *  SPRG0: guest r4
+ *  r4: vcpu pointer
+ *  r5: KVM exit number
+ */
+_GLOBAL(kvmppc_resume_host)
+	stw	r3, VCPU_GPR(r3)(r4)
+	mfcr	r3
+	stw	r3, VCPU_CR(r4)
+	stw	r7, VCPU_GPR(r7)(r4)
+	stw	r8, VCPU_GPR(r8)(r4)
+	stw	r9, VCPU_GPR(r9)(r4)
+
+	li	r6, 1
+	slw	r6, r6, r5
+
+	/* Save the faulting instruction and all GPRs for emulation. */
+	andi.	r7, r6, NEED_INST_MASK
+	beq	..skip_inst_copy
+	mfspr	r9, SPRN_SRR0
+	mfmsr	r8
+	ori	r7, r8, MSR_DS
+	mtmsr	r7
+	isync
+	lwz	r9, 0(r9)
+	mtmsr	r8
+	isync
+	stw	r9, VCPU_LAST_INST(r4)
+
+	stw	r15, VCPU_GPR(r15)(r4)
+	stw	r16, VCPU_GPR(r16)(r4)
+	stw	r17, VCPU_GPR(r17)(r4)
+	stw	r18, VCPU_GPR(r18)(r4)
+	stw	r19, VCPU_GPR(r19)(r4)
+	stw	r20, VCPU_GPR(r20)(r4)
+	stw	r21, VCPU_GPR(r21)(r4)
+	stw	r22, VCPU_GPR(r22)(r4)
+	stw	r23, VCPU_GPR(r23)(r4)
+	stw	r24, VCPU_GPR(r24)(r4)
+	stw	r25, VCPU_GPR(r25)(r4)
+	stw	r26, VCPU_GPR(r26)(r4)
+	stw	r27, VCPU_GPR(r27)(r4)
+	stw	r28, VCPU_GPR(r28)(r4)
+	stw	r29, VCPU_GPR(r29)(r4)
+	stw	r30, VCPU_GPR(r30)(r4)
+	stw	r31, VCPU_GPR(r31)(r4)
+..skip_inst_copy:
+
+	/* Also grab DEAR and ESR before the host can clobber them. */
+
+	andi.	r7, r6, NEED_DEAR_MASK
+	beq	..skip_dear
+	mfspr	r9, SPRN_DEAR
+	stw	r9, VCPU_FAULT_DEAR(r4)
+..skip_dear:
+
+	andi.	r7, r6, NEED_ESR_MASK
+	beq	..skip_esr
+	mfspr	r9, SPRN_ESR
+	stw	r9, VCPU_FAULT_ESR(r4)
+..skip_esr:
+
+	/* Save remaining volatile guest register state to vcpu. */
+	stw	r0, VCPU_GPR(r0)(r4)
+	stw	r1, VCPU_GPR(r1)(r4)
+	stw	r2, VCPU_GPR(r2)(r4)
+	stw	r10, VCPU_GPR(r10)(r4)
+	stw	r11, VCPU_GPR(r11)(r4)
+	stw	r12, VCPU_GPR(r12)(r4)
+	stw	r13, VCPU_GPR(r13)(r4)
+	stw	r14, VCPU_GPR(r14)(r4) /* We need a NV GPR below. */
+	mflr	r3
+	stw	r3, VCPU_LR(r4)
+	mfxer	r3
+	stw	r3, VCPU_XER(r4)
+	mfspr	r3, SPRN_SPRG0
+	stw	r3, VCPU_GPR(r4)(r4)
+	mfspr	r3, SPRN_SRR0
+	stw	r3, VCPU_PC(r4)
+
+	/* Restore host stack pointer and PID before IVPR, since the host
+	 * exception handlers use them. */
+	lwz	r1, VCPU_HOST_STACK(r4)
+	lwz	r3, VCPU_HOST_PID(r4)
+	mtspr	SPRN_PID, r3
+
+	/* Restore host IVPR before re-enabling interrupts. We cheat and know
+	 * that Linux IVPR is always 0xc0000000. */
+	lis	r3, 0xc000
+	mtspr	SPRN_IVPR, r3
+
+	/* Switch to kernel stack and jump to handler. */
+	LOAD_REG_ADDR(r3, kvmppc_handle_exit)
+	mtctr	r3
+	lwz	r3, HOST_RUN(r1)
+	lwz	r2, HOST_R2(r1)
+	mr	r14, r4 /* Save vcpu pointer. */
+
+	bctrl	/* kvmppc_handle_exit() */
+
+	/* Restore vcpu pointer and the nonvolatiles we used. */
+	mr	r4, r14
+	lwz	r14, VCPU_GPR(r14)(r4)
+
+	/* Sometimes instruction emulation must restore complete GPR state. */
+	andi.	r5, r3, RESUME_FLAG_NV
+	beq	..skip_nv_load
+	lwz	r15, VCPU_GPR(r15)(r4)
+	lwz	r16, VCPU_GPR(r16)(r4)
+	lwz	r17, VCPU_GPR(r17)(r4)
+	lwz	r18, VCPU_GPR(r18)(r4)
+	lwz	r19, VCPU_GPR(r19)(r4)
+	lwz	r20, VCPU_GPR(r20)(r4)
+	lwz	r21, VCPU_GPR(r21)(r4)
+	lwz	r22, VCPU_GPR(r22)(r4)
+	lwz	r23, VCPU_GPR(r23)(r4)
+	lwz	r24, VCPU_GPR(r24)(r4)
+	lwz	r25, VCPU_GPR(r25)(r4)
+	lwz	r26, VCPU_GPR(r26)(r4)
+	lwz	r27, VCPU_GPR(r27)(r4)
+	lwz	r28, VCPU_GPR(r28)(r4)
+	lwz	r29, VCPU_GPR(r29)(r4)
+	lwz	r30, VCPU_GPR(r30)(r4)
+	lwz	r31, VCPU_GPR(r31)(r4)
+..skip_nv_load:
+
+	/* Should we return to the guest? */
+	andi.	r5, r3, RESUME_FLAG_HOST
+	beq	lightweight_exit
+
+	srawi	r3, r3, 2 /* Shift -ERR back down. */
+
+heavyweight_exit:
+	/* Not returning to guest. */
+
+	/* We already saved guest volatile register state; now save the
+	 * non-volatiles. */
+	stw	r15, VCPU_GPR(r15)(r4)
+	stw	r16, VCPU_GPR(r16)(r4)
+	stw	r17, VCPU_GPR(r17)(r4)
+	stw	r18, VCPU_GPR(r18)(r4)
+	stw	r19, VCPU_GPR(r19)(r4)
+	stw	r20, VCPU_GPR(r20)(r4)
+	stw	r21, VCPU_GPR(r21)(r4)
+	stw	r22, VCPU_GPR(r22)(r4)
+	stw	r23, VCPU_GPR(r23)(r4)
+	stw	r24, VCPU_GPR(r24)(r4)
+	stw	r25, VCPU_GPR(r25)(r4)
+	stw	r26, VCPU_GPR(r26)(r4)
+	stw	r27, VCPU_GPR(r27)(r4)
+	stw	r28, VCPU_GPR(r28)(r4)
+	stw	r29, VCPU_GPR(r29)(r4)
+	stw	r30, VCPU_GPR(r30)(r4)
+	stw	r31, VCPU_GPR(r31)(r4)
+
+	/* Load host non-volatile register state from host stack. */
+	lwz	r14, HOST_NV_GPR(r14)(r1)
+	lwz	r15, HOST_NV_GPR(r15)(r1)
+	lwz	r16, HOST_NV_GPR(r16)(r1)
+	lwz	r17, HOST_NV_GPR(r17)(r1)
+	lwz	r18, HOST_NV_GPR(r18)(r1)
+	lwz	r19, HOST_NV_GPR(r19)(r1)
+	lwz	r20, HOST_NV_GPR(r20)(r1)
+	lwz	r21, HOST_NV_GPR(r21)(r1)
+	lwz	r22, HOST_NV_GPR(r22)(r1)
+	lwz	r23, HOST_NV_GPR(r23)(r1)
+	lwz	r24, HOST_NV_GPR(r24)(r1)
+	lwz	r25, HOST_NV_GPR(r25)(r1)
+	lwz	r26, HOST_NV_GPR(r26)(r1)
+	lwz	r27, HOST_NV_GPR(r27)(r1)
+	lwz	r28, HOST_NV_GPR(r28)(r1)
+	lwz	r29, HOST_NV_GPR(r29)(r1)
+	lwz	r30, HOST_NV_GPR(r30)(r1)
+	lwz	r31, HOST_NV_GPR(r31)(r1)
+
+	/* Return to kvm_vcpu_run(). */
+	lwz	r4, HOST_STACK_LR(r1)
+	addi	r1, r1, HOST_STACK_SIZE
+	mtlr	r4
+	/* r3 still contains the return code from kvmppc_handle_exit(). */
+	blr
+
+
+/* Registers:
+ *  r3: kvm_run pointer
+ *  r4: vcpu pointer
+ */
+_GLOBAL(__kvmppc_vcpu_run)
+	stwu	r1, -HOST_STACK_SIZE(r1)
+	stw	r1, VCPU_HOST_STACK(r4)	/* Save stack pointer to vcpu. */
+
+	/* Save host state to stack. */
+	stw	r3, HOST_RUN(r1)
+	mflr	r3
+	stw	r3, HOST_STACK_LR(r1)
+
+	/* Save host non-volatile register state to stack. */
+	stw	r14, HOST_NV_GPR(r14)(r1)
+	stw	r15, HOST_NV_GPR(r15)(r1)
+	stw	r16, HOST_NV_GPR(r16)(r1)
+	stw	r17, HOST_NV_GPR(r17)(r1)
+	stw	r18, HOST_NV_GPR(r18)(r1)
+	stw	r19, HOST_NV_GPR(r19)(r1)
+	stw	r20, HOST_NV_GPR(r20)(r1)
+	stw	r21, HOST_NV_GPR(r21)(r1)
+	stw	r22, HOST_NV_GPR(r22)(r1)
+	stw	r23, HOST_NV_GPR(r23)(r1)
+	stw	r24, HOST_NV_GPR(r24)(r1)
+	stw	r25, HOST_NV_GPR(r25)(r1)
+	stw	r26, HOST_NV_GPR(r26)(r1)
+	stw	r27, HOST_NV_GPR(r27)(r1)
+	stw	r28, HOST_NV_GPR(r28)(r1)
+	stw	r29, HOST_NV_GPR(r29)(r1)
+	stw	r30, HOST_NV_GPR(r30)(r1)
+	stw	r31, HOST_NV_GPR(r31)(r1)
+
+	/* Load guest non-volatiles. */
+	lwz	r14, VCPU_GPR(r14)(r4)
+	lwz	r15, VCPU_GPR(r15)(r4)
+	lwz	r16, VCPU_GPR(r16)(r4)
+	lwz	r17, VCPU_GPR(r17)(r4)
+	lwz	r18, VCPU_GPR(r18)(r4)
+	lwz	r19, VCPU_GPR(r19)(r4)
+	lwz	r20, VCPU_GPR(r20)(r4)
+	lwz	r21, VCPU_GPR(r21)(r4)
+	lwz	r22, VCPU_GPR(r22)(r4)
+	lwz	r23, VCPU_GPR(r23)(r4)
+	lwz	r24, VCPU_GPR(r24)(r4)
+	lwz	r25, VCPU_GPR(r25)(r4)
+	lwz	r26, VCPU_GPR(r26)(r4)
+	lwz	r27, VCPU_GPR(r27)(r4)
+	lwz	r28, VCPU_GPR(r28)(r4)
+	lwz	r29, VCPU_GPR(r29)(r4)
+	lwz	r30, VCPU_GPR(r30)(r4)
+	lwz	r31, VCPU_GPR(r31)(r4)
+
+lightweight_exit:
+	stw	r2, HOST_R2(r1)
+
+	mfspr	r3, SPRN_PID
+	stw	r3, VCPU_HOST_PID(r4)
+	lwz	r3, VCPU_PID(r4)
+	mtspr	SPRN_PID, r3
+
+	/* Prevent all TLB updates. */
+	mfmsr	r5
+	lis	r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
+	ori	r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
+	andc	r6, r5, r6
+	mtmsr	r6
+
+	/* Save the host's non-pinned TLB mappings, and load the guest mappings
+	 * over them. Leave the host's "pinned" kernel mappings in place. */
+	/* XXX optimization: use generation count to avoid swapping unmodified
+	 * entries. */
+	mfspr	r10, SPRN_MMUCR			/* Save host MMUCR. */
+	lis	r8, tlb_44x_hwater@ha
+	lwz	r8, tlb_44x_hwater@l(r8)
+	addi	r3, r4, VCPU_HOST_TLB - 4
+	addi	r9, r4, VCPU_SHADOW_TLB - 4
+	li	r6, 0
+1:
+	/* Save host entry. */
+	tlbre	r7, r6, PPC44x_TLB_PAGEID
+	mfspr	r5, SPRN_MMUCR
+	stwu	r5, 4(r3)
+	stwu	r7, 4(r3)
+	tlbre	r7, r6, PPC44x_TLB_XLAT
+	stwu	r7, 4(r3)
+	tlbre	r7, r6, PPC44x_TLB_ATTRIB
+	stwu	r7, 4(r3)
+	/* Load guest entry. */
+	lwzu	r7, 4(r9)
+	mtspr	SPRN_MMUCR, r7
+	lwzu	r7, 4(r9)
+	tlbwe	r7, r6, PPC44x_TLB_PAGEID
+	lwzu	r7, 4(r9)
+	tlbwe	r7, r6, PPC44x_TLB_XLAT
+	lwzu	r7, 4(r9)
+	tlbwe	r7, r6, PPC44x_TLB_ATTRIB
+	/* Increment index. */
+	addi	r6, r6, 1
+	cmpw	r6, r8
+	blt	1b
+	mtspr	SPRN_MMUCR, r10			/* Restore host MMUCR. */
+
+	iccci	0, 0 /* XXX hack */
+
+	/* Load some guest volatiles. */
+	lwz	r0, VCPU_GPR(r0)(r4)
+	lwz	r2, VCPU_GPR(r2)(r4)
+	lwz	r9, VCPU_GPR(r9)(r4)
+	lwz	r10, VCPU_GPR(r10)(r4)
+	lwz	r11, VCPU_GPR(r11)(r4)
+	lwz	r12, VCPU_GPR(r12)(r4)
+	lwz	r13, VCPU_GPR(r13)(r4)
+	lwz	r3, VCPU_LR(r4)
+	mtlr	r3
+	lwz	r3, VCPU_XER(r4)
+	mtxer	r3
+
+	/* Switch the IVPR. XXX If we take a TLB miss after this we're screwed,
+	 * so how do we make sure vcpu won't fault? */
+	lis	r8, kvmppc_booke_handlers@ha
+	lwz	r8, kvmppc_booke_handlers@l(r8)
+	mtspr	SPRN_IVPR, r8
+
+	/* Save vcpu pointer for the exception handlers. */
+	mtspr	SPRN_SPRG1, r4
+
+	/* Can't switch the stack pointer until after IVPR is switched,
+	 * because host interrupt handlers would get confused. */
+	lwz	r1, VCPU_GPR(r1)(r4)
+
+	/* XXX handle USPRG0 */
+	/* Host interrupt handlers may have clobbered these guest-readable
+	 * SPRGs, so we need to reload them here with the guest's values. */
+	lwz	r3, VCPU_SPRG4(r4)
+	mtspr	SPRN_SPRG4, r3
+	lwz	r3, VCPU_SPRG5(r4)
+	mtspr	SPRN_SPRG5, r3
+	lwz	r3, VCPU_SPRG6(r4)
+	mtspr	SPRN_SPRG6, r3
+	lwz	r3, VCPU_SPRG7(r4)
+	mtspr	SPRN_SPRG7, r3
+
+	/* Finish loading guest volatiles and jump to guest. */
+	lwz	r3, VCPU_CTR(r4)
+	mtctr	r3
+	lwz	r3, VCPU_CR(r4)
+	mtcr	r3
+	lwz	r5, VCPU_GPR(r5)(r4)
+	lwz	r6, VCPU_GPR(r6)(r4)
+	lwz	r7, VCPU_GPR(r7)(r4)
+	lwz	r8, VCPU_GPR(r8)(r4)
+	lwz	r3, VCPU_PC(r4)
+	mtsrr0	r3
+	lwz	r3, VCPU_MSR(r4)
+	oris	r3, r3, KVMPPC_MSR_MASK@h
+	ori	r3, r3, KVMPPC_MSR_MASK@l
+	mtsrr1	r3
+	lwz	r3, VCPU_GPR(r3)(r4)
+	lwz	r4, VCPU_GPR(r4)(r4)
+	rfi
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
new file mode 100644
index 000000000000..a03fe0c80698
--- /dev/null
+++ b/arch/powerpc/kvm/emulate.c
@@ -0,0 +1,760 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm_host.h>
+
+#include <asm/dcr.h>
+#include <asm/dcr-regs.h>
+#include <asm/time.h>
+#include <asm/byteorder.h>
+#include <asm/kvm_ppc.h>
+
+#include "44x_tlb.h"
+
+/* Instruction decoding */
+static inline unsigned int get_op(u32 inst)
+{
+	return inst >> 26;
+}
+
+static inline unsigned int get_xop(u32 inst)
+{
+	return (inst >> 1) & 0x3ff;
+}
+
+static inline unsigned int get_sprn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_dcrn(u32 inst)
+{
+	return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0);
+}
+
+static inline unsigned int get_rt(u32 inst)
+{
+	return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_rs(u32 inst)
+{
+	return (inst >> 21) & 0x1f;
+}
+
+static inline unsigned int get_ra(u32 inst)
+{
+	return (inst >> 16) & 0x1f;
+}
+
+static inline unsigned int get_rb(u32 inst)
+{
+	return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_rc(u32 inst)
+{
+	return inst & 0x1;
+}
+
+static inline unsigned int get_ws(u32 inst)
+{
+	return (inst >> 11) & 0x1f;
+}
+
+static inline unsigned int get_d(u32 inst)
+{
+	return inst & 0xffff;
+}
+
+static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu,
+                             const struct tlbe *tlbe)
+{
+	gpa_t gpa;
+
+	if (!get_tlb_v(tlbe))
+		return 0;
+
+	/* Does it match current guest AS? */
+	/* XXX what about IS != DS? */
+	if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS))
+		return 0;
+
+	gpa = get_tlb_raddr(tlbe);
+	if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT))
+		/* Mapping is not for RAM. */
+		return 0;
+
+	return 1;
+}
+
+static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst)
+{
+	u64 eaddr;
+	u64 raddr;
+	u64 asid;
+	u32 flags;
+	struct tlbe *tlbe;
+	unsigned int ra;
+	unsigned int rs;
+	unsigned int ws;
+	unsigned int index;
+
+	ra = get_ra(inst);
+	rs = get_rs(inst);
+	ws = get_ws(inst);
+
+	index = vcpu->arch.gpr[ra];
+	if (index > PPC44x_TLB_SIZE) {
+		printk("%s: index %d\n", __func__, index);
+		kvmppc_dump_vcpu(vcpu);
+		return EMULATE_FAIL;
+	}
+
+	tlbe = &vcpu->arch.guest_tlb[index];
+
+	/* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */
+	if (tlbe->word0 & PPC44x_TLB_VALID) {
+		eaddr = get_tlb_eaddr(tlbe);
+		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+		kvmppc_mmu_invalidate(vcpu, eaddr, asid);
+	}
+
+	switch (ws) {
+	case PPC44x_TLB_PAGEID:
+		tlbe->tid = vcpu->arch.mmucr & 0xff;
+		tlbe->word0 = vcpu->arch.gpr[rs];
+		break;
+
+	case PPC44x_TLB_XLAT:
+		tlbe->word1 = vcpu->arch.gpr[rs];
+		break;
+
+	case PPC44x_TLB_ATTRIB:
+		tlbe->word2 = vcpu->arch.gpr[rs];
+		break;
+
+	default:
+		return EMULATE_FAIL;
+	}
+
+	if (tlbe_is_host_safe(vcpu, tlbe)) {
+		eaddr = get_tlb_eaddr(tlbe);
+		raddr = get_tlb_raddr(tlbe);
+		asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid;
+		flags = tlbe->word2 & 0xffff;
+
+		/* Create a 4KB mapping on the host. If the guest wanted a
+		 * large page, only the first 4KB is mapped here and the rest
+		 * are mapped on the fly. */
+		kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags);
+	}
+
+	return EMULATE_DONE;
+}
+
+static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->arch.tcr & TCR_DIE) {
+		/* The decrementer ticks at the same rate as the timebase, so
+		 * that's how we convert the guest DEC value to the number of
+		 * host ticks. */
+		unsigned long nr_jiffies;
+
+		nr_jiffies = vcpu->arch.dec / tb_ticks_per_jiffy;
+		mod_timer(&vcpu->arch.dec_timer,
+		          get_jiffies_64() + nr_jiffies);
+	} else {
+		del_timer(&vcpu->arch.dec_timer);
+	}
+}
+
+static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.pc = vcpu->arch.srr0;
+	kvmppc_set_msr(vcpu, vcpu->arch.srr1);
+}
+
+/* XXX to do:
+ * lhax
+ * lhaux
+ * lswx
+ * lswi
+ * stswx
+ * stswi
+ * lha
+ * lhau
+ * lmw
+ * stmw
+ *
+ * XXX is_bigendian should depend on MMU mapping or MSR[LE]
+ */
+int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	u32 inst = vcpu->arch.last_inst;
+	u32 ea;
+	int ra;
+	int rb;
+	int rc;
+	int rs;
+	int rt;
+	int sprn;
+	int dcrn;
+	enum emulation_result emulated = EMULATE_DONE;
+	int advance = 1;
+
+	switch (get_op(inst)) {
+	case 3:                                                 /* trap */
+		printk("trap!\n");
+		kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM);
+		advance = 0;
+		break;
+
+	case 19:
+		switch (get_xop(inst)) {
+		case 50:                                        /* rfi */
+			kvmppc_emul_rfi(vcpu);
+			advance = 0;
+			break;
+
+		default:
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+
+	case 31:
+		switch (get_xop(inst)) {
+
+		case 83:                                        /* mfmsr */
+			rt = get_rt(inst);
+			vcpu->arch.gpr[rt] = vcpu->arch.msr;
+			break;
+
+		case 87:                                        /* lbzx */
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+			break;
+
+		case 131:                                       /* wrtee */
+			rs = get_rs(inst);
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+			                 | (vcpu->arch.gpr[rs] & MSR_EE);
+			break;
+
+		case 146:                                       /* mtmsr */
+			rs = get_rs(inst);
+			kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
+			break;
+
+		case 163:                                       /* wrteei */
+			vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
+			                 | (inst & MSR_EE);
+			break;
+
+		case 215:                                       /* stbx */
+			rs = get_rs(inst);
+			emulated = kvmppc_handle_store(run, vcpu,
+			                               vcpu->arch.gpr[rs],
+			                               1, 1);
+			break;
+
+		case 247:                                       /* stbux */
+			rs = get_rs(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			ea = vcpu->arch.gpr[rb];
+			if (ra)
+				ea += vcpu->arch.gpr[ra];
+
+			emulated = kvmppc_handle_store(run, vcpu,
+			                               vcpu->arch.gpr[rs],
+			                               1, 1);
+			vcpu->arch.gpr[rs] = ea;
+			break;
+
+		case 279:                                       /* lhzx */
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+			break;
+
+		case 311:                                       /* lhzux */
+			rt = get_rt(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			ea = vcpu->arch.gpr[rb];
+			if (ra)
+				ea += vcpu->arch.gpr[ra];
+
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+			vcpu->arch.gpr[ra] = ea;
+			break;
+
+		case 323:                                       /* mfdcr */
+			dcrn = get_dcrn(inst);
+			rt = get_rt(inst);
+
+			/* The guest may access CPR0 registers to determine the timebase
+			 * frequency, and it must know the real host frequency because it
+			 * can directly access the timebase registers.
+			 *
+			 * It would be possible to emulate those accesses in userspace,
+			 * but userspace can really only figure out the end frequency.
+			 * We could decompose that into the factors that compute it, but
+			 * that's tricky math, and it's easier to just report the real
+			 * CPR0 values.
+			 */
+			switch (dcrn) {
+			case DCRN_CPR0_CONFIG_ADDR:
+				vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
+				break;
+			case DCRN_CPR0_CONFIG_DATA:
+				local_irq_disable();
+				mtdcr(DCRN_CPR0_CONFIG_ADDR,
+				      vcpu->arch.cpr0_cfgaddr);
+				vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
+				local_irq_enable();
+				break;
+			default:
+				run->dcr.dcrn = dcrn;
+				run->dcr.data =  0;
+				run->dcr.is_write = 0;
+				vcpu->arch.io_gpr = rt;
+				vcpu->arch.dcr_needed = 1;
+				emulated = EMULATE_DO_DCR;
+			}
+
+			break;
+
+		case 339:                                       /* mfspr */
+			sprn = get_sprn(inst);
+			rt = get_rt(inst);
+
+			switch (sprn) {
+			case SPRN_SRR0:
+				vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
+			case SPRN_SRR1:
+				vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
+			case SPRN_MMUCR:
+				vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
+			case SPRN_PID:
+				vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
+			case SPRN_IVPR:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
+			case SPRN_CCR0:
+				vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
+			case SPRN_CCR1:
+				vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
+			case SPRN_PVR:
+				vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
+			case SPRN_DEAR:
+				vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
+			case SPRN_ESR:
+				vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
+			case SPRN_DBCR0:
+				vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
+			case SPRN_DBCR1:
+				vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
+
+			/* Note: mftb and TBRL/TBWL are user-accessible, so
+			 * the guest can always access the real TB anyways.
+			 * In fact, we probably will never see these traps. */
+			case SPRN_TBWL:
+				vcpu->arch.gpr[rt] = mftbl(); break;
+			case SPRN_TBWU:
+				vcpu->arch.gpr[rt] = mftbu(); break;
+
+			case SPRN_SPRG0:
+				vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
+			case SPRN_SPRG1:
+				vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break;
+			case SPRN_SPRG2:
+				vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break;
+			case SPRN_SPRG3:
+				vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break;
+			/* Note: SPRG4-7 are user-readable, so we don't get
+			 * a trap. */
+
+			case SPRN_IVOR0:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break;
+			case SPRN_IVOR1:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break;
+			case SPRN_IVOR2:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break;
+			case SPRN_IVOR3:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break;
+			case SPRN_IVOR4:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break;
+			case SPRN_IVOR5:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break;
+			case SPRN_IVOR6:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break;
+			case SPRN_IVOR7:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break;
+			case SPRN_IVOR8:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break;
+			case SPRN_IVOR9:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break;
+			case SPRN_IVOR10:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break;
+			case SPRN_IVOR11:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break;
+			case SPRN_IVOR12:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break;
+			case SPRN_IVOR13:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break;
+			case SPRN_IVOR14:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break;
+			case SPRN_IVOR15:
+				vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break;
+
+			default:
+				printk("mfspr: unknown spr %x\n", sprn);
+				vcpu->arch.gpr[rt] = 0;
+				break;
+			}
+			break;
+
+		case 407:                                       /* sthx */
+			rs = get_rs(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			emulated = kvmppc_handle_store(run, vcpu,
+			                               vcpu->arch.gpr[rs],
+			                               2, 1);
+			break;
+
+		case 439:                                       /* sthux */
+			rs = get_rs(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			ea = vcpu->arch.gpr[rb];
+			if (ra)
+				ea += vcpu->arch.gpr[ra];
+
+			emulated = kvmppc_handle_store(run, vcpu,
+			                               vcpu->arch.gpr[rs],
+			                               2, 1);
+			vcpu->arch.gpr[ra] = ea;
+			break;
+
+		case 451:                                       /* mtdcr */
+			dcrn = get_dcrn(inst);
+			rs = get_rs(inst);
+
+			/* emulate some access in kernel */
+			switch (dcrn) {
+			case DCRN_CPR0_CONFIG_ADDR:
+				vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
+				break;
+			default:
+				run->dcr.dcrn = dcrn;
+				run->dcr.data = vcpu->arch.gpr[rs];
+				run->dcr.is_write = 1;
+				vcpu->arch.dcr_needed = 1;
+				emulated = EMULATE_DO_DCR;
+			}
+
+			break;
+
+		case 467:                                       /* mtspr */
+			sprn = get_sprn(inst);
+			rs = get_rs(inst);
+			switch (sprn) {
+			case SPRN_SRR0:
+				vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
+			case SPRN_SRR1:
+				vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
+			case SPRN_MMUCR:
+				vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
+			case SPRN_PID:
+				vcpu->arch.pid = vcpu->arch.gpr[rs]; break;
+			case SPRN_CCR0:
+				vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
+			case SPRN_CCR1:
+				vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
+			case SPRN_DEAR:
+				vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
+			case SPRN_ESR:
+				vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
+			case SPRN_DBCR0:
+				vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
+			case SPRN_DBCR1:
+				vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
+
+			/* XXX We need to context-switch the timebase for
+			 * watchdog and FIT. */
+			case SPRN_TBWL: break;
+			case SPRN_TBWU: break;
+
+			case SPRN_DEC:
+				vcpu->arch.dec = vcpu->arch.gpr[rs];
+				kvmppc_emulate_dec(vcpu);
+				break;
+
+			case SPRN_TSR:
+				vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
+
+			case SPRN_TCR:
+				vcpu->arch.tcr = vcpu->arch.gpr[rs];
+				kvmppc_emulate_dec(vcpu);
+				break;
+
+			case SPRN_SPRG0:
+				vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
+			case SPRN_SPRG1:
+				vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break;
+			case SPRN_SPRG2:
+				vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break;
+			case SPRN_SPRG3:
+				vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
+
+			/* Note: SPRG4-7 are user-readable. These values are
+			 * loaded into the real SPRGs when resuming the
+			 * guest. */
+			case SPRN_SPRG4:
+				vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
+			case SPRN_SPRG5:
+				vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
+			case SPRN_SPRG6:
+				vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
+			case SPRN_SPRG7:
+				vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
+
+			case SPRN_IVPR:
+				vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR0:
+				vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR1:
+				vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR2:
+				vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR3:
+				vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR4:
+				vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR5:
+				vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR6:
+				vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR7:
+				vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR8:
+				vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR9:
+				vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR10:
+				vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR11:
+				vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR12:
+				vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR13:
+				vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR14:
+				vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break;
+			case SPRN_IVOR15:
+				vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break;
+
+			default:
+				printk("mtspr: unknown spr %x\n", sprn);
+				emulated = EMULATE_FAIL;
+				break;
+			}
+			break;
+
+		case 470:                                       /* dcbi */
+			/* Do nothing. The guest is performing dcbi because
+			 * hardware DMA is not snooped by the dcache, but
+			 * emulated DMA either goes through the dcache as
+			 * normal writes, or the host kernel has handled dcache
+			 * coherence. */
+			break;
+
+		case 534:                                       /* lwbrx */
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0);
+			break;
+
+		case 566:                                       /* tlbsync */
+			break;
+
+		case 662:                                       /* stwbrx */
+			rs = get_rs(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			emulated = kvmppc_handle_store(run, vcpu,
+			                               vcpu->arch.gpr[rs],
+			                               4, 0);
+			break;
+
+		case 978:                                       /* tlbwe */
+			emulated = kvmppc_emul_tlbwe(vcpu, inst);
+			break;
+
+		case 914:       {                               /* tlbsx */
+			int index;
+			unsigned int as = get_mmucr_sts(vcpu);
+			unsigned int pid = get_mmucr_stid(vcpu);
+
+			rt = get_rt(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+			rc = get_rc(inst);
+
+			ea = vcpu->arch.gpr[rb];
+			if (ra)
+				ea += vcpu->arch.gpr[ra];
+
+			index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
+			if (rc) {
+				if (index < 0)
+					vcpu->arch.cr &= ~0x20000000;
+				else
+					vcpu->arch.cr |= 0x20000000;
+			}
+			vcpu->arch.gpr[rt] = index;
+
+			}
+			break;
+
+		case 790:                                       /* lhbrx */
+			rt = get_rt(inst);
+			emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0);
+			break;
+
+		case 918:                                       /* sthbrx */
+			rs = get_rs(inst);
+			ra = get_ra(inst);
+			rb = get_rb(inst);
+
+			emulated = kvmppc_handle_store(run, vcpu,
+			                               vcpu->arch.gpr[rs],
+			                               2, 0);
+			break;
+
+		case 966:                                       /* iccci */
+			break;
+
+		default:
+			printk("unknown: op %d xop %d\n", get_op(inst),
+				get_xop(inst));
+			emulated = EMULATE_FAIL;
+			break;
+		}
+		break;
+
+	case 32:                                                /* lwz */
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+		break;
+
+	case 33:                                                /* lwzu */
+		ra = get_ra(inst);
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		break;
+
+	case 34:                                                /* lbz */
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+		break;
+
+	case 35:                                                /* lbzu */
+		ra = get_ra(inst);
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
+		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		break;
+
+	case 36:                                                /* stw */
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		                               4, 1);
+		break;
+
+	case 37:                                                /* stwu */
+		ra = get_ra(inst);
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		                               4, 1);
+		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		break;
+
+	case 38:                                                /* stb */
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		                               1, 1);
+		break;
+
+	case 39:                                                /* stbu */
+		ra = get_ra(inst);
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		                               1, 1);
+		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		break;
+
+	case 40:                                                /* lhz */
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+		break;
+
+	case 41:                                                /* lhzu */
+		ra = get_ra(inst);
+		rt = get_rt(inst);
+		emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
+		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		break;
+
+	case 44:                                                /* sth */
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		                               2, 1);
+		break;
+
+	case 45:                                                /* sthu */
+		ra = get_ra(inst);
+		rs = get_rs(inst);
+		emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
+		                               2, 1);
+		vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
+		break;
+
+	default:
+		printk("unknown op %d\n", get_op(inst));
+		emulated = EMULATE_FAIL;
+		break;
+	}
+
+	if (advance)
+		vcpu->arch.pc += 4; /* Advance past emulated instruction. */
+
+	return emulated;
+}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
new file mode 100644
index 000000000000..bad40bd2d3ac
--- /dev/null
+++ b/arch/powerpc/kvm/powerpc.c
@@ -0,0 +1,436 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ *          Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+#include <linux/fs.h>
+#include <asm/cputable.h>
+#include <asm/uaccess.h>
+#include <asm/kvm_ppc.h>
+
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	return gfn;
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
+{
+	/* XXX implement me */
+	return 0;
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+{
+	return 1;
+}
+
+
+int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+	enum emulation_result er;
+	int r;
+
+	er = kvmppc_emulate_instruction(run, vcpu);
+	switch (er) {
+	case EMULATE_DONE:
+		/* Future optimization: only reload non-volatiles if they were
+		 * actually modified. */
+		r = RESUME_GUEST_NV;
+		break;
+	case EMULATE_DO_MMIO:
+		run->exit_reason = KVM_EXIT_MMIO;
+		/* We must reload nonvolatiles because "update" load/store
+		 * instructions modify register state. */
+		/* Future optimization: only reload non-volatiles if they were
+		 * actually modified. */
+		r = RESUME_HOST_NV;
+		break;
+	case EMULATE_FAIL:
+		/* XXX Deliver Program interrupt to guest. */
+		printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__,
+		       vcpu->arch.last_inst);
+		r = RESUME_HOST;
+		break;
+	default:
+		BUG();
+	}
+
+	return r;
+}
+
+void kvm_arch_hardware_enable(void *garbage)
+{
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+	int r;
+
+	if (strcmp(cur_cpu_spec->platform, "ppc440") == 0)
+		r = 0;
+	else
+		r = -ENOTSUPP;
+
+	*(int *)rtn = r;
+}
+
+struct kvm *kvm_arch_create_vm(void)
+{
+	struct kvm *kvm;
+
+	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+	if (!kvm)
+		return ERR_PTR(-ENOMEM);
+
+	return kvm;
+}
+
+static void kvmppc_free_vcpus(struct kvm *kvm)
+{
+	unsigned int i;
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (kvm->vcpus[i]) {
+			kvm_arch_vcpu_free(kvm->vcpus[i]);
+			kvm->vcpus[i] = NULL;
+		}
+	}
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	kvmppc_free_vcpus(kvm);
+	kvm_free_physmem(kvm);
+	kfree(kvm);
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+	int r;
+
+	switch (ext) {
+	case KVM_CAP_USER_MEMORY:
+		r = 1;
+		break;
+	default:
+		r = 0;
+		break;
+	}
+	return r;
+
+}
+
+long kvm_arch_dev_ioctl(struct file *filp,
+                        unsigned int ioctl, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_set_memory_region(struct kvm *kvm,
+                               struct kvm_userspace_memory_region *mem,
+                               struct kvm_memory_slot old,
+                               int user_alloc)
+{
+	return 0;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+	struct kvm_vcpu *vcpu;
+	int err;
+
+	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
+	if (!vcpu) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	err = kvm_vcpu_init(vcpu, kvm, id);
+	if (err)
+		goto free_vcpu;
+
+	return vcpu;
+
+free_vcpu:
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
+out:
+	return ERR_PTR(err);
+}
+
+void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
+{
+	kvm_vcpu_uninit(vcpu);
+	kmem_cache_free(kvm_vcpu_cache, vcpu);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	kvm_arch_vcpu_free(vcpu);
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER];
+
+	return test_bit(priority, &vcpu->arch.pending_exceptions);
+}
+
+static void kvmppc_decrementer_func(unsigned long data)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
+}
+
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func,
+	            (unsigned long)vcpu);
+
+	return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+}
+
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+                                    struct kvm_debug_guest *dbg)
+{
+	return -ENOTSUPP;
+}
+
+static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
+                                     struct kvm_run *run)
+{
+	u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+	*gpr = run->dcr.data;
+}
+
+static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
+                                      struct kvm_run *run)
+{
+	u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
+
+	if (run->mmio.len > sizeof(*gpr)) {
+		printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
+		return;
+	}
+
+	if (vcpu->arch.mmio_is_bigendian) {
+		switch (run->mmio.len) {
+		case 4: *gpr = *(u32 *)run->mmio.data; break;
+		case 2: *gpr = *(u16 *)run->mmio.data; break;
+		case 1: *gpr = *(u8 *)run->mmio.data; break;
+		}
+	} else {
+		/* Convert BE data from userland back to LE. */
+		switch (run->mmio.len) {
+		case 4: *gpr = ld_le32((u32 *)run->mmio.data); break;
+		case 2: *gpr = ld_le16((u16 *)run->mmio.data); break;
+		case 1: *gpr = *(u8 *)run->mmio.data; break;
+		}
+	}
+}
+
+int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                       unsigned int rt, unsigned int bytes, int is_bigendian)
+{
+	if (bytes > sizeof(run->mmio.data)) {
+		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
+		       run->mmio.len);
+	}
+
+	run->mmio.phys_addr = vcpu->arch.paddr_accessed;
+	run->mmio.len = bytes;
+	run->mmio.is_write = 0;
+
+	vcpu->arch.io_gpr = rt;
+	vcpu->arch.mmio_is_bigendian = is_bigendian;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_is_write = 0;
+
+	return EMULATE_DO_MMIO;
+}
+
+int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                        u32 val, unsigned int bytes, int is_bigendian)
+{
+	void *data = run->mmio.data;
+
+	if (bytes > sizeof(run->mmio.data)) {
+		printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__,
+		       run->mmio.len);
+	}
+
+	run->mmio.phys_addr = vcpu->arch.paddr_accessed;
+	run->mmio.len = bytes;
+	run->mmio.is_write = 1;
+	vcpu->mmio_needed = 1;
+	vcpu->mmio_is_write = 1;
+
+	/* Store the value at the lowest bytes in 'data'. */
+	if (is_bigendian) {
+		switch (bytes) {
+		case 4: *(u32 *)data = val; break;
+		case 2: *(u16 *)data = val; break;
+		case 1: *(u8  *)data = val; break;
+		}
+	} else {
+		/* Store LE value into 'data'. */
+		switch (bytes) {
+		case 4: st_le32(data, val); break;
+		case 2: st_le16(data, val); break;
+		case 1: *(u8 *)data = val; break;
+		}
+	}
+
+	return EMULATE_DO_MMIO;
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+{
+	int r;
+	sigset_t sigsaved;
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	if (vcpu->mmio_needed) {
+		if (!vcpu->mmio_is_write)
+			kvmppc_complete_mmio_load(vcpu, run);
+		vcpu->mmio_needed = 0;
+	} else if (vcpu->arch.dcr_needed) {
+		if (!vcpu->arch.dcr_is_write)
+			kvmppc_complete_dcr_load(vcpu, run);
+		vcpu->arch.dcr_needed = 0;
+	}
+
+	kvmppc_check_and_deliver_interrupts(vcpu);
+
+	local_irq_disable();
+	kvm_guest_enter();
+	r = __kvmppc_vcpu_run(run, vcpu);
+	kvm_guest_exit();
+	local_irq_enable();
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	return r;
+}
+
+int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
+{
+	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+                                    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+                                    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+                         unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	long r;
+
+	switch (ioctl) {
+	case KVM_INTERRUPT: {
+		struct kvm_interrupt irq;
+		r = -EFAULT;
+		if (copy_from_user(&irq, argp, sizeof(irq)))
+			goto out;
+		r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
+		break;
+	}
+	default:
+		r = -EINVAL;
+	}
+
+out:
+	return r;
+}
+
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
+{
+	return -ENOTSUPP;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+                       unsigned int ioctl, unsigned long arg)
+{
+	long r;
+
+	switch (ioctl) {
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+int kvm_arch_init(void *opaque)
+{
+	return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index ada249bf9779..ce10e2b1b902 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -202,7 +202,7 @@ adjust_total_lowmem(void)
 		cam_max_size = max_lowmem_size;
 
 	/* adjust lowmem size to max_lowmem_size */
-	ram = min(max_lowmem_size, total_lowmem);
+	ram = min(max_lowmem_size, (phys_addr_t)total_lowmem);
 
 	/* Calculate CAM values */
 	__cam0 = 1UL << 2 * (__ilog2(ram) / 2);
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S
index e10d76a860d3..ddeaf9e38ad5 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/hash_low_32.S
@@ -191,7 +191,7 @@ _GLOBAL(add_hash_page)
 	add	r3,r3,r0		/* note create_hpte trims to 24 bits */
 
 #ifdef CONFIG_SMP
-	rlwinm	r8,r1,0,0,18		/* use cpu number to make tag */
+	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT) /* use cpu number to make tag */
 	lwz	r8,TI_CPU(r8)		/* to go in mmu_hash_lock */
 	oris	r8,r8,12
 #endif /* CONFIG_SMP */
@@ -526,7 +526,7 @@ _GLOBAL(flush_hash_pages)
 #ifdef CONFIG_SMP
 	addis	r9,r7,mmu_hash_lock@ha
 	addi	r9,r9,mmu_hash_lock@l
-	rlwinm	r8,r1,0,0,18
+	rlwinm	r8,r1,0,0,(31-THREAD_SHIFT)
 	add	r8,r8,r7
 	lwz	r8,TI_CPU(r8)
 	oris	r8,r8,9
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 47325f23c51f..1952b4d3fa7f 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -59,7 +59,10 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long total_memory;
 unsigned long total_lowmem;
 
-phys_addr_t memstart_addr;
+phys_addr_t memstart_addr = (phys_addr_t)~0ull;
+EXPORT_SYMBOL(memstart_addr);
+phys_addr_t kernstart_addr;
+EXPORT_SYMBOL(kernstart_addr);
 phys_addr_t lowmem_end_addr;
 
 int boot_mapsize;
@@ -68,14 +71,6 @@ unsigned long agp_special_page;
 EXPORT_SYMBOL(agp_special_page);
 #endif
 
-#ifdef CONFIG_HIGHMEM
-pte_t *kmap_pte;
-pgprot_t kmap_prot;
-
-EXPORT_SYMBOL(kmap_prot);
-EXPORT_SYMBOL(kmap_pte);
-#endif
-
 void MMU_init(void);
 
 /* XXX should be in current.h  -- paulus */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 698bd000f98b..c5ac532a0161 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -72,7 +72,8 @@
 #warning TASK_SIZE is smaller than it needs to be.
 #endif
 
-phys_addr_t memstart_addr;
+phys_addr_t memstart_addr = ~0;
+phys_addr_t kernstart_addr;
 
 void free_initmem(void)
 {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 16def4dcff6d..d9e37f365b54 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -45,6 +45,7 @@
 #include <asm/tlb.h>
 #include <asm/sections.h>
 #include <asm/vdso.h>
+#include <asm/fixmap.h>
 
 #include "mmu_decl.h"
 
@@ -57,6 +58,20 @@ int init_bootmem_done;
 int mem_init_done;
 unsigned long memory_limit;
 
+#ifdef CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
+
+EXPORT_SYMBOL(kmap_prot);
+EXPORT_SYMBOL(kmap_pte);
+
+static inline pte_t *virt_to_kpte(unsigned long vaddr)
+{
+	return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
+			vaddr), vaddr), vaddr);
+}
+#endif
+
 int page_is_ram(unsigned long pfn)
 {
 	unsigned long paddr = (pfn << PAGE_SHIFT);
@@ -95,15 +110,6 @@ EXPORT_SYMBOL(phys_mem_access_prot);
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 
-void online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	init_page_count(page);
-	__free_page(page);
-	totalram_pages++;
-	num_physpages++;
-}
-
 #ifdef CONFIG_NUMA
 int memory_add_physaddr_to_nid(u64 start)
 {
@@ -216,7 +222,7 @@ void __init do_init_bootmem(void)
 	unsigned long total_pages;
 	int boot_mapsize;
 
-	max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+	max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
 	total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT;
 #ifdef CONFIG_HIGHMEM
 	total_pages = total_lowmem >> PAGE_SHIFT;
@@ -232,7 +238,8 @@ void __init do_init_bootmem(void)
 
 	start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE);
 
-	boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
+	min_low_pfn = MEMORY_START >> PAGE_SHIFT;
+	boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn);
 
 	/* Add active regions with valid PFNs */
 	for (i = 0; i < lmb.memory.cnt; i++) {
@@ -310,14 +317,19 @@ void __init paging_init(void)
 	unsigned long top_of_ram = lmb_end_of_DRAM();
 	unsigned long max_zone_pfns[MAX_NR_ZONES];
 
+#ifdef CONFIG_PPC32
+	unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
+	unsigned long end = __fix_to_virt(FIX_HOLE);
+
+	for (; v < end; v += PAGE_SIZE)
+		map_page(v, 0, 0); /* XXX gross */
+#endif
+
 #ifdef CONFIG_HIGHMEM
 	map_page(PKMAP_BASE, 0, 0);	/* XXX gross */
-	pkmap_page_table = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k
-			(PKMAP_BASE), PKMAP_BASE), PKMAP_BASE), PKMAP_BASE);
-	map_page(KMAP_FIX_BEGIN, 0, 0);	/* XXX gross */
-	kmap_pte = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k
-			(KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN),
-			 KMAP_FIX_BEGIN);
+	pkmap_page_table = virt_to_kpte(PKMAP_BASE);
+
+	kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
 	kmap_prot = PAGE_KERNEL;
 #endif /* CONFIG_HIGHMEM */
 
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 1efd631211ef..dc704da363eb 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -18,6 +18,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/lmb.h>
+#include <linux/of.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/system.h>
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 64c44bcc68de..80d1babb230d 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -29,6 +29,7 @@
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
+#include <asm/fixmap.h>
 #include <asm/io.h>
 
 #include "mmu_decl.h"
@@ -387,3 +388,25 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
 }
 #endif /* CONFIG_DEBUG_PAGEALLOC */
+
+static int fixmaps;
+unsigned long FIXADDR_TOP = 0xfffff000;
+EXPORT_SYMBOL(FIXADDR_TOP);
+
+void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
+{
+	unsigned long address = __fix_to_virt(idx);
+
+	if (idx >= __end_of_fixed_addresses) {
+		BUG();
+		return;
+	}
+
+	map_page(address, phys, flags);
+	fixmaps++;
+}
+
+void __this_fixmap_does_not_exist(void)
+{
+	WARN_ON(1);
+}
diff --git a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
index 18b8ebe930d5..5e1e8cf14e75 100644
--- a/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
+++ b/arch/powerpc/platforms/86xx/mpc8610_hpcd.c
@@ -3,11 +3,12 @@
  *
  * Initial author: Xianghua Xiao <x.xiao@freescale.com>
  * Recode: Jason Jin <jason.jin@freescale.com>
+ *         York Sun <yorksun@freescale.com>
  *
  * Rewrite the interrupt routing. remove the 8259PIC support,
  * All the integrated device in ULI use sideband interrupt.
  *
- * Copyright 2007 Freescale Semiconductor Inc.
+ * Copyright 2008 Freescale Semiconductor Inc.
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -38,6 +39,8 @@
 #include <sysdev/fsl_pci.h>
 #include <sysdev/fsl_soc.h>
 
+static unsigned char *pixis_bdcfg0, *pixis_arch;
+
 static struct of_device_id __initdata mpc8610_ids[] = {
 	{ .compatible = "fsl,mpc8610-immr", },
 	{}
@@ -52,8 +55,7 @@ static int __init mpc8610_declare_of_platform_devices(void)
 }
 machine_device_initcall(mpc86xx_hpcd, mpc8610_declare_of_platform_devices);
 
-static void __init
-mpc86xx_hpcd_init_irq(void)
+static void __init mpc86xx_hpcd_init_irq(void)
 {
 	struct mpic *mpic1;
 	struct device_node *np;
@@ -161,12 +163,159 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5288, final_uli5288);
 #endif /* CONFIG_PCI */
 
-static void __init
-mpc86xx_hpcd_setup_arch(void)
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+
+static u32 get_busfreq(void)
 {
-#ifdef CONFIG_PCI
-	struct device_node *np;
+	struct device_node *node;
+
+	u32 fs_busfreq = 0;
+	node = of_find_node_by_type(NULL, "cpu");
+	if (node) {
+		unsigned int size;
+		const unsigned int *prop =
+			of_get_property(node, "bus-frequency", &size);
+		if (prop)
+			fs_busfreq = *prop;
+		of_node_put(node);
+	};
+	return fs_busfreq;
+}
+
+unsigned int mpc8610hpcd_get_pixel_format(unsigned int bits_per_pixel,
+						int monitor_port)
+{
+	static const unsigned long pixelformat[][3] = {
+		{0x88882317, 0x88083218, 0x65052119},
+		{0x88883316, 0x88082219, 0x65053118},
+	};
+	unsigned int pix_fmt, arch_monitor;
+
+	arch_monitor = ((*pixis_arch == 0x01) && (monitor_port == 0))? 0 : 1;
+		/* DVI port for board version 0x01 */
+
+	if (bits_per_pixel == 32)
+		pix_fmt = pixelformat[arch_monitor][0];
+	else if (bits_per_pixel == 24)
+		pix_fmt = pixelformat[arch_monitor][1];
+	else if (bits_per_pixel == 16)
+		pix_fmt = pixelformat[arch_monitor][2];
+	else
+		pix_fmt = pixelformat[1][0];
+
+	return pix_fmt;
+}
+
+void mpc8610hpcd_set_gamma_table(int monitor_port, char *gamma_table_base)
+{
+	int i;
+	if (monitor_port == 2) {		/* dual link LVDS */
+		for (i = 0; i < 256*3; i++)
+			gamma_table_base[i] = (gamma_table_base[i] << 2) |
+					 ((gamma_table_base[i] >> 6) & 0x03);
+	}
+}
+
+void mpc8610hpcd_set_monitor_port(int monitor_port)
+{
+	static const u8 bdcfg[] = {0xBD, 0xB5, 0xA5};
+	if (monitor_port < 3)
+		*pixis_bdcfg0 = bdcfg[monitor_port];
+}
+
+void mpc8610hpcd_set_pixel_clock(unsigned int pixclock)
+{
+	u32 __iomem *clkdvdr;
+	u32 temp;
+	/* variables for pixel clock calcs */
+	ulong  bestval, bestfreq, speed_ccb, minpixclock, maxpixclock;
+	ulong pixval;
+	long err;
+	int i;
+
+	clkdvdr = ioremap(get_immrbase() + 0xe0800, sizeof(u32));
+	if (!clkdvdr) {
+		printk(KERN_ERR "Err: can't map clock divider register!\n");
+		return;
+	}
+
+	/* Pixel Clock configuration */
+	pr_debug("DIU: Bus Frequency = %d\n", get_busfreq());
+	speed_ccb = get_busfreq();
+
+	/* Calculate the pixel clock with the smallest error */
+	/* calculate the following in steps to avoid overflow */
+	pr_debug("DIU pixclock in ps - %d\n", pixclock);
+	temp = 1000000000/pixclock;
+	temp *= 1000;
+	pixclock = temp;
+	pr_debug("DIU pixclock freq - %u\n", pixclock);
+
+	temp = pixclock * 5 / 100;
+	pr_debug("deviation = %d\n", temp);
+	minpixclock = pixclock - temp;
+	maxpixclock = pixclock + temp;
+	pr_debug("DIU minpixclock - %lu\n", minpixclock);
+	pr_debug("DIU maxpixclock - %lu\n", maxpixclock);
+	pixval = speed_ccb/pixclock;
+	pr_debug("DIU pixval = %lu\n", pixval);
+
+	err = 100000000;
+	bestval = pixval;
+	pr_debug("DIU bestval = %lu\n", bestval);
+
+	bestfreq = 0;
+	for (i = -1; i <= 1; i++) {
+		temp = speed_ccb / ((pixval+i) + 1);
+		pr_debug("DIU test pixval i= %d, pixval=%lu, temp freq. = %u\n",
+							i, pixval, temp);
+		if ((temp < minpixclock) || (temp > maxpixclock))
+			pr_debug("DIU exceeds monitor range (%lu to %lu)\n",
+				minpixclock, maxpixclock);
+		else if (abs(temp - pixclock) < err) {
+		  pr_debug("Entered the else if block %d\n", i);
+			err = abs(temp - pixclock);
+			bestval = pixval+i;
+			bestfreq = temp;
+		}
+	}
+
+	pr_debug("DIU chose = %lx\n", bestval);
+	pr_debug("DIU error = %ld\n NomPixClk ", err);
+	pr_debug("DIU: Best Freq = %lx\n", bestfreq);
+	/* Modify PXCLK in GUTS CLKDVDR */
+	pr_debug("DIU: Current value of CLKDVDR = 0x%08x\n", (*clkdvdr));
+	temp = (*clkdvdr) & 0x2000FFFF;
+	*clkdvdr = temp;		/* turn off clock */
+	*clkdvdr = temp | 0x80000000 | (((bestval) & 0x1F) << 16);
+	pr_debug("DIU: Modified value of CLKDVDR = 0x%08x\n", (*clkdvdr));
+	iounmap(clkdvdr);
+}
+
+ssize_t mpc8610hpcd_show_monitor_port(int monitor_port, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE,
+			"%c0 - DVI\n"
+			"%c1 - Single link LVDS\n"
+			"%c2 - Dual link LVDS\n",
+			monitor_port == 0 ? '*' : ' ',
+			monitor_port == 1 ? '*' : ' ',
+			monitor_port == 2 ? '*' : ' ');
+}
+
+int mpc8610hpcd_set_sysfs_monitor_port(int val)
+{
+	return val < 3 ? val : 0;
+}
+
 #endif
+
+static void __init mpc86xx_hpcd_setup_arch(void)
+{
+	struct resource r;
+	struct device_node *np;
+	unsigned char *pixis;
+
 	if (ppc_md.progress)
 		ppc_md.progress("mpc86xx_hpcd_setup_arch()", 0);
 
@@ -183,6 +332,30 @@ mpc86xx_hpcd_setup_arch(void)
 		}
         }
 #endif
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+	preallocate_diu_videomemory();
+	diu_ops.get_pixel_format	= mpc8610hpcd_get_pixel_format;
+	diu_ops.set_gamma_table		= mpc8610hpcd_set_gamma_table;
+	diu_ops.set_monitor_port	= mpc8610hpcd_set_monitor_port;
+	diu_ops.set_pixel_clock		= mpc8610hpcd_set_pixel_clock;
+	diu_ops.show_monitor_port	= mpc8610hpcd_show_monitor_port;
+	diu_ops.set_sysfs_monitor_port	= mpc8610hpcd_set_sysfs_monitor_port;
+#endif
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,fpga-pixis");
+	if (np) {
+		of_address_to_resource(np, 0, &r);
+		of_node_put(np);
+		pixis = ioremap(r.start, 32);
+		if (!pixis) {
+			printk(KERN_ERR "Err: can't map FPGA cfg register!\n");
+			return;
+		}
+		pixis_bdcfg0 = pixis + 8;
+		pixis_arch = pixis + 1;
+	} else
+		printk(KERN_ERR "Err: "
+				"can't find device node 'fsl,fpga-pixis'\n");
 
 	printk("MPC86xx HPCD board from Freescale Semiconductor\n");
 }
@@ -200,8 +373,7 @@ static int __init mpc86xx_hpcd_probe(void)
 	return 0;
 }
 
-static long __init
-mpc86xx_time_init(void)
+static long __init mpc86xx_time_init(void)
 {
 	unsigned int temp;
 
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f38c50b4ce56..87454c526973 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -45,7 +45,6 @@ source "arch/powerpc/platforms/powermac/Kconfig"
 source "arch/powerpc/platforms/prep/Kconfig"
 source "arch/powerpc/platforms/maple/Kconfig"
 source "arch/powerpc/platforms/pasemi/Kconfig"
-source "arch/powerpc/platforms/celleb/Kconfig"
 source "arch/powerpc/platforms/ps3/Kconfig"
 source "arch/powerpc/platforms/cell/Kconfig"
 source "arch/powerpc/platforms/8xx/Kconfig"
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 5fc7fac10e93..f7efaa925a13 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -220,8 +220,8 @@ config SMP
 	  If you don't know what to do here, say N.
 
 config NR_CPUS
-	int "Maximum number of CPUs (2-128)"
-	range 2 128
+	int "Maximum number of CPUs (2-1024)"
+	range 2 1024
 	depends on SMP
 	default "32" if PPC64
 	default "4"
diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile
index a984894466d9..423a0234dc31 100644
--- a/arch/powerpc/platforms/Makefile
+++ b/arch/powerpc/platforms/Makefile
@@ -24,5 +24,4 @@ obj-$(CONFIG_PPC_MAPLE)		+= maple/
 obj-$(CONFIG_PPC_PASEMI)	+= pasemi/
 obj-$(CONFIG_PPC_CELL)		+= cell/
 obj-$(CONFIG_PPC_PS3)		+= ps3/
-obj-$(CONFIG_PPC_CELLEB)	+= celleb/
 obj-$(CONFIG_EMBEDDED6xx)	+= embedded6xx/
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 2f169991896d..3959fcfe731c 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -25,6 +25,19 @@ config PPC_IBM_CELL_BLADE
 	select PPC_UDBG_16550
 	select UDBG_RTAS_CONSOLE
 
+config PPC_CELLEB
+	bool "Toshiba's Cell Reference Set 'Celleb' Architecture"
+	depends on PPC_MULTIPLATFORM && PPC64
+	select PPC_CELL
+	select PPC_CELL_NATIVE
+	select PPC_RTAS
+	select PPC_INDIRECT_IO
+	select PPC_OF_PLATFORM_PCI
+	select HAS_TXX9_SERIAL
+	select PPC_UDBG_BEAT
+	select USB_OHCI_BIG_ENDIAN_MMIO
+	select USB_EHCI_BIG_ENDIAN_MMIO
+
 menu "Cell Broadband Engine options"
 	depends on PPC_CELL
 
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index c89964c6fb1f..c2a7e4e5ddf9 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_PPC_CELL_NATIVE)		+= interrupt.o iommu.o setup.o \
 					   cbe_regs.o spider-pic.o \
-					   pervasive.o pmu.o io-workarounds.o
+					   pervasive.o pmu.o io-workarounds.o \
+					   spider-pci.o
 obj-$(CONFIG_CBE_RAS)			+= ras.o
 
 obj-$(CONFIG_CBE_THERM)			+= cbe_thermal.o
@@ -26,3 +27,20 @@ obj-$(CONFIG_SPU_BASE)			+= spu_callbacks.o spu_base.o \
 					   spufs/
 
 obj-$(CONFIG_PCI_MSI)			+= axon_msi.o
+
+
+# celleb stuff
+ifeq ($(CONFIG_PPC_CELLEB),y)
+obj-y					+= celleb_setup.o \
+					   celleb_pci.o celleb_scc_epci.o \
+					   celleb_scc_pciex.o \
+					   celleb_scc_uhc.o \
+					   io-workarounds.o spider-pci.o \
+					   beat.o beat_htab.o beat_hvCall.o \
+					   beat_interrupt.o beat_iommu.o
+
+obj-$(CONFIG_SMP)			+= beat_smp.o
+obj-$(CONFIG_PPC_UDBG_BEAT)		+= beat_udbg.o
+obj-$(CONFIG_SERIAL_TXX9)		+= celleb_scc_sio.o
+obj-$(CONFIG_SPU_BASE)			+= beat_spu_priv1.o
+endif
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index d95e71dee91f..c39f5c225f2e 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -123,7 +123,7 @@ static struct axon_msic *find_msi_translator(struct pci_dev *dev)
 		return NULL;
 	}
 
-	for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
+	for (; dn; dn = of_get_next_parent(dn)) {
 		ph = of_get_property(dn, "msi-translator", NULL);
 		if (ph)
 			break;
@@ -169,7 +169,7 @@ static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type)
 
 static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
 {
-	struct device_node *dn, *tmp;
+	struct device_node *dn;
 	struct msi_desc *entry;
 	int len;
 	const u32 *prop;
@@ -182,7 +182,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg)
 
 	entry = list_first_entry(&dev->msi_list, struct msi_desc, list);
 
-	for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) {
+	for (; dn; dn = of_get_next_parent(dn)) {
 		if (entry->msi_attrib.is_64) {
 			prop = of_get_property(dn, "msi-address-64", &len);
 			if (prop)
diff --git a/arch/powerpc/platforms/celleb/beat.c b/arch/powerpc/platforms/cell/beat.c
index b64b171f245b..48c690ea65da 100644
--- a/arch/powerpc/platforms/celleb/beat.c
+++ b/arch/powerpc/platforms/cell/beat.c
@@ -33,7 +33,7 @@
 
 #include "beat_wrapper.h"
 #include "beat.h"
-#include "interrupt.h"
+#include "beat_interrupt.h"
 
 static int beat_pm_poweroff_flag;
 
diff --git a/arch/powerpc/platforms/celleb/beat.h b/arch/powerpc/platforms/cell/beat.h
index 32c8efcedc80..32c8efcedc80 100644
--- a/arch/powerpc/platforms/celleb/beat.h
+++ b/arch/powerpc/platforms/cell/beat.h
diff --git a/arch/powerpc/platforms/celleb/htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 81467ff055c8..81467ff055c8 100644
--- a/arch/powerpc/platforms/celleb/htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
diff --git a/arch/powerpc/platforms/celleb/hvCall.S b/arch/powerpc/platforms/cell/beat_hvCall.S
index 74c817448948..74c817448948 100644
--- a/arch/powerpc/platforms/celleb/hvCall.S
+++ b/arch/powerpc/platforms/cell/beat_hvCall.S
diff --git a/arch/powerpc/platforms/celleb/interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c
index 69562a867876..192a93509372 100644
--- a/arch/powerpc/platforms/celleb/interrupt.c
+++ b/arch/powerpc/platforms/cell/beat_interrupt.c
@@ -26,7 +26,7 @@
 
 #include <asm/machdep.h>
 
-#include "interrupt.h"
+#include "beat_interrupt.h"
 #include "beat_wrapper.h"
 
 #define	MAX_IRQS	NR_IRQS
diff --git a/arch/powerpc/platforms/celleb/interrupt.h b/arch/powerpc/platforms/cell/beat_interrupt.h
index b470fd0051f1..b470fd0051f1 100644
--- a/arch/powerpc/platforms/celleb/interrupt.h
+++ b/arch/powerpc/platforms/cell/beat_interrupt.h
diff --git a/arch/powerpc/platforms/celleb/iommu.c b/arch/powerpc/platforms/cell/beat_iommu.c
index 93b0efddd658..93b0efddd658 100644
--- a/arch/powerpc/platforms/celleb/iommu.c
+++ b/arch/powerpc/platforms/cell/beat_iommu.c
diff --git a/arch/powerpc/platforms/celleb/smp.c b/arch/powerpc/platforms/cell/beat_smp.c
index a7631250aeb4..26efc204c47f 100644
--- a/arch/powerpc/platforms/celleb/smp.c
+++ b/arch/powerpc/platforms/cell/beat_smp.c
@@ -37,7 +37,7 @@
 #include <asm/machdep.h>
 #include <asm/udbg.h>
 
-#include "interrupt.h"
+#include "beat_interrupt.h"
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
diff --git a/arch/powerpc/platforms/celleb/spu_priv1.c b/arch/powerpc/platforms/cell/beat_spu_priv1.c
index bcc17f7fe8ad..bcc17f7fe8ad 100644
--- a/arch/powerpc/platforms/celleb/spu_priv1.c
+++ b/arch/powerpc/platforms/cell/beat_spu_priv1.c
diff --git a/arch/powerpc/platforms/celleb/beat_syscall.h b/arch/powerpc/platforms/cell/beat_syscall.h
index 8580dc7e1798..8580dc7e1798 100644
--- a/arch/powerpc/platforms/celleb/beat_syscall.h
+++ b/arch/powerpc/platforms/cell/beat_syscall.h
diff --git a/arch/powerpc/platforms/celleb/udbg_beat.c b/arch/powerpc/platforms/cell/beat_udbg.c
index 6b418f6b6175..6b418f6b6175 100644
--- a/arch/powerpc/platforms/celleb/udbg_beat.c
+++ b/arch/powerpc/platforms/cell/beat_udbg.c
diff --git a/arch/powerpc/platforms/celleb/beat_wrapper.h b/arch/powerpc/platforms/cell/beat_wrapper.h
index b47dfda48d06..b47dfda48d06 100644
--- a/arch/powerpc/platforms/celleb/beat_wrapper.h
+++ b/arch/powerpc/platforms/cell/beat_wrapper.h
diff --git a/arch/powerpc/platforms/celleb/pci.c b/arch/powerpc/platforms/cell/celleb_pci.c
index 51b390d34e4d..f39a3b2a1667 100644
--- a/arch/powerpc/platforms/celleb/pci.c
+++ b/arch/powerpc/platforms/cell/celleb_pci.c
@@ -37,12 +37,11 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/prom.h>
-#include <asm/machdep.h>
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 
-#include "pci.h"
-#include "interrupt.h"
+#include "io-workarounds.h"
+#include "celleb_pci.h"
 
 #define MAX_PCI_DEVICES    32
 #define MAX_PCI_FUNCTIONS   8
@@ -190,7 +189,7 @@ static int celleb_fake_pci_read_config(struct pci_bus *bus,
 
 
 static int celleb_fake_pci_write_config(struct pci_bus *bus,
-		 unsigned int devfn, int where, int size, u32 val)
+		unsigned int devfn, int where, int size, u32 val)
 {
 	char *config;
 	struct device_node *node;
@@ -457,33 +456,42 @@ static int __init celleb_setup_fake_pci(struct device_node *dev,
 	return 0;
 }
 
-void __init fake_pci_workaround_init(struct pci_controller *phb)
-{
-	/**
-	 *  We will add fake pci bus to scc_pci_bus for the purpose to improve
-	 *  I/O Macro performance. But device-tree and device drivers
-	 *  are not ready to use address with a token.
-	 */
-
-	/* celleb_pci_add_one(phb, NULL); */
-}
+static struct celleb_phb_spec celleb_fake_pci_spec __initdata = {
+	.setup = celleb_setup_fake_pci,
+};
 
 static struct of_device_id celleb_phb_match[] __initdata = {
 	{
 		.name = "pci-pseudo",
-		.data = celleb_setup_fake_pci,
+		.data = &celleb_fake_pci_spec,
 	}, {
 		.name = "epci",
-		.data = celleb_setup_epci,
+		.data = &celleb_epci_spec,
+	}, {
+		.name = "pcie",
+		.data = &celleb_pciex_spec,
 	}, {
 	},
 };
 
+static int __init celleb_io_workaround_init(struct pci_controller *phb,
+					    struct celleb_phb_spec *phb_spec)
+{
+	if (phb_spec->ops) {
+		iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init,
+				  phb_spec->iowa_data);
+		io_workaround_init();
+	}
+
+	return 0;
+}
+
 int __init celleb_setup_phb(struct pci_controller *phb)
 {
 	struct device_node *dev = phb->dn;
 	const struct of_device_id *match;
-	int (*setup_func)(struct device_node *, struct pci_controller *);
+	struct celleb_phb_spec *phb_spec;
+	int rc;
 
 	match = of_match_node(celleb_phb_match, dev);
 	if (!match)
@@ -492,8 +500,12 @@ int __init celleb_setup_phb(struct pci_controller *phb)
 	phb_set_bus_ranges(dev, phb);
 	phb->buid = 1;
 
-	setup_func = match->data;
-	return (*setup_func)(dev, phb);
+	phb_spec = match->data;
+	rc = (*phb_spec->setup)(dev, phb);
+	if (rc)
+		return 1;
+
+	return celleb_io_workaround_init(phb, phb_spec);
 }
 
 int celleb_pci_probe_mode(struct pci_bus *bus)
diff --git a/arch/powerpc/platforms/celleb/pci.h b/arch/powerpc/platforms/cell/celleb_pci.h
index 5d5544ffeddb..4cba1523ec50 100644
--- a/arch/powerpc/platforms/celleb/pci.h
+++ b/arch/powerpc/platforms/cell/celleb_pci.h
@@ -27,16 +27,19 @@
 #include <asm/prom.h>
 #include <asm/ppc-pci.h>
 
+#include "io-workarounds.h"
+
+struct celleb_phb_spec {
+	int (*setup)(struct device_node *, struct pci_controller *);
+	struct ppc_pci_io *ops;
+	int (*iowa_init)(struct iowa_bus *, void *);
+	void *iowa_data;
+};
+
 extern int celleb_setup_phb(struct pci_controller *);
 extern int celleb_pci_probe_mode(struct pci_bus *);
 
-extern int celleb_setup_epci(struct device_node *, struct pci_controller *);
-
-extern void *celleb_dummy_page_va;
-extern int __init celleb_pci_workaround_init(void);
-extern void __init celleb_pci_add_one(struct pci_controller *,
-				      void (*)(struct pci_controller *));
-extern void fake_pci_workaround_init(struct pci_controller *);
-extern void epci_workaround_init(struct pci_controller *);
+extern struct celleb_phb_spec celleb_epci_spec;
+extern struct celleb_phb_spec celleb_pciex_spec;
 
 #endif /* _CELLEB_PCI_H */
diff --git a/arch/powerpc/platforms/celleb/scc.h b/arch/powerpc/platforms/cell/celleb_scc.h
index 6be1542a6e66..b596a711c348 100644
--- a/arch/powerpc/platforms/celleb/scc.h
+++ b/arch/powerpc/platforms/cell/celleb_scc.h
@@ -125,6 +125,93 @@
 /* bits for SCC_EPCI_CNTOPT */
 #define SCC_EPCI_CNTOPT_O2PMB   0x00000002
 
+/* SCC PCIEXC SMMIO registers */
+#define PEXCADRS		0x000
+#define PEXCWDATA		0x004
+#define PEXCRDATA		0x008
+#define PEXDADRS		0x010
+#define PEXDCMND		0x014
+#define PEXDWDATA		0x018
+#define PEXDRDATA		0x01c
+#define PEXREQID		0x020
+#define PEXTIDMAP		0x024
+#define PEXINTMASK		0x028
+#define PEXINTSTS		0x02c
+#define PEXAERRMASK		0x030
+#define PEXAERRSTS		0x034
+#define PEXPRERRMASK		0x040
+#define PEXPRERRSTS		0x044
+#define PEXPRERRID01		0x048
+#define PEXPRERRID23		0x04c
+#define PEXVDMASK		0x050
+#define PEXVDSTS		0x054
+#define PEXRCVCPLIDA		0x060
+#define PEXLENERRIDA		0x068
+#define PEXPHYPLLST		0x070
+#define PEXDMRDEN0		0x100
+#define PEXDMRDADR0		0x104
+#define PEXDMRDENX		0x110
+#define PEXDMRDADRX		0x114
+#define PEXECMODE		0xf00
+#define PEXMAEA(n)		(0xf50 + (8 * n))
+#define PEXMAEC(n)		(0xf54 + (8 * n))
+#define PEXCCRCTRL		0xff0
+
+/* SCC PCIEXC bits and shifts for PEXCADRS */
+#define PEXCADRS_BYTE_EN_SHIFT		20
+#define PEXCADRS_CMD_SHIFT		16
+#define PEXCADRS_CMD_READ		(0xa << PEXCADRS_CMD_SHIFT)
+#define PEXCADRS_CMD_WRITE		(0xb << PEXCADRS_CMD_SHIFT)
+
+/* SCC PCIEXC shifts for PEXDADRS */
+#define PEXDADRS_BUSNO_SHIFT		20
+#define PEXDADRS_DEVNO_SHIFT		15
+#define PEXDADRS_FUNCNO_SHIFT		12
+
+/* SCC PCIEXC bits and shifts for PEXDCMND */
+#define PEXDCMND_BYTE_EN_SHIFT		4
+#define PEXDCMND_IO_READ		0x2
+#define PEXDCMND_IO_WRITE		0x3
+#define PEXDCMND_CONFIG_READ		0xa
+#define PEXDCMND_CONFIG_WRITE		0xb
+
+/* SCC PCIEXC bits for PEXPHYPLLST */
+#define PEXPHYPLLST_PEXPHYAPLLST	0x00000001
+
+/* SCC PCIEXC bits for PEXECMODE */
+#define PEXECMODE_ALL_THROUGH		0x00000000
+#define PEXECMODE_ALL_8BIT		0x00550155
+#define PEXECMODE_ALL_16BIT		0x00aa02aa
+
+/* SCC PCIEXC bits for PEXCCRCTRL */
+#define PEXCCRCTRL_PEXIPCOREEN		0x00040000
+#define PEXCCRCTRL_PEXIPCONTEN		0x00020000
+#define PEXCCRCTRL_PEXPHYPLLEN		0x00010000
+#define PEXCCRCTRL_PCIEXCAOCKEN		0x00000100
+
+/* SCC PCIEXC port configuration registers */
+#define PEXTCERRCHK		0x21c
+#define PEXTAMAPB0		0x220
+#define PEXTAMAPL0		0x224
+#define PEXTAMAPB(n)		(PEXTAMAPB0 + 8 * (n))
+#define PEXTAMAPL(n)		(PEXTAMAPL0 + 8 * (n))
+#define PEXCHVC0P		0x500
+#define PEXCHVC0NP		0x504
+#define PEXCHVC0C		0x508
+#define PEXCDVC0P		0x50c
+#define PEXCDVC0NP		0x510
+#define PEXCDVC0C		0x514
+#define PEXCHVCXP		0x518
+#define PEXCHVCXNP		0x51c
+#define PEXCHVCXC		0x520
+#define PEXCDVCXP		0x524
+#define PEXCDVCXNP		0x528
+#define PEXCDVCXC		0x52c
+#define PEXCTTRG		0x530
+#define PEXTSCTRL		0x700
+#define PEXTSSTS		0x704
+#define PEXSKPCTRL		0x708
+
 /* UHC registers */
 #define SCC_UHC_CKRCTRL         0xff0
 #define SCC_UHC_ECMODE          0xf00
diff --git a/arch/powerpc/platforms/celleb/scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c
index a999b393f6f6..08c285b10e30 100644
--- a/arch/powerpc/platforms/celleb/scc_epci.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c
@@ -30,23 +30,17 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/prom.h>
-#include <asm/machdep.h>
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 
-#include "scc.h"
-#include "pci.h"
-#include "interrupt.h"
+#include "celleb_scc.h"
+#include "celleb_pci.h"
 
 #define MAX_PCI_DEVICES   32
 #define MAX_PCI_FUNCTIONS  8
 
 #define iob()  __asm__ __volatile__("eieio; sync":::"memory")
 
-struct epci_private {
-	dma_addr_t	dummy_page_da;
-};
-
 static inline PCI_IO_ADDR celleb_epci_get_epci_base(
 					struct pci_controller *hose)
 {
@@ -71,42 +65,6 @@ static inline PCI_IO_ADDR celleb_epci_get_epci_cfg(
 	return hose->cfg_data;
 }
 
-static void scc_epci_dummy_read(struct pci_controller *hose)
-{
-	PCI_IO_ADDR epci_base;
-	u32 val;
-
-	epci_base = celleb_epci_get_epci_base(hose);
-
-	val = in_be32(epci_base + SCC_EPCI_WATRP);
-	iosync();
-
-	return;
-}
-
-void __init epci_workaround_init(struct pci_controller *hose)
-{
-	PCI_IO_ADDR epci_base;
-	PCI_IO_ADDR reg;
-	struct epci_private *private = hose->private_data;
-
-	BUG_ON(!private);
-
-	private->dummy_page_da = dma_map_single(hose->parent,
-		celleb_dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE);
-	if (private->dummy_page_da == DMA_ERROR_CODE) {
-		printk(KERN_ERR "EPCI: dummy read disabled. "
-		       "Map dummy page failed.\n");
-		return;
-	}
-
-	celleb_pci_add_one(hose, scc_epci_dummy_read);
-	epci_base = celleb_epci_get_epci_base(hose);
-
-	reg = epci_base + SCC_EPCI_DUMYRADR;
-	out_be32(reg, private->dummy_page_da);
-}
-
 static inline void clear_and_disable_master_abort_interrupt(
 					struct pci_controller *hose)
 {
@@ -151,10 +109,8 @@ static int celleb_epci_check_abort(struct pci_controller *hose,
 	return PCIBIOS_SUCCESSFUL;
 }
 
-static PCI_IO_ADDR celleb_epci_make_config_addr(
-					struct pci_bus *bus,
-					struct pci_controller *hose,
-					unsigned int devfn, int where)
+static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus,
+		struct pci_controller *hose, unsigned int devfn, int where)
 {
 	PCI_IO_ADDR addr;
 
@@ -425,8 +381,8 @@ static int __init celleb_epci_init(struct pci_controller *hose)
 	return 0;
 }
 
-int __init celleb_setup_epci(struct device_node *node,
-				struct pci_controller *hose)
+static int __init celleb_setup_epci(struct device_node *node,
+				    struct pci_controller *hose)
 {
 	struct resource r;
 
@@ -450,8 +406,7 @@ int __init celleb_setup_epci(struct device_node *node,
 	if (!hose->cfg_addr)
 		goto error;
 	pr_debug("EPCI: cfg_addr map 0x%016lx->0x%016lx + 0x%016lx\n",
-		 r.start, (unsigned long)hose->cfg_addr,
-		(r.end - r.start + 1));
+		 r.start, (unsigned long)hose->cfg_addr, (r.end - r.start + 1));
 
 	if (of_address_to_resource(node, 2, &r))
 		goto error;
@@ -459,14 +414,7 @@ int __init celleb_setup_epci(struct device_node *node,
 	if (!hose->cfg_data)
 		goto error;
 	pr_debug("EPCI: cfg_data map 0x%016lx->0x%016lx + 0x%016lx\n",
-		 r.start, (unsigned long)hose->cfg_data,
-		(r.end - r.start + 1));
-
-	hose->private_data = kzalloc(sizeof(struct epci_private), GFP_KERNEL);
-	if (hose->private_data == NULL) {
-		printk(KERN_ERR "EPCI: no memory for private data.\n");
-		goto error;
-	}
+		 r.start, (unsigned long)hose->cfg_data, (r.end - r.start + 1));
 
 	hose->ops = &celleb_epci_ops;
 	celleb_epci_init(hose);
@@ -474,8 +422,6 @@ int __init celleb_setup_epci(struct device_node *node,
 	return 0;
 
 error:
-	kfree(hose->private_data);
-
 	if (hose->cfg_addr)
 		iounmap(hose->cfg_addr);
 
@@ -483,3 +429,10 @@ error:
 		iounmap(hose->cfg_data);
 	return 1;
 }
+
+struct celleb_phb_spec celleb_epci_spec __initdata = {
+	.setup = celleb_setup_epci,
+	.ops = &spiderpci_ops,
+	.iowa_init = &spiderpci_iowa_init,
+	.iowa_data = (void *)0,
+};
diff --git a/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
new file mode 100644
index 000000000000..31da84c458d2
--- /dev/null
+++ b/arch/powerpc/platforms/cell/celleb_scc_pciex.c
@@ -0,0 +1,547 @@
+/*
+ * Support for Celleb PCI-Express.
+ *
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/iommu.h>
+#include <asm/byteorder.h>
+
+#include "celleb_scc.h"
+#include "celleb_pci.h"
+
+#define PEX_IN(base, off)	in_be32((void __iomem *)(base) + (off))
+#define PEX_OUT(base, off, data) out_be32((void __iomem *)(base) + (off), (data))
+
+static void scc_pciex_io_flush(struct iowa_bus *bus)
+{
+	(void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0);
+}
+
+/*
+ * Memory space access to device on PCIEX
+ */
+#define PCIEX_MMIO_READ(name, ret)					\
+static ret scc_pciex_##name(const PCI_IO_ADDR addr)			\
+{									\
+	ret val = __do_##name(addr);					\
+	scc_pciex_io_flush(iowa_mem_find_bus(addr));			\
+	return val;							\
+}
+
+#define PCIEX_MMIO_READ_STR(name)					\
+static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf,		\
+			     unsigned long count)			\
+{									\
+	__do_##name(addr, buf, count);					\
+	scc_pciex_io_flush(iowa_mem_find_bus(addr));			\
+}
+
+PCIEX_MMIO_READ(readb, u8)
+PCIEX_MMIO_READ(readw, u16)
+PCIEX_MMIO_READ(readl, u32)
+PCIEX_MMIO_READ(readq, u64)
+PCIEX_MMIO_READ(readw_be, u16)
+PCIEX_MMIO_READ(readl_be, u32)
+PCIEX_MMIO_READ(readq_be, u64)
+PCIEX_MMIO_READ_STR(readsb)
+PCIEX_MMIO_READ_STR(readsw)
+PCIEX_MMIO_READ_STR(readsl)
+
+static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+				    unsigned long n)
+{
+	__do_memcpy_fromio(dest, src, n);
+	scc_pciex_io_flush(iowa_mem_find_bus(src));
+}
+
+/*
+ * I/O port access to devices on PCIEX.
+ */
+
+static inline unsigned long get_bus_address(struct pci_controller *phb,
+					    unsigned long port)
+{
+	return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE);
+}
+
+static u32 scc_pciex_read_port(struct pci_controller *phb,
+			       unsigned long port, int size)
+{
+	unsigned int byte_enable;
+	unsigned int cmd, shift;
+	unsigned long addr;
+	u32 data, ret;
+
+	BUG_ON(((port & 0x3ul) + size) > 4);
+
+	addr = get_bus_address(phb, port);
+	shift = addr & 0x3ul;
+	byte_enable = ((1 << size) - 1) << shift;
+	cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
+	PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
+	PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
+	data = PEX_IN(phb->cfg_addr, PEXDRDATA);
+	ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8));
+
+	pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x,"
+		 " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable,
+		 cmd, data, ret);
+
+	return ret;
+}
+
+static void scc_pciex_write_port(struct pci_controller *phb,
+				 unsigned long port, int size, u32 val)
+{
+	unsigned int byte_enable;
+	unsigned int cmd, shift;
+	unsigned long addr;
+	u32 data;
+
+	BUG_ON(((port & 0x3ul) + size) > 4);
+
+	addr = get_bus_address(phb, port);
+	shift = addr & 0x3ul;
+	byte_enable = ((1 << size) - 1) << shift;
+	cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT);
+	data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8);
+	PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul));
+	PEX_OUT(phb->cfg_addr, PEXDCMND, cmd);
+	PEX_OUT(phb->cfg_addr, PEXDWDATA, data);
+
+	pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x,"
+		 " be=%x, cmd=%x, data=%x\n", port, addr, size, val,
+		 byte_enable, cmd, data);
+}
+
+static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port)
+{
+	return (u8)scc_pciex_read_port(phb, port, 1);
+}
+
+static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port)
+{
+	u32 data;
+	if ((port & 0x3ul) < 3)
+		data = scc_pciex_read_port(phb, port, 2);
+	else {
+		u32 d1 = scc_pciex_read_port(phb, port, 1);
+		u32 d2 = scc_pciex_read_port(phb, port + 1, 1);
+		data = d1 | (d2 << 8);
+	}
+	return (u16)data;
+}
+
+static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port)
+{
+	unsigned int mod = port & 0x3ul;
+	u32 data;
+	if (mod == 0)
+		data = scc_pciex_read_port(phb, port, 4);
+	else {
+		u32 d1 = scc_pciex_read_port(phb, port, 4 - mod);
+		u32 d2 = scc_pciex_read_port(phb, port + 1, mod);
+		data = d1 | (d2 << (mod * 8));
+	}
+	return data;
+}
+
+static void __scc_pciex_outb(struct pci_controller *phb,
+			     u8 val, unsigned long port)
+{
+	scc_pciex_write_port(phb, port, 1, (u32)val);
+}
+
+static void __scc_pciex_outw(struct pci_controller *phb,
+			     u16 val, unsigned long port)
+{
+	if ((port & 0x3ul) < 3)
+		scc_pciex_write_port(phb, port, 2, (u32)val);
+	else {
+		u32 d1 = val & 0x000000FF;
+		u32 d2 = (val & 0x0000FF00) >> 8;
+		scc_pciex_write_port(phb, port, 1, d1);
+		scc_pciex_write_port(phb, port + 1, 1, d2);
+	}
+}
+
+static void __scc_pciex_outl(struct pci_controller *phb,
+			     u32 val, unsigned long port)
+{
+	unsigned int mod = port & 0x3ul;
+	if (mod == 0)
+		scc_pciex_write_port(phb, port, 4, val);
+	else {
+		u32 d1 = val & (0xFFFFFFFFul >> (mod * 8));
+		u32 d2 = val >> ((4 - mod) * 8);
+		scc_pciex_write_port(phb, port, 4 - mod, d1);
+		scc_pciex_write_port(phb, port + 1, mod, d2);
+	}
+}
+
+#define PCIEX_PIO_FUNC(size, name)					\
+static u##size scc_pciex_in##name(unsigned long port)			\
+{									\
+	struct iowa_bus *bus = iowa_pio_find_bus(port);			\
+	u##size data = __scc_pciex_in##name(bus->phb, port);		\
+	scc_pciex_io_flush(bus);					\
+	return data;							\
+}									\
+static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \
+{									\
+	struct iowa_bus *bus = iowa_pio_find_bus(p);			\
+	u##size *dst = b;						\
+	for (; c != 0; c--, dst++)					\
+		*dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \
+	scc_pciex_io_flush(bus);					\
+}									\
+static void scc_pciex_out##name(u##size val, unsigned long port)	\
+{									\
+	struct iowa_bus *bus = iowa_pio_find_bus(port);			\
+	__scc_pciex_out##name(bus->phb, val, port);			\
+}									\
+static void scc_pciex_outs##name(unsigned long p, const void *b,	\
+				 unsigned long c)			\
+{									\
+	struct iowa_bus *bus = iowa_pio_find_bus(p);			\
+	const u##size *src = b;						\
+	for (; c != 0; c--, src++)					\
+		__scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \
+}
+#define cpu_to_le8(x) (x)
+#define le8_to_cpu(x) (x)
+PCIEX_PIO_FUNC(8, b)
+PCIEX_PIO_FUNC(16, w)
+PCIEX_PIO_FUNC(32, l)
+
+static struct ppc_pci_io scc_pciex_ops = {
+	.readb = scc_pciex_readb,
+	.readw = scc_pciex_readw,
+	.readl = scc_pciex_readl,
+	.readq = scc_pciex_readq,
+	.readw_be = scc_pciex_readw_be,
+	.readl_be = scc_pciex_readl_be,
+	.readq_be = scc_pciex_readq_be,
+	.readsb = scc_pciex_readsb,
+	.readsw = scc_pciex_readsw,
+	.readsl = scc_pciex_readsl,
+	.memcpy_fromio = scc_pciex_memcpy_fromio,
+	.inb = scc_pciex_inb,
+	.inw = scc_pciex_inw,
+	.inl = scc_pciex_inl,
+	.outb = scc_pciex_outb,
+	.outw = scc_pciex_outw,
+	.outl = scc_pciex_outl,
+	.insb = scc_pciex_insb,
+	.insw = scc_pciex_insw,
+	.insl = scc_pciex_insl,
+	.outsb = scc_pciex_outsb,
+	.outsw = scc_pciex_outsw,
+	.outsl = scc_pciex_outsl,
+};
+
+static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data)
+{
+	dma_addr_t dummy_page_da;
+	void *dummy_page_va;
+
+	dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dummy_page_va) {
+		pr_err("PCIEX:Alloc dummy_page_va failed\n");
+		return -1;
+	}
+
+	dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va,
+				       PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dummy_page_da)) {
+		pr_err("PCIEX:Map dummy page failed.\n");
+		kfree(dummy_page_va);
+		return -1;
+	}
+
+	PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da);
+
+	return 0;
+}
+
+/*
+ * config space access
+ */
+#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \
+	((uint32_t)(((addr) & ~0x3UL) | \
+	((bus_no) << PEXDADRS_BUSNO_SHIFT) | \
+	((dev_no)  << PEXDADRS_DEVNO_SHIFT) | \
+	((func_no) << PEXDADRS_FUNCNO_SHIFT)))
+
+#define MK_PEXDCMND_BYTE_EN(addr, size) \
+	((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT)
+#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size))
+
+static uint32_t config_read_pciex_dev(unsigned int __iomem *base,
+		uint64_t bus_no, uint64_t dev_no, uint64_t func_no,
+		uint64_t off, uint64_t size)
+{
+	uint32_t ret;
+	uint32_t addr, cmd;
+
+	addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
+	cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size);
+	PEX_OUT(base, PEXDADRS, addr);
+	PEX_OUT(base, PEXDCMND, cmd);
+	ret = (PEX_IN(base, PEXDRDATA)
+		>> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1);
+	return ret;
+}
+
+static void config_write_pciex_dev(unsigned int __iomem *base, uint64_t bus_no,
+	uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size,
+	uint32_t data)
+{
+	uint32_t addr, cmd;
+
+	addr = MK_PEXDADRS(bus_no, dev_no, func_no, off);
+	cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size);
+	PEX_OUT(base, PEXDADRS, addr);
+	PEX_OUT(base, PEXDCMND, cmd);
+	PEX_OUT(base, PEXDWDATA,
+		(data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8));
+}
+
+#define MK_PEXCADRS_BYTE_EN(off, len) \
+	((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT)
+#define MK_PEXCADRS(cmd, addr, size) \
+	((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3))
+static uint32_t config_read_pciex_rc(unsigned int __iomem *base,
+				     uint32_t where, uint32_t size)
+{
+	PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size));
+	return (PEX_IN(base, PEXCRDATA)
+		>> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1);
+}
+
+static void config_write_pciex_rc(unsigned int __iomem *base, uint32_t where,
+				  uint32_t size, uint32_t val)
+{
+	uint32_t data;
+
+	data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8);
+	PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size));
+	PEX_OUT(base, PEXCWDATA, data);
+}
+
+/* Interfaces */
+/* Note: Work-around
+ *  On SCC PCIEXC, one device is seen on all 32 dev_no.
+ *  As SCC PCIEXC can have only one device on the bus, we look only one dev_no.
+ * (dev_no = 1)
+ */
+static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn,
+				 int where, int size, unsigned int *val)
+{
+	struct device_node *dn;
+	struct pci_controller *phb;
+
+	dn = bus->sysdata;
+	phb = pci_find_hose_for_OF_device(dn);
+
+	if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) {
+		*val = ~0;
+		return PCIBIOS_DEVICE_NOT_FOUND;
+	}
+
+	if (bus->number == 0 && PCI_SLOT(devfn) == 0)
+		*val = config_read_pciex_rc(phb->cfg_addr, where, size);
+	else
+		*val = config_read_pciex_dev(phb->cfg_addr, bus->number,
+				PCI_SLOT(devfn), PCI_FUNC(devfn), where, size);
+
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn,
+				  int where, int size, unsigned int val)
+{
+	struct device_node *dn;
+	struct pci_controller *phb;
+
+	dn = bus->sysdata;
+	phb = pci_find_hose_for_OF_device(dn);
+
+	if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1)
+		return PCIBIOS_DEVICE_NOT_FOUND;
+
+	if (bus->number == 0 && PCI_SLOT(devfn) == 0)
+		config_write_pciex_rc(phb->cfg_addr, where, size, val);
+	else
+		config_write_pciex_dev(phb->cfg_addr, bus->number,
+			PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val);
+	return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops scc_pciex_pci_ops = {
+	scc_pciex_read_config,
+	scc_pciex_write_config,
+};
+
+static void pciex_clear_intr_all(unsigned int __iomem *base)
+{
+	PEX_OUT(base, PEXAERRSTS, 0xffffffff);
+	PEX_OUT(base, PEXPRERRSTS, 0xffffffff);
+	PEX_OUT(base, PEXINTSTS, 0xffffffff);
+}
+
+#if 0
+static void pciex_disable_intr_all(unsigned int *base)
+{
+	PEX_OUT(base, PEXINTMASK,   0x0);
+	PEX_OUT(base, PEXAERRMASK,  0x0);
+	PEX_OUT(base, PEXPRERRMASK, 0x0);
+	PEX_OUT(base, PEXVDMASK,    0x0);
+}
+#endif
+
+static void pciex_enable_intr_all(unsigned int __iomem *base)
+{
+	PEX_OUT(base, PEXINTMASK, 0x0000e7f1);
+	PEX_OUT(base, PEXAERRMASK, 0x03ff01ff);
+	PEX_OUT(base, PEXPRERRMASK, 0x0001010f);
+	PEX_OUT(base, PEXVDMASK, 0x00000001);
+}
+
+static void pciex_check_status(unsigned int __iomem *base)
+{
+	uint32_t err = 0;
+	uint32_t intsts, aerr, prerr, rcvcp, lenerr;
+	uint32_t maea, maec;
+
+	intsts = PEX_IN(base, PEXINTSTS);
+	aerr = PEX_IN(base, PEXAERRSTS);
+	prerr = PEX_IN(base, PEXPRERRSTS);
+	rcvcp = PEX_IN(base, PEXRCVCPLIDA);
+	lenerr = PEX_IN(base, PEXLENERRIDA);
+
+	if (intsts || aerr || prerr || rcvcp || lenerr)
+		err = 1;
+
+	pr_info("PCEXC interrupt!!\n");
+	pr_info("PEXINTSTS    :0x%08x\n", intsts);
+	pr_info("PEXAERRSTS   :0x%08x\n", aerr);
+	pr_info("PEXPRERRSTS  :0x%08x\n", prerr);
+	pr_info("PEXRCVCPLIDA :0x%08x\n", rcvcp);
+	pr_info("PEXLENERRIDA :0x%08x\n", lenerr);
+
+	/* print detail of Protection Error */
+	if (intsts & 0x00004000) {
+		uint32_t i, n;
+		for (i = 0; i < 4; i++) {
+			n = 1 << i;
+			if (prerr & n) {
+				maea = PEX_IN(base, PEXMAEA(i));
+				maec = PEX_IN(base, PEXMAEC(i));
+				pr_info("PEXMAEC%d     :0x%08x\n", i, maec);
+				pr_info("PEXMAEA%d     :0x%08x\n", i, maea);
+			}
+		}
+	}
+
+	if (err)
+		pciex_clear_intr_all(base);
+}
+
+static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id)
+{
+	struct pci_controller *phb = dev_id;
+
+	pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq);
+
+	BUG_ON(phb->cfg_addr == NULL);
+
+	pciex_check_status(phb->cfg_addr);
+
+	return IRQ_HANDLED;
+}
+
+static __init int celleb_setup_pciex(struct device_node *node,
+				     struct pci_controller *phb)
+{
+	struct resource	r;
+	struct of_irq oirq;
+	int virq;
+
+	/* SMMIO registers; used inside this file */
+	if (of_address_to_resource(node, 0, &r)) {
+		pr_err("PCIEXC:Failed to get config resource.\n");
+		return 1;
+	}
+	phb->cfg_addr = ioremap(r.start, r.end - r.start + 1);
+	if (!phb->cfg_addr) {
+		pr_err("PCIEXC:Failed to remap SMMIO region.\n");
+		return 1;
+	}
+
+	/* Not use cfg_data,  cmd and data regs are near address reg */
+	phb->cfg_data = NULL;
+
+	/* set pci_ops */
+	phb->ops = &scc_pciex_pci_ops;
+
+	/* internal interrupt handler */
+	if (of_irq_map_one(node, 1, &oirq)) {
+		pr_err("PCIEXC:Failed to map irq\n");
+		goto error;
+	}
+	virq = irq_create_of_mapping(oirq.controller, oirq.specifier,
+				     oirq.size);
+	if (request_irq(virq, pciex_handle_internal_irq,
+			IRQF_DISABLED, "pciex", (void *)phb)) {
+		pr_err("PCIEXC:Failed to request irq\n");
+		goto error;
+	}
+
+	/* enable all interrupts */
+	pciex_clear_intr_all(phb->cfg_addr);
+	pciex_enable_intr_all(phb->cfg_addr);
+	/* MSI: TBD */
+
+	return 0;
+
+error:
+	phb->cfg_data = NULL;
+	if (phb->cfg_addr)
+		iounmap(phb->cfg_addr);
+	phb->cfg_addr = NULL;
+	return 1;
+}
+
+struct celleb_phb_spec celleb_pciex_spec __initdata = {
+	.setup = celleb_setup_pciex,
+	.ops = &scc_pciex_ops,
+	.iowa_init = &scc_pciex_iowa_init,
+};
diff --git a/arch/powerpc/platforms/celleb/scc_sio.c b/arch/powerpc/platforms/cell/celleb_scc_sio.c
index 3a16c5b3c464..3a16c5b3c464 100644
--- a/arch/powerpc/platforms/celleb/scc_sio.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_sio.c
diff --git a/arch/powerpc/platforms/celleb/scc_uhc.c b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
index cb4307994087..d63b720bfe3a 100644
--- a/arch/powerpc/platforms/celleb/scc_uhc.c
+++ b/arch/powerpc/platforms/cell/celleb_scc_uhc.c
@@ -25,7 +25,7 @@
 #include <asm/io.h>
 #include <asm/machdep.h>
 
-#include "scc.h"
+#include "celleb_scc.h"
 
 #define UHC_RESET_WAIT_MAX 10000
 
diff --git a/arch/powerpc/platforms/celleb/setup.c b/arch/powerpc/platforms/cell/celleb_setup.c
index f27ae1e3fb58..b11cb30decb2 100644
--- a/arch/powerpc/platforms/celleb/setup.c
+++ b/arch/powerpc/platforms/cell/celleb_setup.c
@@ -56,13 +56,13 @@
 #include <asm/rtas.h>
 #include <asm/cell-regs.h>
 
-#include "interrupt.h"
+#include "beat_interrupt.h"
 #include "beat_wrapper.h"
 #include "beat.h"
-#include "pci.h"
-#include "../cell/interrupt.h"
-#include "../cell/pervasive.h"
-#include "../cell/ras.h"
+#include "celleb_pci.h"
+#include "interrupt.h"
+#include "pervasive.h"
+#include "ras.h"
 
 static char celleb_machine_type[128] = "Celleb";
 
@@ -114,8 +114,6 @@ static int __init celleb_publish_devices(void)
 	/* Publish OF platform devices for southbridge IOs */
 	of_platform_bus_probe(NULL, celleb_bus_ids, NULL);
 
-	celleb_pci_workaround_init();
-
 	return 0;
 }
 machine_device_initcall(celleb_beat, celleb_publish_devices);
diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c
index 979d4b67efb4..3b84e8be314c 100644
--- a/arch/powerpc/platforms/cell/io-workarounds.c
+++ b/arch/powerpc/platforms/cell/io-workarounds.c
@@ -1,6 +1,9 @@
 /*
+ * Support PCI IO workaround
+ *
  *  Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>
  *		       IBM, Corp.
+ *  (C) Copyright 2007-2008 TOSHIBA CORPORATION
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -9,335 +12,174 @@
 #undef DEBUG
 
 #include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/pci.h>
+
 #include <asm/io.h>
 #include <asm/machdep.h>
-#include <asm/pci-bridge.h>
+#include <asm/pgtable.h>
 #include <asm/ppc-pci.h>
 
+#include "io-workarounds.h"
 
-#define SPIDER_PCI_REG_BASE		0xd000
-#define SPIDER_PCI_VCI_CNTL_STAT	0x0110
-#define SPIDER_PCI_DUMMY_READ		0x0810
-#define SPIDER_PCI_DUMMY_READ_BASE	0x0814
+#define IOWA_MAX_BUS	8
 
-/* Undefine that to re-enable bogus prefetch
- *
- * Without that workaround, the chip will do bogus prefetch past
- * page boundary from system memory. This setting will disable that,
- * though the documentation is unclear as to the consequences of doing
- * so, either purely performances, or possible misbehaviour... It's not
- * clear wether the chip can handle unaligned accesses at all without
- * prefetching enabled.
- *
- * For now, things appear to be behaving properly with that prefetching
- * disabled and IDE, possibly because IDE isn't doing any unaligned
- * access.
- */
-#define SPIDER_DISABLE_PREFETCH
+static struct iowa_bus iowa_busses[IOWA_MAX_BUS];
+static unsigned int iowa_bus_count;
 
-#define MAX_SPIDERS	3
+static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr)
+{
+	int i, j;
+	struct resource *res;
+	unsigned long vstart, vend;
 
-static struct spider_pci_bus {
-	void __iomem	*regs;
-	unsigned long	mmio_start;
-	unsigned long	mmio_end;
-	unsigned long	pio_vstart;
-	unsigned long	pio_vend;
-} spider_pci_busses[MAX_SPIDERS];
-static int spider_pci_count;
+	for (i = 0; i < iowa_bus_count; i++) {
+		struct iowa_bus *bus = &iowa_busses[i];
+		struct pci_controller *phb = bus->phb;
 
-static struct spider_pci_bus *spider_pci_find(unsigned long vaddr,
-					      unsigned long paddr)
-{
-	int i;
-
-	for (i = 0; i < spider_pci_count; i++) {
-		struct spider_pci_bus *bus = &spider_pci_busses[i];
-		if (paddr && paddr >= bus->mmio_start && paddr < bus->mmio_end)
-			return bus;
-		if (vaddr && vaddr >= bus->pio_vstart && vaddr < bus->pio_vend)
-			return bus;
+		if (vaddr) {
+			vstart = (unsigned long)phb->io_base_virt;
+			vend = vstart + phb->pci_io_size - 1;
+			if ((vaddr >= vstart) && (vaddr <= vend))
+				return bus;
+		}
+
+		if (paddr)
+			for (j = 0; j < 3; j++) {
+				res = &phb->mem_resources[j];
+				if (paddr >= res->start && paddr <= res->end)
+					return bus;
+			}
 	}
+
 	return NULL;
 }
 
-static void spider_io_flush(const volatile void __iomem *addr)
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr)
 {
-	struct spider_pci_bus *bus;
+	struct iowa_bus *bus;
 	int token;
 
-	/* Get platform token (set by ioremap) from address */
 	token = PCI_GET_ADDR_TOKEN(addr);
 
-	/* Fast path if we have a non-0 token, it indicates which bus we
-	 * are on.
-	 *
-	 * If the token is 0, that means either that the ioremap was done
-	 * before we initialized this layer, or it's a PIO operation. We
-	 * fallback to a low path in this case. Hopefully, internal devices
-	 * which are ioremap'ed early should use in_XX/out_XX functions
-	 * instead of the PCI ones and thus not suffer from the slowdown.
-	 *
-	 * Also note that currently, the workaround will not work for areas
-	 * that are not mapped with PTEs (bolted in the hash table). This
-	 * is the case for ioremaps done very early at boot (before
-	 * mem_init_done) and includes the mapping of the ISA IO space.
-	 *
-	 * Fortunately, none of the affected devices is expected to do DMA
-	 * and thus there should be no problem in practice.
-	 *
-	 * In order to improve performances, we only do the PTE search for
-	 * addresses falling in the PHB IO space area. That means it will
-	 * not work for hotplug'ed PHBs but those don't exist with Spider.
-	 */
-	if (token && token <= spider_pci_count)
-		bus = &spider_pci_busses[token - 1];
+	if (token && token <= iowa_bus_count)
+		bus = &iowa_busses[token - 1];
 	else {
 		unsigned long vaddr, paddr;
 		pte_t *ptep;
 
-		/* Fixup physical address */
 		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
+		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
+			return NULL;
 
-		/* Check if it's in allowed range for  PIO */
-		if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END)
-			return;
-
-		/* Try to find a PTE. If not, clear the paddr, we'll do
-		 * a vaddr only lookup (PIO only)
-		 */
 		ptep = find_linux_pte(init_mm.pgd, vaddr);
 		if (ptep == NULL)
 			paddr = 0;
 		else
 			paddr = pte_pfn(*ptep) << PAGE_SHIFT;
+		bus = iowa_pci_find(vaddr, paddr);
 
-		bus = spider_pci_find(vaddr, paddr);
 		if (bus == NULL)
-			return;
+			return NULL;
 	}
 
-	/* Now do the workaround
-	 */
-	(void)in_be32(bus->regs + SPIDER_PCI_DUMMY_READ);
+	return bus;
 }
 
-static u8 spider_readb(const volatile void __iomem *addr)
+struct iowa_bus *iowa_pio_find_bus(unsigned long port)
 {
-	u8 val = __do_readb(addr);
-	spider_io_flush(addr);
-	return val;
+	unsigned long vaddr = (unsigned long)pci_io_base + port;
+	return iowa_pci_find(vaddr, 0);
 }
 
-static u16 spider_readw(const volatile void __iomem *addr)
-{
-	u16 val = __do_readw(addr);
-	spider_io_flush(addr);
-	return val;
-}
 
-static u32 spider_readl(const volatile void __iomem *addr)
-{
-	u32 val = __do_readl(addr);
-	spider_io_flush(addr);
-	return val;
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)		\
+static ret iowa_##name at					\
+{								\
+	struct iowa_bus *bus;					\
+	bus = iowa_##space##_find_bus(aa);			\
+	if (bus && bus->ops && bus->ops->name)			\
+		return bus->ops->name al;			\
+	return __do_##name al;					\
 }
 
-static u64 spider_readq(const volatile void __iomem *addr)
-{
-	u64 val = __do_readq(addr);
-	spider_io_flush(addr);
-	return val;
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)		\
+static void iowa_##name at					\
+{								\
+	struct iowa_bus *bus;					\
+	bus = iowa_##space##_find_bus(aa);			\
+	if (bus && bus->ops && bus->ops->name) {		\
+		bus->ops->name al;				\
+		return;						\
+	}							\
+	__do_##name al;						\
 }
 
-static u16 spider_readw_be(const volatile void __iomem *addr)
-{
-	u16 val = __do_readw_be(addr);
-	spider_io_flush(addr);
-	return val;
-}
+#include <asm/io-defs.h>
 
-static u32 spider_readl_be(const volatile void __iomem *addr)
-{
-	u32 val = __do_readl_be(addr);
-	spider_io_flush(addr);
-	return val;
-}
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
 
-static u64 spider_readq_be(const volatile void __iomem *addr)
-{
-	u64 val = __do_readq_be(addr);
-	spider_io_flush(addr);
-	return val;
-}
+static struct ppc_pci_io __initdata iowa_pci_io = {
 
-static void spider_readsb(const volatile void __iomem *addr, void *buf,
-			  unsigned long count)
-{
-	__do_readsb(addr, buf, count);
-	spider_io_flush(addr);
-}
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)	.name = iowa_##name,
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)	.name = iowa_##name,
 
-static void spider_readsw(const volatile void __iomem *addr, void *buf,
-			  unsigned long count)
-{
-	__do_readsw(addr, buf, count);
-	spider_io_flush(addr);
-}
+#include <asm/io-defs.h>
 
-static void spider_readsl(const volatile void __iomem *addr, void *buf,
-			  unsigned long count)
-{
-	__do_readsl(addr, buf, count);
-	spider_io_flush(addr);
-}
-
-static void spider_memcpy_fromio(void *dest, const volatile void __iomem *src,
-				 unsigned long n)
-{
-	__do_memcpy_fromio(dest, src, n);
-	spider_io_flush(src);
-}
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
 
+};
 
-static void __iomem * spider_ioremap(unsigned long addr, unsigned long size,
-				     unsigned long flags)
+static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size,
+						unsigned long flags)
 {
-	struct spider_pci_bus *bus;
+	struct iowa_bus *bus;
 	void __iomem *res = __ioremap(addr, size, flags);
 	int busno;
 
-	pr_debug("spider_ioremap(0x%lx, 0x%lx, 0x%lx) -> 0x%p\n",
-		 addr, size, flags, res);
-
-	bus = spider_pci_find(0, addr);
+	bus = iowa_pci_find(0, addr);
 	if (bus != NULL) {
-		busno = bus - spider_pci_busses;
-		pr_debug(" found bus %d, setting token\n", busno);
+		busno = bus - iowa_busses;
 		PCI_SET_ADDR_TOKEN(res, busno + 1);
 	}
-	pr_debug(" result=0x%p\n", res);
-
 	return res;
 }
 
-static void __init spider_pci_setup_chip(struct spider_pci_bus *bus)
-{
-#ifdef SPIDER_DISABLE_PREFETCH
-	u32 val = in_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT);
-	pr_debug(" PVCI_Control_Status was 0x%08x\n", val);
-	out_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
-#endif
-
-	/* Configure the dummy address for the workaround */
-	out_be32(bus->regs + SPIDER_PCI_DUMMY_READ_BASE, 0x80000000);
-}
-
-static void __init spider_pci_add_one(struct pci_controller *phb)
+/* Regist new bus to support workaround */
+void __init iowa_register_bus(struct pci_controller *phb,
+			struct ppc_pci_io *ops,
+			int (*initfunc)(struct iowa_bus *, void *), void *data)
 {
-	struct spider_pci_bus *bus = &spider_pci_busses[spider_pci_count];
+	struct iowa_bus *bus;
 	struct device_node *np = phb->dn;
-	struct resource rsrc;
-	void __iomem *regs;
 
-	if (spider_pci_count >= MAX_SPIDERS) {
-		printk(KERN_ERR "Too many spider bridges, workarounds"
-		       " disabled for %s\n", np->full_name);
+	if (iowa_bus_count >= IOWA_MAX_BUS) {
+		pr_err("IOWA:Too many pci bridges, "
+		       "workarounds disabled for %s\n", np->full_name);
 		return;
 	}
 
-	/* Get the registers for the beast */
-	if (of_address_to_resource(np, 0, &rsrc)) {
-		printk(KERN_ERR "Failed to get registers for spider %s"
-		       " workarounds disabled\n", np->full_name);
-		return;
-	}
+	bus = &iowa_busses[iowa_bus_count];
+	bus->phb = phb;
+	bus->ops = ops;
 
-	/* Mask out some useless bits in there to get to the base of the
-	 * spider chip
-	 */
-	rsrc.start &= ~0xfffffffful;
-
-	/* Map them */
-	regs = ioremap(rsrc.start + SPIDER_PCI_REG_BASE, 0x1000);
-	if (regs == NULL) {
-		printk(KERN_ERR "Failed to map registers for spider %s"
-		       " workarounds disabled\n", np->full_name);
-		return;
-	}
-
-	spider_pci_count++;
-
-	/* We assume spiders only have one MMIO resource */
-	bus->mmio_start = phb->mem_resources[0].start;
-	bus->mmio_end = phb->mem_resources[0].end + 1;
-
-	bus->pio_vstart = (unsigned long)phb->io_base_virt;
-	bus->pio_vend = bus->pio_vstart + phb->pci_io_size;
-
-	bus->regs = regs;
-
-	printk(KERN_INFO "PCI: Spider MMIO workaround for %s\n",np->full_name);
+	if (initfunc)
+		if ((*initfunc)(bus, data))
+			return;
 
-	pr_debug(" mmio (P) = 0x%016lx..0x%016lx\n",
-		 bus->mmio_start, bus->mmio_end);
-	pr_debug("  pio (V) = 0x%016lx..0x%016lx\n",
-		 bus->pio_vstart, bus->pio_vend);
-	pr_debug(" regs (P) = 0x%016lx (V) = 0x%p\n",
-		 rsrc.start + SPIDER_PCI_REG_BASE, bus->regs);
+	iowa_bus_count++;
 
-	spider_pci_setup_chip(bus);
+	pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name);
 }
 
-static struct ppc_pci_io __initdata spider_pci_io = {
-	.readb = spider_readb,
-	.readw = spider_readw,
-	.readl = spider_readl,
-	.readq = spider_readq,
-	.readw_be = spider_readw_be,
-	.readl_be = spider_readl_be,
-	.readq_be = spider_readq_be,
-	.readsb = spider_readsb,
-	.readsw = spider_readsw,
-	.readsl = spider_readsl,
-	.memcpy_fromio = spider_memcpy_fromio,
-};
-
-static int __init spider_pci_workaround_init(void)
+/* enable IO workaround */
+void __init io_workaround_init(void)
 {
-	struct pci_controller *phb;
-
-	/* Find spider bridges. We assume they have been all probed
-	 * in setup_arch(). If that was to change, we would need to
-	 * update this code to cope with dynamically added busses
-	 */
-	list_for_each_entry(phb, &hose_list, list_node) {
-		struct device_node *np = phb->dn;
-		const char *model = of_get_property(np, "model", NULL);
-
-		/* If no model property or name isn't exactly "pci", skip */
-		if (model == NULL || strcmp(np->name, "pci"))
-			continue;
-		/* If model is not "Spider", skip */
-		if (strcmp(model, "Spider"))
-			continue;
-		spider_pci_add_one(phb);
-	}
-
-	/* No Spider PCI found, exit */
-	if (spider_pci_count == 0)
-		return 0;
+	static int io_workaround_inited;
 
-	/* Setup IO callbacks. We only setup MMIO reads. PIO reads will
-	 * fallback to MMIO reads (though without a token, thus slower)
-	 */
-	ppc_pci_io = spider_pci_io;
-
-	/* Setup ioremap callback */
-	ppc_md.ioremap = spider_ioremap;
-
-	return 0;
+	if (io_workaround_inited)
+		return;
+	ppc_pci_io = iowa_pci_io;
+	ppc_md.ioremap = iowa_ioremap;
+	io_workaround_inited = 1;
 }
-machine_arch_initcall(cell, spider_pci_workaround_init);
diff --git a/arch/powerpc/platforms/cell/io-workarounds.h b/arch/powerpc/platforms/cell/io-workarounds.h
new file mode 100644
index 000000000000..79d8ed3d510f
--- /dev/null
+++ b/arch/powerpc/platforms/cell/io-workarounds.h
@@ -0,0 +1,49 @@
+/*
+ * Support PCI IO workaround
+ *
+ * (C) Copyright 2007-2008 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _IO_WORKAROUNDS_H
+#define _IO_WORKAROUNDS_H
+
+#include <linux/io.h>
+#include <asm/pci-bridge.h>
+
+/* Bus info */
+struct iowa_bus {
+	struct pci_controller *phb;
+	struct ppc_pci_io *ops;
+	void   *private;
+};
+
+void __init io_workaround_init(void);
+void __init iowa_register_bus(struct pci_controller *, struct ppc_pci_io *,
+			      int (*)(struct iowa_bus *, void *), void *);
+struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR);
+struct iowa_bus *iowa_pio_find_bus(unsigned long);
+
+extern struct ppc_pci_io spiderpci_ops;
+extern int spiderpci_iowa_init(struct iowa_bus *, void *);
+
+#define SPIDER_PCI_REG_BASE		0xd000
+#define SPIDER_PCI_REG_SIZE		0x1000
+#define SPIDER_PCI_VCI_CNTL_STAT	0x0110
+#define SPIDER_PCI_DUMMY_READ		0x0810
+#define SPIDER_PCI_DUMMY_READ_BASE	0x0814
+
+#endif /* _IO_WORKAROUNDS_H */
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 5c531e8f9f6f..ab721b50fbba 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -57,6 +57,7 @@
 #include "interrupt.h"
 #include "pervasive.h"
 #include "ras.h"
+#include "io-workarounds.h"
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -117,13 +118,50 @@ static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex);
 
+static int __devinit cell_setup_phb(struct pci_controller *phb)
+{
+	const char *model;
+	struct device_node *np;
+
+	int rc = rtas_setup_phb(phb);
+	if (rc)
+		return rc;
+
+	np = phb->dn;
+	model = of_get_property(np, "model", NULL);
+	if (model == NULL || strcmp(np->name, "pci"))
+		return 0;
+
+	/* Setup workarounds for spider */
+	if (strcmp(model, "Spider"))
+		return 0;
+
+	iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init,
+				  (void *)SPIDER_PCI_REG_BASE);
+	io_workaround_init();
+
+	return 0;
+}
+
 static int __init cell_publish_devices(void)
 {
+	struct device_node *root = of_find_node_by_path("/");
+	struct device_node *np;
 	int node;
 
 	/* Publish OF platform devices for southbridge IOs */
 	of_platform_bus_probe(NULL, NULL, NULL);
 
+	/* On spider based blades, we need to manually create the OF
+	 * platform devices for the PCI host bridges
+	 */
+	for_each_child_of_node(root, np) {
+		if (np->type == NULL || (strcmp(np->type, "pci") != 0 &&
+					 strcmp(np->type, "pciex") != 0))
+			continue;
+		of_platform_device_create(np, NULL, NULL);
+	}
+
 	/* There is no device for the MIC memory controller, thus we create
 	 * a platform device for it to attach the EDAC driver to.
 	 */
@@ -132,6 +170,7 @@ static int __init cell_publish_devices(void)
 			continue;
 		platform_device_register_simple("cbe-mic", node, NULL, 0);
 	}
+
 	return 0;
 }
 machine_subsys_initcall(cell, cell_publish_devices);
@@ -213,7 +252,7 @@ static void __init cell_setup_arch(void)
 
 	/* Find and initialize PCI host bridges */
 	init_pci_config_tokens();
-	find_and_init_phbs();
+
 	cbe_pervasive_init();
 #ifdef CONFIG_DUMMY_CONSOLE
 	conswitchp = &dummy_con;
@@ -249,7 +288,7 @@ define_machine(cell) {
 	.calibrate_decr		= generic_calibrate_decr,
 	.progress		= cell_progress,
 	.init_IRQ       	= cell_init_irq,
-	.pci_setup_phb		= rtas_setup_phb,
+	.pci_setup_phb		= cell_setup_phb,
 #ifdef CONFIG_KEXEC
 	.machine_kexec		= default_machine_kexec,
 	.machine_kexec_prepare	= default_machine_kexec_prepare,
diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c
new file mode 100644
index 000000000000..418b605ac35a
--- /dev/null
+++ b/arch/powerpc/platforms/cell/spider-pci.c
@@ -0,0 +1,184 @@
+/*
+ * IO workarounds for PCI on Celleb/Cell platform
+ *
+ * (C) Copyright 2006-2007 TOSHIBA CORPORATION
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#undef DEBUG
+
+#include <linux/kernel.h>
+#include <linux/of_platform.h>
+#include <linux/io.h>
+
+#include <asm/ppc-pci.h>
+#include <asm/pci-bridge.h>
+
+#include "io-workarounds.h"
+
+#define SPIDER_PCI_DISABLE_PREFETCH
+
+struct spiderpci_iowa_private {
+	void __iomem *regs;
+};
+
+static void spiderpci_io_flush(struct iowa_bus *bus)
+{
+	struct spiderpci_iowa_private *priv;
+	u32 val;
+
+	priv = bus->private;
+	val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ);
+	iosync();
+}
+
+#define SPIDER_PCI_MMIO_READ(name, ret)					\
+static ret spiderpci_##name(const PCI_IO_ADDR addr)			\
+{									\
+	ret val = __do_##name(addr);					\
+	spiderpci_io_flush(iowa_mem_find_bus(addr));			\
+	return val;							\
+}
+
+#define SPIDER_PCI_MMIO_READ_STR(name)					\
+static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, 	\
+			     unsigned long count)			\
+{									\
+	__do_##name(addr, buf, count);					\
+	spiderpci_io_flush(iowa_mem_find_bus(addr));			\
+}
+
+SPIDER_PCI_MMIO_READ(readb, u8)
+SPIDER_PCI_MMIO_READ(readw, u16)
+SPIDER_PCI_MMIO_READ(readl, u32)
+SPIDER_PCI_MMIO_READ(readq, u64)
+SPIDER_PCI_MMIO_READ(readw_be, u16)
+SPIDER_PCI_MMIO_READ(readl_be, u32)
+SPIDER_PCI_MMIO_READ(readq_be, u64)
+SPIDER_PCI_MMIO_READ_STR(readsb)
+SPIDER_PCI_MMIO_READ_STR(readsw)
+SPIDER_PCI_MMIO_READ_STR(readsl)
+
+static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src,
+				    unsigned long n)
+{
+	__do_memcpy_fromio(dest, src, n);
+	spiderpci_io_flush(iowa_mem_find_bus(src));
+}
+
+static int __init spiderpci_pci_setup_chip(struct pci_controller *phb,
+					   void __iomem *regs)
+{
+	void *dummy_page_va;
+	dma_addr_t dummy_page_da;
+
+#ifdef SPIDER_PCI_DISABLE_PREFETCH
+	u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT);
+	pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val);
+	out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8);
+#endif /* SPIDER_PCI_DISABLE_PREFETCH */
+
+	/* setup dummy read */
+	/*
+	 * On CellBlade, we can't know that which XDR memory is used by
+	 * kmalloc() to allocate dummy_page_va.
+	 * In order to imporve the performance, the XDR which is used to
+	 * allocate dummy_page_va is the nearest the spider-pci.
+	 * We have to select the CBE which is the nearest the spider-pci
+	 * to allocate memory from the best XDR, but I don't know that
+	 * how to do.
+	 *
+	 * Celleb does not have this problem, because it has only one XDR.
+	 */
+	dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!dummy_page_va) {
+		pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n");
+		return -1;
+	}
+
+	dummy_page_da = dma_map_single(phb->parent, dummy_page_va,
+				       PAGE_SIZE, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dummy_page_da)) {
+		pr_err("SPIDER-IOWA:Map dummy page filed.\n");
+		kfree(dummy_page_va);
+		return -1;
+	}
+
+	out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da);
+
+	return 0;
+}
+
+int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data)
+{
+	void __iomem *regs = NULL;
+	struct spiderpci_iowa_private *priv;
+	struct device_node *np = bus->phb->dn;
+	struct resource r;
+	unsigned long offset = (unsigned long)data;
+
+	pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n",
+		 np->full_name);
+
+	priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL);
+	if (!priv) {
+		pr_err("SPIDERPCI-IOWA:"
+		       "Can't allocate struct spiderpci_iowa_private");
+		return -1;
+	}
+
+	if (of_address_to_resource(np, 0, &r)) {
+		pr_err("SPIDERPCI-IOWA:Can't get resource.\n");
+		goto error;
+	}
+
+	regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE);
+	if (!regs) {
+		pr_err("SPIDERPCI-IOWA:ioremap failed.\n");
+		goto error;
+	}
+	priv->regs = regs;
+	bus->private = priv;
+
+	if (spiderpci_pci_setup_chip(bus->phb, regs))
+		goto error;
+
+	return 0;
+
+error:
+	kfree(priv);
+	bus->private = NULL;
+
+	if (regs)
+		iounmap(regs);
+
+	return -1;
+}
+
+struct ppc_pci_io spiderpci_ops = {
+	.readb = spiderpci_readb,
+	.readw = spiderpci_readw,
+	.readl = spiderpci_readl,
+	.readq = spiderpci_readq,
+	.readw_be = spiderpci_readw_be,
+	.readl_be = spiderpci_readl_be,
+	.readq_be = spiderpci_readq_be,
+	.readsb = spiderpci_readsb,
+	.readsw = spiderpci_readsw,
+	.readsl = spiderpci_readsl,
+	.memcpy_fromio = spiderpci_memcpy_fromio,
+};
+
diff --git a/arch/powerpc/platforms/celleb/Kconfig b/arch/powerpc/platforms/celleb/Kconfig
deleted file mode 100644
index 372891edcdd2..000000000000
--- a/arch/powerpc/platforms/celleb/Kconfig
+++ /dev/null
@@ -1,12 +0,0 @@
-config PPC_CELLEB
-	bool "Toshiba's Cell Reference Set 'Celleb' Architecture"
-	depends on PPC_MULTIPLATFORM && PPC64
-	select PPC_CELL
-	select PPC_CELL_NATIVE
-	select PPC_RTAS
-	select PPC_INDIRECT_IO
-	select PPC_OF_PLATFORM_PCI
-	select HAS_TXX9_SERIAL
-	select PPC_UDBG_BEAT
-	select USB_OHCI_BIG_ENDIAN_MMIO
-	select USB_EHCI_BIG_ENDIAN_MMIO
diff --git a/arch/powerpc/platforms/celleb/Makefile b/arch/powerpc/platforms/celleb/Makefile
deleted file mode 100644
index 889d43f715ea..000000000000
--- a/arch/powerpc/platforms/celleb/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-obj-y				+= interrupt.o iommu.o setup.o \
-				   htab.o beat.o hvCall.o pci.o \
-				   scc_epci.o scc_uhc.o \
-				   io-workarounds.o
-
-obj-$(CONFIG_SMP)		+= smp.o
-obj-$(CONFIG_PPC_UDBG_BEAT)	+= udbg_beat.o
-obj-$(CONFIG_SERIAL_TXX9)	+= scc_sio.o
-obj-$(CONFIG_SPU_BASE)		+= spu_priv1.o
diff --git a/arch/powerpc/platforms/celleb/io-workarounds.c b/arch/powerpc/platforms/celleb/io-workarounds.c
deleted file mode 100644
index 423339be1bac..000000000000
--- a/arch/powerpc/platforms/celleb/io-workarounds.c
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
- * Support for Celleb io workarounds
- *
- * (C) Copyright 2006-2007 TOSHIBA CORPORATION
- *
- * This file is based to arch/powerpc/platform/cell/io-workarounds.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#undef DEBUG
-
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/irq.h>
-
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/machdep.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-#include "pci.h"
-
-#define MAX_CELLEB_PCI_BUS	4
-
-void *celleb_dummy_page_va;
-
-static struct celleb_pci_bus {
-	struct pci_controller *phb;
-	void (*dummy_read)(struct pci_controller *);
-} celleb_pci_busses[MAX_CELLEB_PCI_BUS];
-
-static int celleb_pci_count = 0;
-
-static struct celleb_pci_bus *celleb_pci_find(unsigned long vaddr,
-					      unsigned long paddr)
-{
-	int i, j;
-	struct resource *res;
-
-	for (i = 0; i < celleb_pci_count; i++) {
-		struct celleb_pci_bus *bus = &celleb_pci_busses[i];
-		struct pci_controller *phb = bus->phb;
-		if (paddr)
-			for (j = 0; j < 3; j++) {
-				res = &phb->mem_resources[j];
-				if (paddr >= res->start && paddr <= res->end)
-					return bus;
-			}
-		res = &phb->io_resource;
-		if (vaddr && vaddr >= res->start && vaddr <= res->end)
-			return bus;
-	}
-	return NULL;
-}
-
-static void celleb_io_flush(const PCI_IO_ADDR addr)
-{
-	struct celleb_pci_bus *bus;
-	int token;
-
-	token = PCI_GET_ADDR_TOKEN(addr);
-
-	if (token && token <= celleb_pci_count)
-		bus = &celleb_pci_busses[token - 1];
-	else {
-		unsigned long vaddr, paddr;
-		pte_t *ptep;
-
-		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
-		if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END)
-			return;
-
-		ptep = find_linux_pte(init_mm.pgd, vaddr);
-		if (ptep == NULL)
-			paddr = 0;
-		else
-			paddr = pte_pfn(*ptep) << PAGE_SHIFT;
-		bus = celleb_pci_find(vaddr, paddr);
-
-		if (bus == NULL)
-			return;
-	}
-
-	if (bus->dummy_read)
-		bus->dummy_read(bus->phb);
-}
-
-static u8 celleb_readb(const PCI_IO_ADDR addr)
-{
-	u8 val;
-	val = __do_readb(addr);
-	celleb_io_flush(addr);
-	return val;
-}
-
-static u16 celleb_readw(const PCI_IO_ADDR addr)
-{
-	u16 val;
-	val = __do_readw(addr);
-	celleb_io_flush(addr);
-	return val;
-}
-
-static u32 celleb_readl(const PCI_IO_ADDR addr)
-{
-	u32 val;
-	val = __do_readl(addr);
-	celleb_io_flush(addr);
-	return val;
-}
-
-static u64 celleb_readq(const PCI_IO_ADDR addr)
-{
-	u64 val;
-	val = __do_readq(addr);
-	celleb_io_flush(addr);
-	return val;
-}
-
-static u16 celleb_readw_be(const PCI_IO_ADDR addr)
-{
-	u16 val;
-	val = __do_readw_be(addr);
-	celleb_io_flush(addr);
-	return val;
-}
-
-static u32 celleb_readl_be(const PCI_IO_ADDR addr)
-{
-	u32 val;
-	val = __do_readl_be(addr);
-	celleb_io_flush(addr);
-	return val;
-}
-
-static u64 celleb_readq_be(const PCI_IO_ADDR addr)
-{
-	u64 val;
-	val = __do_readq_be(addr);
-	celleb_io_flush(addr);
-	return val;
-}
-
-static void celleb_readsb(const PCI_IO_ADDR addr,
-			  void *buf, unsigned long count)
-{
-	__do_readsb(addr, buf, count);
-	celleb_io_flush(addr);
-}
-
-static void celleb_readsw(const PCI_IO_ADDR addr,
-			  void *buf, unsigned long count)
-{
-	__do_readsw(addr, buf, count);
-	celleb_io_flush(addr);
-}
-
-static void celleb_readsl(const PCI_IO_ADDR addr,
-			  void *buf, unsigned long count)
-{
-	__do_readsl(addr, buf, count);
-	celleb_io_flush(addr);
-}
-
-static void celleb_memcpy_fromio(void *dest,
-				 const PCI_IO_ADDR src,
-				 unsigned long n)
-{
-	__do_memcpy_fromio(dest, src, n);
-	celleb_io_flush(src);
-}
-
-static void __iomem *celleb_ioremap(unsigned long addr,
-				     unsigned long size,
-				     unsigned long flags)
-{
-	struct celleb_pci_bus *bus;
-	void __iomem *res = __ioremap(addr, size, flags);
-	int busno;
-
-	bus = celleb_pci_find(0, addr);
-	if (bus != NULL) {
-		busno = bus - celleb_pci_busses;
-		PCI_SET_ADDR_TOKEN(res, busno + 1);
-	}
-	return res;
-}
-
-static void celleb_iounmap(volatile void __iomem *addr)
-{
-	return __iounmap(PCI_FIX_ADDR(addr));
-}
-
-static struct ppc_pci_io celleb_pci_io __initdata = {
-	.readb = celleb_readb,
-	.readw = celleb_readw,
-	.readl = celleb_readl,
-	.readq = celleb_readq,
-	.readw_be = celleb_readw_be,
-	.readl_be = celleb_readl_be,
-	.readq_be = celleb_readq_be,
-	.readsb = celleb_readsb,
-	.readsw = celleb_readsw,
-	.readsl = celleb_readsl,
-	.memcpy_fromio = celleb_memcpy_fromio,
-};
-
-void __init celleb_pci_add_one(struct pci_controller *phb,
-			       void (*dummy_read)(struct pci_controller *))
-{
-	struct celleb_pci_bus *bus = &celleb_pci_busses[celleb_pci_count];
-	struct device_node *np = phb->dn;
-
-	if (celleb_pci_count >= MAX_CELLEB_PCI_BUS) {
-		printk(KERN_ERR "Too many pci bridges, workarounds"
-		       " disabled for %s\n", np->full_name);
-		return;
-	}
-
-	celleb_pci_count++;
-
-	bus->phb = phb;
-	bus->dummy_read = dummy_read;
-}
-
-static struct of_device_id celleb_pci_workaround_match[] __initdata = {
-	{
-		.name = "pci-pseudo",
-		.data = fake_pci_workaround_init,
-	}, {
-		.name = "epci",
-		.data = epci_workaround_init,
-	}, {
-	},
-};
-
-int __init celleb_pci_workaround_init(void)
-{
-	struct pci_controller *phb;
-	struct device_node *node;
-	const struct  of_device_id *match;
-	void (*init_func)(struct pci_controller *);
-
-	celleb_dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	if (!celleb_dummy_page_va) {
-		printk(KERN_ERR "Celleb: dummy read disabled. "
-			"Alloc celleb_dummy_page_va failed\n");
-		return 1;
-	}
-
-	list_for_each_entry(phb, &hose_list, list_node) {
-		node = phb->dn;
-		match = of_match_node(celleb_pci_workaround_match, node);
-
-		if (match) {
-			init_func = match->data;
-			(*init_func)(phb);
-		}
-	}
-
-	ppc_pci_io = celleb_pci_io;
-	ppc_md.ioremap = celleb_ioremap;
-	ppc_md.iounmap = celleb_iounmap;
-
-	return 0;
-}
diff --git a/arch/powerpc/platforms/iseries/exception.S b/arch/powerpc/platforms/iseries/exception.S
index c775cd4b3d6e..8ff330d026ca 100644
--- a/arch/powerpc/platforms/iseries/exception.S
+++ b/arch/powerpc/platforms/iseries/exception.S
@@ -59,8 +59,33 @@ system_reset_iSeries:
 	andc	r4,r4,r5
 	mtspr	SPRN_CTRLT,r4
 
+/* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */
+/* In the UP case we'll yeild() later, and we will not access the paca anyway */
+#ifdef CONFIG_SMP
 1:
 	HMT_LOW
+	LOAD_REG_IMMEDIATE(r23, __secondary_hold_spinloop)
+	ld	r23,0(r23)
+	sync
+	LOAD_REG_IMMEDIATE(r3,current_set)
+	sldi	r28,r24,3		/* get current_set[cpu#] */
+	ldx	r3,r3,r28
+	addi	r1,r3,THREAD_SIZE
+	subi	r1,r1,STACK_FRAME_OVERHEAD
+
+	cmpwi	0,r23,0			/* Keep poking the Hypervisor until */
+	bne	2f			/* we're released */
+	/* Let the Hypervisor know we are alive */
+	/* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */
+	lis	r3,0x8002
+	rldicr	r3,r3,32,15		/* r0 = (r3 << 32) & 0xffff000000000000 */
+	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
+	sc				/* Invoke the hypervisor via a system call */
+	b	1b
+#endif
+
+2:
+	HMT_LOW
 #ifdef CONFIG_SMP
 	lbz	r23,PACAPROCSTART(r13)	/* Test if this processor
 					 * should start */
@@ -91,7 +116,7 @@ iSeries_secondary_smp_loop:
 	li	r0,-1			/* r0=-1 indicates a Hypervisor call */
 	sc				/* Invoke the hypervisor via a system call */
 	mfspr	r13,SPRN_SPRG3		/* Put r13 back ???? */
-	b	1b			/* If SMP not configured, secondaries
+	b	2b			/* If SMP not configured, secondaries
 					 * loop forever */
 
 /***  ISeries-LPAR interrupt handlers ***/
diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c
index c73379ec9141..1d201782d4e5 100644
--- a/arch/powerpc/platforms/ps3/os-area.c
+++ b/arch/powerpc/platforms/ps3/os-area.c
@@ -25,6 +25,7 @@
 #include <linux/syscalls.h>
 #include <linux/ctype.h>
 #include <linux/lmb.h>
+#include <linux/of.h>
 
 #include <asm/prom.h>
 
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 306a9d07491d..07fe5b69b9e2 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -34,3 +34,8 @@ config LPARCFG
 	help
 	Provide system capacity information via human readable
 	<key word>=<value> pairs through a /proc/ppc64/lparcfg interface.
+
+config PPC_PSERIES_DEBUG
+	depends on PPC_PSERIES && PPC_EARLY_DEBUG
+	bool "Enable extra debug logging in platforms/pseries"
+	default y
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index bdae04bb7a01..bd2593ed28dd 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -2,6 +2,10 @@ ifeq ($(CONFIG_PPC64),y)
 EXTRA_CFLAGS		+= -mno-minimal-toc
 endif
 
+ifeq ($(CONFIG_PPC_PSERIES_DEBUG),y)
+EXTRA_CFLAGS		+= -DDEBUG
+endif
+
 obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
 			   setup.o iommu.o ras.o rtasd.o \
 			   firmware.o power.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 550b2f7d2cc1..a3fd56b186e6 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -39,7 +39,6 @@
 #include <asm/ppc-pci.h>
 #include <asm/rtas.h>
 
-#undef DEBUG
 
 /** Overview:
  *  EEH, or "Extended Error Handling" is a PCI bridge technology for
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
index 1e83fcd0df31..ce37040af870 100644
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ b/arch/powerpc/platforms/pseries/eeh_cache.c
@@ -28,7 +28,6 @@
 #include <asm/pci-bridge.h>
 #include <asm/ppc-pci.h>
 
-#undef DEBUG
 
 /**
  * The pci address cache subsystem.  This subsystem places
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index b765b7c77b65..9d3a40f45974 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -21,17 +21,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
 
 #include <asm/firmware.h>
 #include <asm/prom.h>
 #include <asm/udbg.h>
 
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
 
 typedef struct {
     unsigned long val;
@@ -72,7 +66,7 @@ void __init fw_feature_init(const char *hypertas, unsigned long len)
 	const char *s;
 	int i;
 
-	DBG(" -> fw_feature_init()\n");
+	pr_debug(" -> fw_feature_init()\n");
 
 	for (s = hypertas; s < hypertas + len; s += strlen(s) + 1) {
 		for (i = 0; i < FIRMWARE_MAX_FEATURES; i++) {
@@ -88,5 +82,5 @@ void __init fw_feature_init(const char *hypertas, unsigned long len)
 		}
 	}
 
-	DBG(" <- fw_feature_init()\n");
+	pr_debug(" <- fw_feature_init()\n");
 }
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index a65c76308201..176f1f39d2d5 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -47,7 +47,6 @@
 
 #include "plpar_wrappers.h"
 
-#define DBG(fmt...)
 
 static void tce_build_pSeries(struct iommu_table *tbl, long index,
 			      long npages, unsigned long uaddr,
@@ -322,7 +321,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 
 	dn = pci_bus_to_OF_node(bus);
 
-	DBG("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name);
+	pr_debug("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name);
 
 	if (bus->self) {
 		/* This is not a root bus, any setup will be done for the
@@ -347,7 +346,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 	for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling)
 		children++;
 
-	DBG("Children: %d\n", children);
+	pr_debug("Children: %d\n", children);
 
 	/* Calculate amount of DMA window per slot. Each window must be
 	 * a power of two (due to pci_alloc_consistent requirements).
@@ -361,8 +360,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 
 		while (pci->phb->dma_window_size * children > 0x80000000ul)
 			pci->phb->dma_window_size >>= 1;
-		DBG("No ISA/IDE, window size is 0x%lx\n",
-			pci->phb->dma_window_size);
+		pr_debug("No ISA/IDE, window size is 0x%lx\n",
+			 pci->phb->dma_window_size);
 		pci->phb->dma_window_base_cur = 0;
 
 		return;
@@ -387,8 +386,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
 	while (pci->phb->dma_window_size * children > 0x70000000ul)
 		pci->phb->dma_window_size >>= 1;
 
-	DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size);
-
+	pr_debug("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size);
 }
 
 
@@ -401,7 +399,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 
 	dn = pci_bus_to_OF_node(bus);
 
-	DBG("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n", dn->full_name);
+	pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n",
+		 dn->full_name);
 
 	/* Find nearest ibm,dma-window, walking up the device tree */
 	for (pdn = dn; pdn != NULL; pdn = pdn->parent) {
@@ -411,14 +410,14 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 	}
 
 	if (dma_window == NULL) {
-		DBG("  no ibm,dma-window property !\n");
+		pr_debug("  no ibm,dma-window property !\n");
 		return;
 	}
 
 	ppci = PCI_DN(pdn);
 
-	DBG("  parent is %s, iommu_table: 0x%p\n",
-	    pdn->full_name, ppci->iommu_table);
+	pr_debug("  parent is %s, iommu_table: 0x%p\n",
+		 pdn->full_name, ppci->iommu_table);
 
 	if (!ppci->iommu_table) {
 		tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
@@ -426,7 +425,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
 		iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window,
 			bus->number);
 		ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
-		DBG("  created table: %p\n", ppci->iommu_table);
+		pr_debug("  created table: %p\n", ppci->iommu_table);
 	}
 
 	if (pdn != dn)
@@ -439,7 +438,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
 	struct device_node *dn;
 	struct iommu_table *tbl;
 
-	DBG("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));
+	pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev));
 
 	dn = dev->dev.archdata.of_node;
 
@@ -450,7 +449,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
 	if (!dev->bus->self) {
 		struct pci_controller *phb = PCI_DN(dn)->phb;
 
-		DBG(" --> first child, no bridge. Allocating iommu table.\n");
+		pr_debug(" --> first child, no bridge. Allocating iommu table.\n");
 		tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL,
 				   phb->node);
 		iommu_table_setparms(phb, dn, tbl);
@@ -480,7 +479,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 	const void *dma_window = NULL;
 	struct pci_dn *pci;
 
-	DBG("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
+	pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev));
 
 	/* dev setup for LPAR is a little tricky, since the device tree might
 	 * contain the dma-window properties per-device and not neccesarily
@@ -489,7 +488,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 	 * already allocated.
 	 */
 	dn = pci_device_to_OF_node(dev);
-	DBG("  node is %s\n", dn->full_name);
+	pr_debug("  node is %s\n", dn->full_name);
 
 	for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table;
 	     pdn = pdn->parent) {
@@ -504,13 +503,13 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 				 pci_name(dev), dn? dn->full_name : "<null>");
 		return;
 	}
-	DBG("  parent is %s\n", pdn->full_name);
+	pr_debug("  parent is %s\n", pdn->full_name);
 
 	/* Check for parent == NULL so we don't try to setup the empty EADS
 	 * slots on POWER4 machines.
 	 */
 	if (dma_window == NULL || pdn->parent == NULL) {
-		DBG("  no dma window for device, linking to parent\n");
+		pr_debug("  no dma window for device, linking to parent\n");
 		dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table;
 		return;
 	}
@@ -522,9 +521,9 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
 		iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window,
 			pci->phb->bus->number);
 		pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
-		DBG("  created table: %p\n", pci->iommu_table);
+		pr_debug("  created table: %p\n", pci->iommu_table);
 	} else {
-		DBG("  found DMA window, table: %p\n", pci->iommu_table);
+		pr_debug("  found DMA window, table: %p\n", pci->iommu_table);
 	}
 
 	dev->dev.archdata.dma_data = pci->iommu_table;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 9235c469449e..2cbaedb17f3e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -19,7 +19,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#undef DEBUG_LOW
+/* Enables debugging of low-level hash table routines - careful! */
+#undef DEBUG
 
 #include <linux/kernel.h>
 #include <linux/dma-mapping.h>
@@ -42,11 +43,6 @@
 #include "plpar_wrappers.h"
 #include "pseries.h"
 
-#ifdef DEBUG_LOW
-#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0)
-#else
-#define DBG_LOW(fmt...) do { } while(0)
-#endif
 
 /* in hvCall.S */
 EXPORT_SYMBOL(plpar_hcall);
@@ -196,6 +192,8 @@ void __init udbg_init_debug_lpar(void)
 	udbg_putc = udbg_putcLP;
 	udbg_getc = udbg_getcLP;
 	udbg_getc_poll = udbg_getc_pollLP;
+
+	register_early_udbg_console();
 }
 
 /* returns 0 if couldn't find or use /chosen/stdout as console */
@@ -288,15 +286,15 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	unsigned long hpte_v, hpte_r;
 
 	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
-			"rflags=%lx, vflags=%lx, psize=%d)\n",
-		hpte_group, va, pa, rflags, vflags, psize);
+		pr_debug("hpte_insert(group=%lx, va=%016lx, pa=%016lx, "
+			 "rflags=%lx, vflags=%lx, psize=%d)\n",
+			 hpte_group, va, pa, rflags, vflags, psize);
 
 	hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID;
 	hpte_r = hpte_encode_r(pa, psize) | rflags;
 
 	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
+		pr_debug(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
 
 	/* Now fill in the actual HPTE */
 	/* Set CEC cookie to 0         */
@@ -313,7 +311,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot);
 	if (unlikely(lpar_rc == H_PTEG_FULL)) {
 		if (!(vflags & HPTE_V_BOLTED))
-			DBG_LOW(" full\n");
+			pr_debug(" full\n");
 		return -1;
 	}
 
@@ -324,11 +322,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
 	 */
 	if (unlikely(lpar_rc != H_SUCCESS)) {
 		if (!(vflags & HPTE_V_BOLTED))
-			DBG_LOW(" lpar err %d\n", lpar_rc);
+			pr_debug(" lpar err %lu\n", lpar_rc);
 		return -2;
 	}
 	if (!(vflags & HPTE_V_BOLTED))
-		DBG_LOW(" -> slot: %d\n", slot & 7);
+		pr_debug(" -> slot: %lu\n", slot & 7);
 
 	/* Because of iSeries, we have to pass down the secondary
 	 * bucket bit here as well
@@ -420,17 +418,17 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
 
 	want_v = hpte_encode_avpn(va, psize, ssize);
 
-	DBG_LOW("    update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ",
-		want_v, slot, flags, psize);
+	pr_debug("    update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...",
+		 want_v, slot, flags, psize);
 
 	lpar_rc = plpar_pte_protect(flags, slot, want_v);
 
 	if (lpar_rc == H_NOT_FOUND) {
-		DBG_LOW("not found !\n");
+		pr_debug("not found !\n");
 		return -1;
 	}
 
-	DBG_LOW("ok\n");
+	pr_debug("ok\n");
 
 	BUG_ON(lpar_rc != H_SUCCESS);
 
@@ -505,8 +503,8 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va,
 	unsigned long lpar_rc;
 	unsigned long dummy1, dummy2;
 
-	DBG_LOW("    inval : slot=%lx, va=%016lx, psize: %d, local: %d",
-		slot, va, psize, local);
+	pr_debug("    inval : slot=%lx, va=%016lx, psize: %d, local: %d\n",
+		 slot, va, psize, local);
 
 	want_v = hpte_encode_avpn(va, psize, ssize);
 	lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2);
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index a1ab25c7082f..2b548afd1003 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -67,8 +67,6 @@ static int ras_check_exception_token;
 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 
-/* #define DEBUG */
-
 
 static void request_ras_irqs(struct device_node *np,
 			irq_handler_t handler,
@@ -237,7 +235,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 		printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n",
 		       *((unsigned long *)&ras_log_buf), status);
 
-#ifndef DEBUG
+#ifndef DEBUG_RTAS_POWER_OFF
 		/* Don't actually power off when debugging so we can test
 		 * without actually failing while injecting errors.
 		 * Error data will not be logged to syslog.
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
index e3078ce41518..befadd4f9524 100644
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ b/arch/powerpc/platforms/pseries/rtasd.c
@@ -29,11 +29,6 @@
 #include <asm/atomic.h>
 #include <asm/machdep.h>
 
-#if 0
-#define DEBUG(A...)	printk(KERN_ERR A)
-#else
-#define DEBUG(A...)
-#endif
 
 static DEFINE_SPINLOCK(rtasd_log_lock);
 
@@ -198,7 +193,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
 	unsigned long s;
 	int len = 0;
 
-	DEBUG("logging event\n");
+	pr_debug("rtasd: logging event\n");
 	if (buf == NULL)
 		return;
 
@@ -409,7 +404,8 @@ static int rtasd(void *unused)
 	daemonize("rtasd");
 
 	printk(KERN_DEBUG "RTAS daemon started\n");
-	DEBUG("will sleep for %d milliseconds\n", (30000/rtas_event_scan_rate));
+	pr_debug("rtasd: will sleep for %d milliseconds\n",
+		 (30000 / rtas_event_scan_rate));
 
 	/* See if we have any error stored in NVRAM */
 	memset(logdata, 0, rtas_error_log_max);
@@ -428,9 +424,9 @@ static int rtasd(void *unused)
 	do_event_scan_all_cpus(1000);
 
 	if (surveillance_timeout != -1) {
-		DEBUG("enabling surveillance\n");
+		pr_debug("rtasd: enabling surveillance\n");
 		enable_surveillance(surveillance_timeout);
-		DEBUG("surveillance enabled\n");
+		pr_debug("rtasd: surveillance enabled\n");
 	}
 
 	/* Delay should be at least one second since some
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index e5b0ea870164..bec3803f0618 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -38,9 +38,7 @@
 #define SCANLOG_HWERROR -1
 #define SCANLOG_CONTINUE 1
 
-#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0)
 
-static int scanlog_debug;
 static unsigned int ibm_scan_log_dump;			/* RTAS token */
 static struct proc_dir_entry *proc_ppc64_scan_log_dump;	/* The proc file */
 
@@ -86,14 +84,14 @@ static ssize_t scanlog_read(struct file *file, char __user *buf,
 		memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE);
 		spin_unlock(&rtas_data_buf_lock);
 
-		DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n",
-		      status, data[0], data[1], data[2]);
+		pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \
+			 "data[2]=%x\n", status, data[0], data[1], data[2]);
 		switch (status) {
 		    case SCANLOG_COMPLETE:
-			DEBUG("hit eof\n");
+			pr_debug("scanlog: hit eof\n");
 			return 0;
 		    case SCANLOG_HWERROR:
-			DEBUG("hardware error reading scan log data\n");
+			pr_debug("scanlog: hardware error reading data\n");
 			return -EIO;
 		    case SCANLOG_CONTINUE:
 			/* We may or may not have data yet */
@@ -110,7 +108,8 @@ static ssize_t scanlog_read(struct file *file, char __user *buf,
 			/* Assume extended busy */
 			wait_time = rtas_busy_delay_time(status);
 			if (!wait_time) {
-				printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status);
+				printk(KERN_ERR "scanlog: unknown error " \
+				       "from rtas: %d\n", status);
 				return -EIO;
 			}
 		}
@@ -134,15 +133,9 @@ static ssize_t scanlog_write(struct file * file, const char __user * buf,
 
 	if (buf) {
 		if (strncmp(stkbuf, "reset", 5) == 0) {
-			DEBUG("reset scanlog\n");
+			pr_debug("scanlog: reset scanlog\n");
 			status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0);
-			DEBUG("rtas returns %d\n", status);
-		} else if (strncmp(stkbuf, "debugon", 7) == 0) {
-			printk(KERN_ERR "scanlog: debug on\n");
-			scanlog_debug = 1;
-		} else if (strncmp(stkbuf, "debugoff", 8) == 0) {
-			printk(KERN_ERR "scanlog: debug off\n");
-			scanlog_debug = 0;
+			pr_debug("scanlog: rtas returns %d\n", status);
 		}
 	}
 	return count;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index f66aa9c3b135..f5d29f5b13c1 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -16,8 +16,6 @@
  * bootup setup stuff..
  */
 
-#undef DEBUG
-
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
@@ -70,11 +68,6 @@
 #include "plpar_wrappers.h"
 #include "pseries.h"
 
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
 
 int fwnmi_active;  /* TRUE if an FWNMI handler is present */
 
@@ -326,7 +319,7 @@ static int pseries_set_xdabr(unsigned long dabr)
  */
 static void __init pSeries_init_early(void)
 {
-	DBG(" -> pSeries_init_early()\n");
+	pr_debug(" -> pSeries_init_early()\n");
 
 	if (firmware_has_feature(FW_FEATURE_LPAR))
 		find_udbg_vterm();
@@ -338,7 +331,7 @@ static void __init pSeries_init_early(void)
 
 	iommu_init_early_pSeries();
 
-	DBG(" <- pSeries_init_early()\n");
+	pr_debug(" <- pSeries_init_early()\n");
 }
 
 /*
@@ -383,7 +376,7 @@ static int __init pSeries_probe(void)
 	    of_flat_dt_is_compatible(root, "IBM,CBEA"))
 		return 0;
 
-	DBG("pSeries detected, looking for LPAR capability...\n");
+	pr_debug("pSeries detected, looking for LPAR capability...\n");
 
 	/* Now try to figure out if we are running on LPAR */
 	of_scan_flat_dt(pSeries_probe_hypertas, NULL);
@@ -393,8 +386,8 @@ static int __init pSeries_probe(void)
 	else
 		hpte_init_native();
 
-	DBG("Machine is%s LPAR !\n",
-	    (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");
+	pr_debug("Machine is%s LPAR !\n",
+	         (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not");
 
 	return 1;
 }
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index ea4c65917a64..9d8f8c84ab89 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -12,7 +12,6 @@
  *      2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
 
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -51,12 +50,6 @@
 #include "plpar_wrappers.h"
 #include "pseries.h"
 
-#ifdef DEBUG
-#include <asm/udbg.h>
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
 
 /*
  * The primary thread of each non-boot processor is recorded here before
@@ -231,7 +224,7 @@ static void __init smp_init_pseries(void)
 {
 	int i;
 
-	DBG(" -> smp_init_pSeries()\n");
+	pr_debug(" -> smp_init_pSeries()\n");
 
 	/* Mark threads which are still spinning in hold loops. */
 	if (cpu_has_feature(CPU_FTR_SMT)) {
@@ -255,7 +248,7 @@ static void __init smp_init_pseries(void)
 		smp_ops->take_timebase = pSeries_take_timebase;
 	}
 
-	DBG(" <- smp_init_pSeries()\n");
+	pr_debug(" <- smp_init_pSeries()\n");
 }
 
 #ifdef CONFIG_MPIC
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index 43df53c30aa0..ebebc28fe895 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -9,7 +9,6 @@
  *  2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
 
 #include <linux/types.h>
 #include <linux/threads.h>
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index d359d6e92975..7f59188cd9a1 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -143,7 +143,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
  */
 static int
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-		       unsigned long *data)
+		       void **kaddr, unsigned long *pfn)
 {
 	struct axon_ram_bank *bank = device->bd_disk->private_data;
 	loff_t offset;
@@ -154,7 +154,8 @@ axon_ram_direct_access(struct block_device *device, sector_t sector,
 		return -ERANGE;
 	}
 
-	*data = bank->ph_addr + offset;
+	*kaddr = (void *)(bank->ph_addr + offset);
+	*pfn = virt_to_phys(kaddr) >> PAGE_SHIFT;
 
 	return 0;
 }
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index 5c1b246aaccc..7b45670c7af3 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -892,3 +892,44 @@ void fsl_rstcr_restart(char *cmd)
 	while (1) ;
 }
 #endif
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+struct platform_diu_data_ops diu_ops = {
+	.diu_size = 1280 * 1024 * 4,	/* default one 1280x1024 buffer */
+};
+EXPORT_SYMBOL(diu_ops);
+
+int __init preallocate_diu_videomemory(void)
+{
+	pr_debug("diu_size=%lu\n", diu_ops.diu_size);
+
+	diu_ops.diu_mem = __alloc_bootmem(diu_ops.diu_size, 8, 0);
+	if (!diu_ops.diu_mem) {
+		printk(KERN_ERR "fsl-diu: cannot allocate %lu bytes\n",
+			diu_ops.diu_size);
+		return -ENOMEM;
+	}
+
+	pr_debug("diu_mem=%p\n", diu_ops.diu_mem);
+
+	rh_init(&diu_ops.diu_rh_info, 4096, ARRAY_SIZE(diu_ops.diu_rh_block),
+		diu_ops.diu_rh_block);
+	return rh_attach_region(&diu_ops.diu_rh_info,
+				(unsigned long) diu_ops.diu_mem,
+				diu_ops.diu_size);
+}
+
+static int __init early_parse_diufb(char *p)
+{
+	if (!p)
+		return 1;
+
+	diu_ops.diu_size = _ALIGN_UP(memparse(p, &p), 8);
+
+	pr_debug("diu_size=%lu\n", diu_ops.diu_size);
+
+	return 0;
+}
+early_param("diufb", early_parse_diufb);
+
+#endif
diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h
index 74c4a9657b33..52c831fa1886 100644
--- a/arch/powerpc/sysdev/fsl_soc.h
+++ b/arch/powerpc/sysdev/fsl_soc.h
@@ -17,5 +17,28 @@ extern int fsl_spi_init(struct spi_board_info *board_infos,
 			void (*deactivate_cs)(u8 cs, u8 polarity));
 
 extern void fsl_rstcr_restart(char *cmd);
+
+#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+#include <linux/bootmem.h>
+#include <asm/rheap.h>
+struct platform_diu_data_ops {
+	rh_block_t diu_rh_block[16];
+	rh_info_t diu_rh_info;
+	unsigned long diu_size;
+	void *diu_mem;
+
+	unsigned int (*get_pixel_format) (unsigned int bits_per_pixel,
+		int monitor_port);
+	void (*set_gamma_table) (int monitor_port, char *gamma_table_base);
+	void (*set_monitor_port) (int monitor_port);
+	void (*set_pixel_clock) (unsigned int pixclock);
+	ssize_t (*show_monitor_port) (int monitor_port, char *buf);
+	int (*set_sysfs_monitor_port) (int val);
+};
+
+extern struct platform_diu_data_ops diu_ops;
+int __init preallocate_diu_videomemory(void);
+#endif
+
 #endif
 #endif
diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c
index 047b31027fa6..41af1223e2a0 100644
--- a/arch/powerpc/sysdev/mv64x60_dev.c
+++ b/arch/powerpc/sysdev/mv64x60_dev.c
@@ -338,15 +338,13 @@ static int __init mv64x60_i2c_device_setup(struct device_node *np, int id)
 
 	pdata.freq_m = 8;	/* default */
 	prop = of_get_property(np, "freq_m", NULL);
-	if (!prop)
-		return -ENODEV;
-	pdata.freq_m = *prop;
+	if (prop)
+		pdata.freq_m = *prop;
 
 	pdata.freq_m = 3;	/* default */
 	prop = of_get_property(np, "freq_n", NULL);
-	if (!prop)
-		return -ENODEV;
-	pdata.freq_n = *prop;
+	if (prop)
+		pdata.freq_n = *prop;
 
 	pdata.timeout = 1000;				/* default: 1 second */
 
@@ -433,9 +431,13 @@ static int __init mv64x60_device_setup(void)
 	int err;
 
 	id = 0;
-	for_each_compatible_node(np, "serial", "marvell,mv64360-mpsc")
-		if ((err = mv64x60_mpsc_device_setup(np, id++)))
-			goto error;
+	for_each_compatible_node(np, "serial", "marvell,mv64360-mpsc") {
+		err = mv64x60_mpsc_device_setup(np, id++);
+		if (err)
+			printk(KERN_ERR "Failed to initialize MV64x60 "
+					"serial device %s: error %d.\n",
+					np->full_name, err);
+	}
 
 	id = 0;
 	id2 = 0;
@@ -443,38 +445,44 @@ static int __init mv64x60_device_setup(void)
 		pdev = mv64x60_eth_register_shared_pdev(np, id++);
 		if (IS_ERR(pdev)) {
 			err = PTR_ERR(pdev);
-			goto error;
+			printk(KERN_ERR "Failed to initialize MV64x60 "
+					"network block %s: error %d.\n",
+					np->full_name, err);
+			continue;
 		}
 		for_each_child_of_node(np, np2) {
 			if (!of_device_is_compatible(np2,
 					"marvell,mv64360-eth"))
 				continue;
 			err = mv64x60_eth_device_setup(np2, id2++, pdev);
-			if (err) {
-				of_node_put(np2);
-				goto error;
-			}
+			if (err)
+				printk(KERN_ERR "Failed to initialize "
+						"MV64x60 network device %s: "
+						"error %d.\n",
+						np2->full_name, err);
 		}
 	}
 
 	id = 0;
-	for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c")
-		if ((err = mv64x60_i2c_device_setup(np, id++)))
-			goto error;
+	for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c") {
+		err = mv64x60_i2c_device_setup(np, id++);
+		if (err)
+			printk(KERN_ERR "Failed to initialize MV64x60 I2C "
+					"bus %s: error %d.\n",
+					np->full_name, err);
+	}
 
 	/* support up to one watchdog timer */
 	np = of_find_compatible_node(np, NULL, "marvell,mv64360-wdt");
 	if (np) {
 		if ((err = mv64x60_wdt_device_setup(np, id)))
-			goto error;
+			printk(KERN_ERR "Failed to initialize MV64x60 "
+					"Watchdog %s: error %d.\n",
+					np->full_name, err);
 		of_node_put(np);
 	}
 
 	return 0;
-
-error:
-	of_node_put(np);
-	return err;
 }
 arch_initcall(mv64x60_device_setup);
 
diff --git a/arch/powerpc/sysdev/mv64x60_udbg.c b/arch/powerpc/sysdev/mv64x60_udbg.c
index ccdb3b0418fc..2792dc8b038c 100644
--- a/arch/powerpc/sysdev/mv64x60_udbg.c
+++ b/arch/powerpc/sysdev/mv64x60_udbg.c
@@ -94,7 +94,7 @@ static void mv64x60_udbg_init(void)
 	if (!np)
 		return;
 
-	block_index = of_get_property(np, "block-index", NULL);
+	block_index = of_get_property(np, "cell-index", NULL);
 	if (!block_index)
 		goto error;
 
diff --git a/arch/ppc/8260_io/fcc_enet.c b/arch/ppc/8260_io/fcc_enet.c
index bcc3aa9d04f3..d38b57e24cee 100644
--- a/arch/ppc/8260_io/fcc_enet.c
+++ b/arch/ppc/8260_io/fcc_enet.c
@@ -165,9 +165,6 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr);
 #ifdef CONFIG_SBC82xx
 #define F1_RXCLK	9
 #define F1_TXCLK	10
-#elif defined(CONFIG_ADS8272)
-#define F1_RXCLK	11
-#define F1_TXCLK	10
 #else
 #define F1_RXCLK	12
 #define F1_TXCLK	11
@@ -175,13 +172,8 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr);
 
 /* FCC2 Clock Source Configuration.  There are board specific.
    Can only choose from CLK13-16 */
-#ifdef CONFIG_ADS8272
-#define F2_RXCLK	15
-#define F2_TXCLK	16
-#else
 #define F2_RXCLK	13
 #define F2_TXCLK	14
-#endif
 
 /* FCC3 Clock Source Configuration.  There are board specific.
    Can only choose from CLK13-16 */
@@ -289,10 +281,7 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr);
 /* TQM8260 has MDIO and MDCK on PC30 and PC31 respectively */
 #define PC_MDIO		((uint)0x00000002)
 #define PC_MDCK		((uint)0x00000001)
-#elif defined(CONFIG_ADS8272)
-#define PC_MDIO		((uint)0x00002000)
-#define PC_MDCK		((uint)0x00001000)
-#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260) || defined(CONFIG_PQ2FADS)
+#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260)
 #define PC_MDIO		((uint)0x00400000)
 #define PC_MDCK		((uint)0x00200000)
 #else
@@ -2118,11 +2107,6 @@ init_fcc_startup(fcc_info_t *fip, struct net_device *dev)
 		printk("Can't get FCC IRQ %d\n", fip->fc_interrupt);
 
 #ifdef	PHY_INTERRUPT
-#ifdef CONFIG_ADS8272
-	if (request_irq(PHY_INTERRUPT, mii_link_interrupt, IRQF_SHARED,
-				"mii", dev) < 0)
-		printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT);
-#else
 	/* Make IRQn edge triggered.  This does not work if PHY_INTERRUPT is
 	 * on Port C.
 	 */
@@ -2132,7 +2116,6 @@ init_fcc_startup(fcc_info_t *fip, struct net_device *dev)
 	if (request_irq(PHY_INTERRUPT, mii_link_interrupt, 0,
 							"mii", dev) < 0)
 		printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT);
-#endif
 #endif	/* PHY_INTERRUPT */
 
 	/* Set GFMR to enable Ethernet operating mode.
diff --git a/arch/ppc/8xx_io/enet.c b/arch/ppc/8xx_io/enet.c
index c6d047ae77ac..5899aea1644b 100644
--- a/arch/ppc/8xx_io/enet.c
+++ b/arch/ppc/8xx_io/enet.c
@@ -946,29 +946,6 @@ static int __init scc_enet_init(void)
 	*((volatile uint *)BCSR1) &= ~BCSR1_ETHEN;
 #endif
 
-#ifdef CONFIG_MPC885ADS
-
-	/* Deassert PHY reset and enable the PHY.
-	 */
-	{
-		volatile uint __iomem *bcsr = ioremap(BCSR_ADDR, BCSR_SIZE);
-		uint tmp;
-
-		tmp = in_be32(bcsr + 1 /* BCSR1 */);
-		tmp |= BCSR1_ETHEN;
-		out_be32(bcsr + 1, tmp);
-		tmp = in_be32(bcsr + 4 /* BCSR4 */);
-		tmp |= BCSR4_ETH10_RST;
-		out_be32(bcsr + 4, tmp);
-		iounmap(bcsr);
-	}
-
-	/* On MPC885ADS SCC ethernet PHY defaults to the full duplex mode
-	 * upon reset. SCC is set to half duplex by default. So this
-	 * inconsistency should be better fixed by the software.
-	 */
-#endif
-
 	dev->base_addr = (unsigned long)ep;
 #if 0
 	dev->name = "CPM_ENET";
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index abc877faf123..0f1863ed9c1c 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -372,22 +372,6 @@ config MPC8XXFADS
 	bool "FADS"
 	select FADS
 
-config MPC86XADS
-	bool "MPC86XADS"
-	help
-	  MPC86x Application Development System by Freescale Semiconductor.
-	  The MPC86xADS is meant to serve as a platform for s/w and h/w
-	  development around the MPC86X processor families.
-	select FADS
-
-config MPC885ADS
-	bool "MPC885ADS"
-	help
-	  Freescale Semiconductor MPC885 Application Development System (ADS).
-	  Also known as DUET.
-	  The MPC885ADS is meant to serve as a platform for s/w and h/w
-	  development around the MPC885 processor family.
-
 config TQM823L
 	bool "TQM823L"
 	help
@@ -479,53 +463,6 @@ config WINCEPT
 
 endchoice
 
-menu "Freescale Ethernet driver platform-specific options"
-	depends on FS_ENET
-
-	config MPC8xx_SECOND_ETH
-	bool "Second Ethernet channel"
-	depends on (MPC885ADS || MPC86XADS)
-	default y
-	help
-	  This enables support for second Ethernet on MPC885ADS and MPC86xADS boards.
-	  The latter will use SCC1, for 885ADS you can select it below.
-
-	choice
-		prompt "Second Ethernet channel"
-		depends on MPC8xx_SECOND_ETH
-		default MPC8xx_SECOND_ETH_FEC2
-
-		config MPC8xx_SECOND_ETH_FEC2
-		bool "FEC2"
-		depends on MPC885ADS
-		help
-		  Enable FEC2 to serve as 2-nd Ethernet channel. Note that SMC2
-		  (often 2-nd UART) will not work if this is enabled.
-
-		config MPC8xx_SECOND_ETH_SCC1
-		bool "SCC1"
-		depends on MPC86XADS
-		select MPC8xx_SCC_ENET_FIXED
-		help
-		  Enable SCC1 to serve as 2-nd Ethernet channel. Note that SMC1
-		  (often 1-nd UART) will not work if this is enabled.
-
-		config MPC8xx_SECOND_ETH_SCC3
-		bool "SCC3"
-		depends on MPC885ADS
-		help
-		  Enable SCC3 to serve as 2-nd Ethernet channel. Note that SMC1
-		  (often 1-nd UART) will not work if this is enabled.
-
-	endchoice
-
-	config MPC8xx_SCC_ENET_FIXED
-	depends on MPC8xx_SECOND_ETH_SCC
-	default n
-	bool "Use fixed MII-less mode for SCC Ethernet"
-
-endmenu
-
 choice
 	prompt "Machine Type"
 	depends on 6xx
@@ -666,9 +603,6 @@ config TQM8260
 	  End of Life: not yet :-)
 	  URL: <http://www.denx.de/PDF/TQM82xx_SPEC_Rev005.pdf>
 
-config ADS8272
-	bool "ADS8272"
-
 config PQ2FADS
 	bool "Freescale-PQ2FADS"
 	help
@@ -698,11 +632,6 @@ config EV64360
 	  platform.
 endchoice
 
-config PQ2ADS
-	bool
-	depends on ADS8272
-	default y
-
 config TQM8xxL
 	bool
 	depends on 8xx && (TQM823L || TQM850L || FPS850L || TQM855L || TQM860L)
@@ -725,15 +654,6 @@ config 8260
 	  this option means that you wish to build a kernel for a machine with
 	  an 8260 class CPU.
 
-config 8272
-	bool
-	depends on 6xx
-	default y if ADS8272
-	select 8260
-	help
-	  The MPC8272 CPM has a different internal dpram setup than other CPM2
-	  devices
-
 config CPM1
 	bool
 	depends on 8xx
@@ -1069,7 +989,7 @@ config PCI_8260
 
 config 8260_PCI9
 	bool "Enable workaround for MPC826x erratum PCI 9"
-	depends on PCI_8260 && !ADS8272
+	depends on PCI_8260
 	default y
 
 choice
diff --git a/arch/ppc/configs/ads8272_defconfig b/arch/ppc/configs/ads8272_defconfig
deleted file mode 100644
index 6619f9118b00..000000000000
--- a/arch/ppc/configs/ads8272_defconfig
+++ /dev/null
@@ -1,930 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.21-rc5
-# Wed Apr  4 20:55:16 2007
-#
-CONFIG_MMU=y
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_ARCH_HAS_ILOG2_U32=y
-# CONFIG_ARCH_HAS_ILOG2_U64 is not set
-CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_PPC=y
-CONFIG_PPC32=y
-CONFIG_GENERIC_NVRAM=y
-CONFIG_GENERIC_FIND_NEXT_BIT=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-CONFIG_ARCH_MAY_HAVE_PC_FDC=y
-CONFIG_GENERIC_BUG=y
-CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-CONFIG_LOCALVERSION_AUTO=y
-CONFIG_SWAP=y
-CONFIG_SYSVIPC=y
-# CONFIG_IPC_NS is not set
-CONFIG_SYSVIPC_SYSCTL=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-# CONFIG_TASKSTATS is not set
-# CONFIG_UTS_NS is not set
-# CONFIG_AUDIT is not set
-# CONFIG_IKCONFIG is not set
-CONFIG_SYSFS_DEPRECATED=y
-# CONFIG_RELAY is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_SYSCTL=y
-CONFIG_EMBEDDED=y
-CONFIG_SYSCTL_SYSCALL=y
-# CONFIG_KALLSYMS is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_ELF_CORE=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-# CONFIG_EPOLL is not set
-CONFIG_SHMEM=y
-CONFIG_SLAB=y
-CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_RT_MUTEXES=y
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-# CONFIG_SLOB is not set
-
-#
-# Loadable module support
-#
-# CONFIG_MODULES is not set
-
-#
-# Block layer
-#
-CONFIG_BLOCK=y
-# CONFIG_LBD is not set
-# CONFIG_BLK_DEV_IO_TRACE is not set
-# CONFIG_LSF is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_DEFAULT_AS is not set
-# CONFIG_DEFAULT_DEADLINE is not set
-CONFIG_DEFAULT_CFQ=y
-# CONFIG_DEFAULT_NOOP is not set
-CONFIG_DEFAULT_IOSCHED="cfq"
-
-#
-# Processor
-#
-CONFIG_6xx=y
-# CONFIG_40x is not set
-# CONFIG_44x is not set
-# CONFIG_8xx is not set
-# CONFIG_E200 is not set
-# CONFIG_E500 is not set
-CONFIG_PPC_FPU=y
-# CONFIG_PPC_DCR_NATIVE is not set
-# CONFIG_KEXEC is not set
-# CONFIG_CPU_FREQ is not set
-# CONFIG_WANT_EARLY_SERIAL is not set
-CONFIG_EMBEDDEDBOOT=y
-CONFIG_PPC_STD_MMU=y
-
-#
-# Platform options
-#
-
-#
-# Freescale Ethernet driver platform-specific options
-#
-# CONFIG_PPC_PREP is not set
-# CONFIG_APUS is not set
-# CONFIG_KATANA is not set
-# CONFIG_WILLOW is not set
-# CONFIG_CPCI690 is not set
-# CONFIG_POWERPMC250 is not set
-# CONFIG_CHESTNUT is not set
-# CONFIG_SPRUCE is not set
-# CONFIG_HDPU is not set
-# CONFIG_EV64260 is not set
-# CONFIG_LOPEC is not set
-# CONFIG_MVME5100 is not set
-# CONFIG_PPLUS is not set
-# CONFIG_PRPMC750 is not set
-# CONFIG_PRPMC800 is not set
-# CONFIG_SANDPOINT is not set
-# CONFIG_RADSTONE_PPC7D is not set
-# CONFIG_PAL4 is not set
-# CONFIG_EST8260 is not set
-# CONFIG_SBC82xx is not set
-# CONFIG_SBS8260 is not set
-# CONFIG_RPX8260 is not set
-# CONFIG_TQM8260 is not set
-CONFIG_ADS8272=y
-# CONFIG_PQ2FADS is not set
-# CONFIG_LITE5200 is not set
-# CONFIG_MPC834x_SYS is not set
-# CONFIG_EV64360 is not set
-CONFIG_PQ2ADS=y
-CONFIG_8260=y
-CONFIG_8272=y
-CONFIG_CPM2=y
-# CONFIG_PC_KEYBOARD is not set
-# CONFIG_SMP is not set
-# CONFIG_HIGHMEM is not set
-CONFIG_ARCH_POPULATES_NODE_MAP=y
-# CONFIG_HZ_100 is not set
-CONFIG_HZ_250=y
-# CONFIG_HZ_300 is not set
-# CONFIG_HZ_1000 is not set
-CONFIG_HZ=250
-CONFIG_PREEMPT_NONE=y
-# CONFIG_PREEMPT_VOLUNTARY is not set
-# CONFIG_PREEMPT is not set
-CONFIG_SELECT_MEMORY_MODEL=y
-CONFIG_FLATMEM_MANUAL=y
-# CONFIG_DISCONTIGMEM_MANUAL is not set
-# CONFIG_SPARSEMEM_MANUAL is not set
-CONFIG_FLATMEM=y
-CONFIG_FLAT_NODE_MEM_MAP=y
-# CONFIG_SPARSEMEM_STATIC is not set
-CONFIG_SPLIT_PTLOCK_CPUS=4
-# CONFIG_RESOURCES_64BIT is not set
-CONFIG_ZONE_DMA_FLAG=1
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-# CONFIG_CMDLINE_BOOL is not set
-# CONFIG_PM is not set
-CONFIG_SECCOMP=y
-CONFIG_ISA_DMA_API=y
-
-#
-# Bus options
-#
-CONFIG_ZONE_DMA=y
-# CONFIG_PPC_I8259 is not set
-CONFIG_PPC_INDIRECT_PCI=y
-CONFIG_PCI=y
-CONFIG_PCI_DOMAINS=y
-CONFIG_PCI_8260=y
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-
-#
-# Advanced setup
-#
-# CONFIG_ADVANCED_OPTIONS is not set
-
-#
-# Default settings for advanced configuration options are used
-#
-CONFIG_HIGHMEM_START=0xfe000000
-CONFIG_LOWMEM_SIZE=0x30000000
-CONFIG_KERNEL_START=0xc0000000
-CONFIG_TASK_SIZE=0x80000000
-CONFIG_BOOT_LOAD=0x00400000
-
-#
-# Networking
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-# CONFIG_NETDEBUG is not set
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-CONFIG_XFRM=y
-# CONFIG_XFRM_USER is not set
-# CONFIG_XFRM_SUB_POLICY is not set
-# CONFIG_XFRM_MIGRATE is not set
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-CONFIG_IP_MULTICAST=y
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_FIB_HASH=y
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IP_PNP_RARP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_IP_MROUTE is not set
-# CONFIG_ARPD is not set
-CONFIG_SYN_COOKIES=y
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_XFRM_TUNNEL is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_INET_XFRM_MODE_TRANSPORT=y
-CONFIG_INET_XFRM_MODE_TUNNEL=y
-CONFIG_INET_XFRM_MODE_BEET=y
-CONFIG_INET_DIAG=y
-CONFIG_INET_TCP_DIAG=y
-# CONFIG_TCP_CONG_ADVANCED is not set
-CONFIG_TCP_CONG_CUBIC=y
-CONFIG_DEFAULT_TCP_CONG="cubic"
-# CONFIG_TCP_MD5SIG is not set
-# CONFIG_IPV6 is not set
-# CONFIG_INET6_XFRM_TUNNEL is not set
-# CONFIG_INET6_TUNNEL is not set
-# CONFIG_NETWORK_SECMARK is not set
-# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
-# CONFIG_TIPC is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-# CONFIG_IEEE80211 is not set
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
-# CONFIG_CONNECTOR is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-# CONFIG_PNPACPI is not set
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_CPQ_DA is not set
-# CONFIG_BLK_CPQ_CISS_DA is not set
-# CONFIG_BLK_DEV_DAC960 is not set
-# CONFIG_BLK_DEV_UMEM is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-CONFIG_BLK_DEV_LOOP=y
-# CONFIG_BLK_DEV_CRYPTOLOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_SX8 is not set
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_BLK_DEV_RAM_SIZE=32768
-CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
-# CONFIG_CDROM_PKTCDVD is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# Misc devices
-#
-# CONFIG_SGI_IOC4 is not set
-# CONFIG_TIFM_CORE is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_RAID_ATTRS is not set
-# CONFIG_SCSI is not set
-# CONFIG_SCSI_NETLINK is not set
-
-#
-# Serial ATA (prod) and Parallel ATA (experimental) drivers
-#
-# CONFIG_ATA is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-# CONFIG_FUSION is not set
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-# CONFIG_I2O is not set
-
-#
-# Macintosh device drivers
-#
-# CONFIG_MAC_EMUMOUSEBTN is not set
-# CONFIG_WINDFARM is not set
-
-#
-# Network device support
-#
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# ARCnet devices
-#
-# CONFIG_ARCNET is not set
-
-#
-# PHY device support
-#
-CONFIG_PHYLIB=y
-
-#
-# MII PHY device drivers
-#
-# CONFIG_MARVELL_PHY is not set
-CONFIG_DAVICOM_PHY=y
-# CONFIG_QSEMI_PHY is not set
-# CONFIG_LXT_PHY is not set
-# CONFIG_CICADA_PHY is not set
-# CONFIG_VITESSE_PHY is not set
-# CONFIG_SMSC_PHY is not set
-# CONFIG_BROADCOM_PHY is not set
-# CONFIG_FIXED_PHY is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_HAPPYMEAL is not set
-# CONFIG_SUNGEM is not set
-# CONFIG_CASSINI is not set
-# CONFIG_NET_VENDOR_3COM is not set
-
-#
-# Tulip family network device support
-#
-# CONFIG_NET_TULIP is not set
-# CONFIG_HP100 is not set
-# CONFIG_NET_PCI is not set
-CONFIG_FS_ENET=y
-# CONFIG_FS_ENET_HAS_SCC is not set
-CONFIG_FS_ENET_HAS_FCC=y
-
-#
-# Ethernet (1000 Mbit)
-#
-# CONFIG_ACENIC is not set
-# CONFIG_DL2K is not set
-# CONFIG_E1000 is not set
-# CONFIG_NS83820 is not set
-# CONFIG_HAMACHI is not set
-# CONFIG_YELLOWFIN is not set
-# CONFIG_R8169 is not set
-# CONFIG_SIS190 is not set
-# CONFIG_SKGE is not set
-# CONFIG_SKY2 is not set
-# CONFIG_SK98LIN is not set
-# CONFIG_TIGON3 is not set
-# CONFIG_BNX2 is not set
-# CONFIG_QLA3XXX is not set
-# CONFIG_ATL1 is not set
-
-#
-# Ethernet (10000 Mbit)
-#
-# CONFIG_CHELSIO_T1 is not set
-# CONFIG_CHELSIO_T3 is not set
-# CONFIG_IXGB is not set
-# CONFIG_S2IO is not set
-# CONFIG_MYRI10GE is not set
-# CONFIG_NETXEN_NIC is not set
-
-#
-# Token Ring devices
-#
-# CONFIG_TR is not set
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_FDDI is not set
-# CONFIG_HIPPI is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-CONFIG_INPUT=y
-# CONFIG_INPUT_FF_MEMLESS is not set
-
-#
-# Userland interfaces
-#
-# CONFIG_INPUT_MOUSEDEV is not set
-# CONFIG_INPUT_JOYDEV is not set
-# CONFIG_INPUT_TSDEV is not set
-# CONFIG_INPUT_EVDEV is not set
-# CONFIG_INPUT_EVBUG is not set
-
-#
-# Input Device Drivers
-#
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_INPUT_JOYSTICK is not set
-# CONFIG_INPUT_TOUCHSCREEN is not set
-# CONFIG_INPUT_MISC is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-# CONFIG_SERIAL_UARTLITE is not set
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_SERIAL_CPM=y
-CONFIG_SERIAL_CPM_CONSOLE=y
-CONFIG_SERIAL_CPM_SCC1=y
-# CONFIG_SERIAL_CPM_SCC2 is not set
-# CONFIG_SERIAL_CPM_SCC3 is not set
-CONFIG_SERIAL_CPM_SCC4=y
-# CONFIG_SERIAL_CPM_SMC1 is not set
-# CONFIG_SERIAL_CPM_SMC2 is not set
-# CONFIG_SERIAL_JSM is not set
-CONFIG_UNIX98_PTYS=y
-CONFIG_LEGACY_PTYS=y
-CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-CONFIG_HW_RANDOM=y
-# CONFIG_NVRAM is not set
-CONFIG_GEN_RTC=y
-# CONFIG_GEN_RTC_X is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-# CONFIG_APPLICOM is not set
-# CONFIG_AGP is not set
-# CONFIG_DRM is not set
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-# CONFIG_TCG_TPM is not set
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# SPI support
-#
-# CONFIG_SPI is not set
-# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Hardware Monitoring support
-#
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ABITUGURU is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_PC87427 is not set
-# CONFIG_SENSORS_VT1211 is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
-
-#
-# Multifunction device drivers
-#
-# CONFIG_MFD_SM501 is not set
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
-# CONFIG_FB is not set
-# CONFIG_FB_IBM_GXT4500 is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# HID Devices
-#
-CONFIG_HID=y
-# CONFIG_HID_DEBUG is not set
-
-#
-# USB support
-#
-CONFIG_USB_ARCH_HAS_HCD=y
-CONFIG_USB_ARCH_HAS_OHCI=y
-CONFIG_USB_ARCH_HAS_EHCI=y
-# CONFIG_USB is not set
-
-#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
-#
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# LED devices
-#
-# CONFIG_NEW_LEDS is not set
-
-#
-# LED drivers
-#
-
-#
-# LED Triggers
-#
-
-#
-# InfiniBand support
-#
-# CONFIG_INFINIBAND is not set
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
-
-#
-# Real Time Clock
-#
-# CONFIG_RTC_CLASS is not set
-
-#
-# DMA Engine support
-#
-# CONFIG_DMA_ENGINE is not set
-
-#
-# DMA Clients
-#
-
-#
-# DMA Devices
-#
-
-#
-# Auxiliary Display support
-#
-
-#
-# Virtualization
-#
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-# CONFIG_EXT2_FS_XATTR is not set
-# CONFIG_EXT2_FS_XIP is not set
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-# CONFIG_EXT3_FS_POSIX_ACL is not set
-# CONFIG_EXT3_FS_SECURITY is not set
-# CONFIG_EXT4DEV_FS is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-CONFIG_FS_POSIX_ACL=y
-# CONFIG_XFS_FS is not set
-# CONFIG_GFS2_FS is not set
-# CONFIG_OCFS2_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-CONFIG_INOTIFY=y
-CONFIG_INOTIFY_USER=y
-# CONFIG_QUOTA is not set
-CONFIG_DNOTIFY=y
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-# CONFIG_FUSE_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_SYSFS=y
-CONFIG_TMPFS=y
-# CONFIG_TMPFS_POSIX_ACL is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-# CONFIG_CONFIGFS_FS is not set
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V3_ACL=y
-CONFIG_NFS_V4=y
-# CONFIG_NFS_DIRECTIO is not set
-# CONFIG_NFSD is not set
-CONFIG_ROOT_NFS=y
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_NFS_ACL_SUPPORT=y
-CONFIG_NFS_COMMON=y
-CONFIG_SUNRPC=y
-CONFIG_SUNRPC_GSS=y
-CONFIG_RPCSEC_GSS_KRB5=y
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-# CONFIG_MSDOS_PARTITION is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_KARMA_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# Distributed Lock Manager
-#
-# CONFIG_DLM is not set
-# CONFIG_SCC_ENET is not set
-# CONFIG_FEC_ENET is not set
-
-#
-# CPM2 Options
-#
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC16 is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
-CONFIG_PLIST=y
-CONFIG_HAS_IOMEM=y
-CONFIG_HAS_IOPORT=y
-# CONFIG_PROFILING is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-CONFIG_ENABLE_MUST_CHECK=y
-# CONFIG_MAGIC_SYSRQ is not set
-# CONFIG_UNUSED_SYMBOLS is not set
-# CONFIG_DEBUG_FS is not set
-# CONFIG_HEADERS_CHECK is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
-# CONFIG_DEBUG_BUGVERBOSE is not set
-# CONFIG_KGDB_CONSOLE is not set
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-CONFIG_CRYPTO_ALGAPI=y
-CONFIG_CRYPTO_BLKCIPHER=y
-CONFIG_CRYPTO_MANAGER=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_XCBC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-# CONFIG_CRYPTO_GF128MUL is not set
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_CBC=y
-CONFIG_CRYPTO_PCBC=y
-# CONFIG_CRYPTO_LRW is not set
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_FCRYPT is not set
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_CAMELLIA is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ppc/configs/mpc86x_ads_defconfig b/arch/ppc/configs/mpc86x_ads_defconfig
deleted file mode 100644
index f63c6f59d68a..000000000000
--- a/arch/ppc/configs/mpc86x_ads_defconfig
+++ /dev/null
@@ -1,633 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-rc4
-# Tue Jun 14 13:36:35 2005
-#
-CONFIG_MMU=y
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_HAVE_DEC_LOCK=y
-CONFIG_PPC=y
-CONFIG_PPC32=y
-CONFIG_GENERIC_NVRAM=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-# CONFIG_CLEAN_COMPILE is not set
-CONFIG_BROKEN=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
-CONFIG_KOBJECT_UEVENT=y
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-# CONFIG_KALLSYMS is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-# CONFIG_BASE_FULL is not set
-CONFIG_FUTEX=y
-# CONFIG_EPOLL is not set
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-# CONFIG_SHMEM is not set
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-CONFIG_TINY_SHMEM=y
-CONFIG_BASE_SMALL=1
-
-#
-# Loadable module support
-#
-CONFIG_MODULES=y
-# CONFIG_MODULE_UNLOAD is not set
-CONFIG_OBSOLETE_MODPARM=y
-# CONFIG_MODVERSIONS is not set
-# CONFIG_MODULE_SRCVERSION_ALL is not set
-# CONFIG_KMOD is not set
-
-#
-# Processor
-#
-# CONFIG_6xx is not set
-# CONFIG_40x is not set
-# CONFIG_44x is not set
-# CONFIG_POWER3 is not set
-# CONFIG_POWER4 is not set
-CONFIG_8xx=y
-# CONFIG_E500 is not set
-# CONFIG_MATH_EMULATION is not set
-# CONFIG_CPU_FREQ is not set
-CONFIG_EMBEDDEDBOOT=y
-# CONFIG_PM is not set
-CONFIG_NOT_COHERENT_CACHE=y
-
-#
-# Platform options
-#
-CONFIG_FADS=y
-# CONFIG_RPXLITE is not set
-# CONFIG_RPXCLASSIC is not set
-# CONFIG_BSEIP is not set
-# CONFIG_MPC8XXFADS is not set
-CONFIG_MPC86XADS=y
-# CONFIG_TQM823L is not set
-# CONFIG_TQM850L is not set
-# CONFIG_TQM855L is not set
-# CONFIG_TQM860L is not set
-# CONFIG_FPS850L is not set
-# CONFIG_SPD823TS is not set
-# CONFIG_IVMS8 is not set
-# CONFIG_IVML24 is not set
-# CONFIG_SM850 is not set
-# CONFIG_HERMES_PRO is not set
-# CONFIG_IP860 is not set
-# CONFIG_LWMON is not set
-# CONFIG_PCU_E is not set
-# CONFIG_CCM is not set
-# CONFIG_LANTEC is not set
-# CONFIG_MBX is not set
-# CONFIG_WINCEPT is not set
-# CONFIG_SMP is not set
-# CONFIG_PREEMPT is not set
-# CONFIG_HIGHMEM is not set
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-# CONFIG_CMDLINE_BOOL is not set
-CONFIG_ISA_DMA_API=y
-
-#
-# Bus options
-#
-# CONFIG_PCI is not set
-# CONFIG_PCI_DOMAINS is not set
-# CONFIG_PCI_QSPAN is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Advanced setup
-#
-# CONFIG_ADVANCED_OPTIONS is not set
-
-#
-# Default settings for advanced configuration options are used
-#
-CONFIG_HIGHMEM_START=0xfe000000
-CONFIG_LOWMEM_SIZE=0x30000000
-CONFIG_KERNEL_START=0xc0000000
-CONFIG_TASK_SIZE=0x80000000
-CONFIG_CONSISTENT_START=0xff100000
-CONFIG_CONSISTENT_SIZE=0x00200000
-CONFIG_BOOT_LOAD=0x00400000
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-# CONFIG_STANDALONE is not set
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-CONFIG_BLK_DEV_LOOP=y
-# CONFIG_BLK_DEV_CRYPTOLOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_LBD is not set
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-
-#
-# Macintosh device drivers
-#
-
-#
-# Networking support
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-# CONFIG_IP_PNP_BOOTP is not set
-# CONFIG_IP_PNP_RARP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-CONFIG_IPV6=m
-# CONFIG_IPV6_PRIVACY is not set
-# CONFIG_INET6_AH is not set
-# CONFIG_INET6_ESP is not set
-# CONFIG_INET6_IPCOMP is not set
-# CONFIG_INET6_TUNNEL is not set
-# CONFIG_IPV6_TUNNEL is not set
-# CONFIG_NETFILTER is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-# CONFIG_MII is not set
-# CONFIG_OAKNET is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-
-#
-# Ethernet (10000 Mbit)
-#
-
-#
-# Token Ring devices
-#
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-# CONFIG_PPP is not set
-# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-# CONFIG_INPUT is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-CONFIG_SOUND_GAMEPORT=y
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_SERIAL_CPM=y
-CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_SERIAL_CPM_SCC1 is not set
-# CONFIG_SERIAL_CPM_SCC2 is not set
-# CONFIG_SERIAL_CPM_SCC3 is not set
-# CONFIG_SERIAL_CPM_SCC4 is not set
-CONFIG_SERIAL_CPM_SMC1=y
-# CONFIG_SERIAL_CPM_SMC2 is not set
-CONFIG_UNIX98_PTYS=y
-# CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_NVRAM is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_AGP is not set
-# CONFIG_DRM is not set
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# InfiniBand support
-#
-# CONFIG_INFINIBAND is not set
-
-#
-# File systems
-#
-# CONFIG_EXT2_FS is not set
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_FS_XATTR is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_QUOTA is not set
-# CONFIG_DNOTIFY is not set
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-CONFIG_PROC_KCORE=y
-CONFIG_SYSFS=y
-# CONFIG_DEVFS_FS is not set
-# CONFIG_DEVPTS_FS_XATTR is not set
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLBFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
-CONFIG_NFS_V4=y
-# CONFIG_NFS_DIRECTIO is not set
-# CONFIG_NFSD is not set
-CONFIG_ROOT_NFS=y
-CONFIG_LOCKD=y
-CONFIG_LOCKD_V4=y
-CONFIG_SUNRPC=y
-CONFIG_SUNRPC_GSS=y
-CONFIG_RPCSEC_GSS_KRB5=y
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-# CONFIG_PARTITION_ADVANCED is not set
-CONFIG_MSDOS_PARTITION=y
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# MPC8xx CPM Options
-#
-CONFIG_SCC_ENET=y
-CONFIG_SCC1_ENET=y
-# CONFIG_SCC2_ENET is not set
-# CONFIG_SCC3_ENET is not set
-# CONFIG_FEC_ENET is not set
-# CONFIG_ENET_BIG_BUFFERS is not set
-
-#
-# Generic MPC8xx Options
-#
-# CONFIG_8xx_COPYBACK is not set
-# CONFIG_8xx_CPU6 is not set
-CONFIG_NO_UCODE_PATCH=y
-# CONFIG_USB_SOF_UCODE_PATCH is not set
-# CONFIG_I2C_SPI_UCODE_PATCH is not set
-# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set
-
-#
-# Library routines
-#
-# CONFIG_CRC_CCITT is not set
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
-
-#
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-CONFIG_CRYPTO=y
-# CONFIG_CRYPTO_HMAC is not set
-# CONFIG_CRYPTO_NULL is not set
-# CONFIG_CRYPTO_MD4 is not set
-CONFIG_CRYPTO_MD5=y
-# CONFIG_CRYPTO_SHA1 is not set
-# CONFIG_CRYPTO_SHA256 is not set
-# CONFIG_CRYPTO_SHA512 is not set
-# CONFIG_CRYPTO_WP512 is not set
-# CONFIG_CRYPTO_TGR192 is not set
-CONFIG_CRYPTO_DES=y
-# CONFIG_CRYPTO_BLOWFISH is not set
-# CONFIG_CRYPTO_TWOFISH is not set
-# CONFIG_CRYPTO_SERPENT is not set
-# CONFIG_CRYPTO_AES is not set
-# CONFIG_CRYPTO_CAST5 is not set
-# CONFIG_CRYPTO_CAST6 is not set
-# CONFIG_CRYPTO_TEA is not set
-# CONFIG_CRYPTO_ARC4 is not set
-# CONFIG_CRYPTO_KHAZAD is not set
-# CONFIG_CRYPTO_ANUBIS is not set
-# CONFIG_CRYPTO_DEFLATE is not set
-# CONFIG_CRYPTO_MICHAEL_MIC is not set
-# CONFIG_CRYPTO_CRC32C is not set
-# CONFIG_CRYPTO_TEST is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ppc/configs/mpc885ads_defconfig b/arch/ppc/configs/mpc885ads_defconfig
deleted file mode 100644
index 016f94d9325f..000000000000
--- a/arch/ppc/configs/mpc885ads_defconfig
+++ /dev/null
@@ -1,622 +0,0 @@
-#
-# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.12-rc6
-# Thu Jun  9 21:17:29 2005
-#
-CONFIG_MMU=y
-CONFIG_GENERIC_HARDIRQS=y
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
-CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_HAVE_DEC_LOCK=y
-CONFIG_PPC=y
-CONFIG_PPC32=y
-CONFIG_GENERIC_NVRAM=y
-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
-
-#
-# Code maturity level options
-#
-CONFIG_EXPERIMENTAL=y
-# CONFIG_CLEAN_COMPILE is not set
-CONFIG_BROKEN=y
-CONFIG_BROKEN_ON_SMP=y
-CONFIG_INIT_ENV_ARG_LIMIT=32
-
-#
-# General setup
-#
-CONFIG_LOCALVERSION=""
-# CONFIG_SWAP is not set
-CONFIG_SYSVIPC=y
-# CONFIG_POSIX_MQUEUE is not set
-# CONFIG_BSD_PROCESS_ACCT is not set
-CONFIG_SYSCTL=y
-# CONFIG_AUDIT is not set
-CONFIG_HOTPLUG=y
-CONFIG_KOBJECT_UEVENT=y
-# CONFIG_IKCONFIG is not set
-CONFIG_EMBEDDED=y
-# CONFIG_KALLSYMS is not set
-CONFIG_PRINTK=y
-CONFIG_BUG=y
-CONFIG_BASE_FULL=y
-CONFIG_FUTEX=y
-# CONFIG_EPOLL is not set
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
-CONFIG_SHMEM=y
-CONFIG_CC_ALIGN_FUNCTIONS=0
-CONFIG_CC_ALIGN_LABELS=0
-CONFIG_CC_ALIGN_LOOPS=0
-CONFIG_CC_ALIGN_JUMPS=0
-# CONFIG_TINY_SHMEM is not set
-CONFIG_BASE_SMALL=0
-
-#
-# Loadable module support
-#
-# CONFIG_MODULES is not set
-
-#
-# Processor
-#
-# CONFIG_6xx is not set
-# CONFIG_40x is not set
-# CONFIG_44x is not set
-# CONFIG_POWER3 is not set
-# CONFIG_POWER4 is not set
-CONFIG_8xx=y
-# CONFIG_E500 is not set
-# CONFIG_MATH_EMULATION is not set
-# CONFIG_CPU_FREQ is not set
-CONFIG_EMBEDDEDBOOT=y
-# CONFIG_PM is not set
-CONFIG_NOT_COHERENT_CACHE=y
-
-#
-# Platform options
-#
-# CONFIG_RPXLITE is not set
-# CONFIG_RPXCLASSIC is not set
-# CONFIG_BSEIP is not set
-# CONFIG_FADS is not set
-CONFIG_MPC885ADS=y
-# CONFIG_TQM823L is not set
-# CONFIG_TQM850L is not set
-# CONFIG_TQM855L is not set
-# CONFIG_TQM860L is not set
-# CONFIG_FPS850L is not set
-# CONFIG_SPD823TS is not set
-# CONFIG_IVMS8 is not set
-# CONFIG_IVML24 is not set
-# CONFIG_SM850 is not set
-# CONFIG_HERMES_PRO is not set
-# CONFIG_IP860 is not set
-# CONFIG_LWMON is not set
-# CONFIG_PCU_E is not set
-# CONFIG_CCM is not set
-# CONFIG_LANTEC is not set
-# CONFIG_MBX is not set
-# CONFIG_WINCEPT is not set
-# CONFIG_SMP is not set
-# CONFIG_PREEMPT is not set
-# CONFIG_HIGHMEM is not set
-CONFIG_BINFMT_ELF=y
-# CONFIG_BINFMT_MISC is not set
-# CONFIG_CMDLINE_BOOL is not set
-CONFIG_ISA_DMA_API=y
-
-#
-# Bus options
-#
-# CONFIG_PCI is not set
-# CONFIG_PCI_DOMAINS is not set
-# CONFIG_PCI_QSPAN is not set
-
-#
-# PCCARD (PCMCIA/CardBus) support
-#
-# CONFIG_PCCARD is not set
-
-#
-# Advanced setup
-#
-# CONFIG_ADVANCED_OPTIONS is not set
-
-#
-# Default settings for advanced configuration options are used
-#
-CONFIG_HIGHMEM_START=0xfe000000
-CONFIG_LOWMEM_SIZE=0x30000000
-CONFIG_KERNEL_START=0xc0000000
-CONFIG_TASK_SIZE=0x80000000
-CONFIG_CONSISTENT_START=0xff100000
-CONFIG_CONSISTENT_SIZE=0x00200000
-CONFIG_BOOT_LOAD=0x00400000
-
-#
-# Device Drivers
-#
-
-#
-# Generic Driver Options
-#
-CONFIG_STANDALONE=y
-CONFIG_PREVENT_FIRMWARE_BUILD=y
-# CONFIG_FW_LOADER is not set
-
-#
-# Memory Technology Devices (MTD)
-#
-# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
-# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
-
-#
-# Block devices
-#
-# CONFIG_BLK_DEV_FD is not set
-# CONFIG_BLK_DEV_COW_COMMON is not set
-# CONFIG_BLK_DEV_LOOP is not set
-# CONFIG_BLK_DEV_NBD is not set
-# CONFIG_BLK_DEV_RAM is not set
-CONFIG_BLK_DEV_RAM_COUNT=16
-CONFIG_INITRAMFS_SOURCE=""
-# CONFIG_LBD is not set
-# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-# CONFIG_IOSCHED_AS is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-# CONFIG_ATA_OVER_ETH is not set
-
-#
-# ATA/ATAPI/MFM/RLL support
-#
-# CONFIG_IDE is not set
-
-#
-# SCSI device support
-#
-# CONFIG_SCSI is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
-
-#
-# Fusion MPT device support
-#
-
-#
-# IEEE 1394 (FireWire) support
-#
-# CONFIG_IEEE1394 is not set
-
-#
-# I2O device support
-#
-
-#
-# Macintosh device drivers
-#
-
-#
-# Networking support
-#
-CONFIG_NET=y
-
-#
-# Networking options
-#
-CONFIG_PACKET=y
-# CONFIG_PACKET_MMAP is not set
-CONFIG_UNIX=y
-# CONFIG_NET_KEY is not set
-CONFIG_INET=y
-# CONFIG_IP_MULTICAST is not set
-# CONFIG_IP_ADVANCED_ROUTER is not set
-CONFIG_IP_PNP=y
-CONFIG_IP_PNP_DHCP=y
-CONFIG_IP_PNP_BOOTP=y
-# CONFIG_IP_PNP_RARP is not set
-# CONFIG_NET_IPIP is not set
-# CONFIG_NET_IPGRE is not set
-# CONFIG_ARPD is not set
-# CONFIG_SYN_COOKIES is not set
-# CONFIG_INET_AH is not set
-# CONFIG_INET_ESP is not set
-# CONFIG_INET_IPCOMP is not set
-# CONFIG_INET_TUNNEL is not set
-CONFIG_IP_TCPDIAG=y
-# CONFIG_IP_TCPDIAG_IPV6 is not set
-# CONFIG_IPV6 is not set
-# CONFIG_NETFILTER is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
-# CONFIG_IP_SCTP is not set
-# CONFIG_ATM is not set
-# CONFIG_BRIDGE is not set
-# CONFIG_VLAN_8021Q is not set
-# CONFIG_DECNET is not set
-# CONFIG_LLC2 is not set
-# CONFIG_IPX is not set
-# CONFIG_ATALK is not set
-# CONFIG_X25 is not set
-# CONFIG_LAPB is not set
-# CONFIG_NET_DIVERT is not set
-# CONFIG_ECONET is not set
-# CONFIG_WAN_ROUTER is not set
-
-#
-# QoS and/or fair queueing
-#
-# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
-
-#
-# Network testing
-#
-# CONFIG_NET_PKTGEN is not set
-# CONFIG_NETPOLL is not set
-# CONFIG_NET_POLL_CONTROLLER is not set
-# CONFIG_HAMRADIO is not set
-# CONFIG_IRDA is not set
-# CONFIG_BT is not set
-CONFIG_NETDEVICES=y
-# CONFIG_DUMMY is not set
-# CONFIG_BONDING is not set
-# CONFIG_EQUALIZER is not set
-# CONFIG_TUN is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
-CONFIG_NET_ETHERNET=y
-CONFIG_MII=y
-# CONFIG_OAKNET is not set
-
-#
-# Ethernet (1000 Mbit)
-#
-
-#
-# Ethernet (10000 Mbit)
-#
-
-#
-# Token Ring devices
-#
-
-#
-# Wireless LAN (non-hamradio)
-#
-# CONFIG_NET_RADIO is not set
-
-#
-# Wan interfaces
-#
-# CONFIG_WAN is not set
-CONFIG_PPP=y
-# CONFIG_PPP_MULTILINK is not set
-# CONFIG_PPP_FILTER is not set
-CONFIG_PPP_ASYNC=y
-CONFIG_PPP_SYNC_TTY=y
-CONFIG_PPP_DEFLATE=y
-# CONFIG_PPP_BSDCOMP is not set
-# CONFIG_PPPOE is not set
-# CONFIG_SLIP is not set
-# CONFIG_SHAPER is not set
-# CONFIG_NETCONSOLE is not set
-
-#
-# ISDN subsystem
-#
-# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
-# CONFIG_PHONE is not set
-
-#
-# Input device support
-#
-# CONFIG_INPUT is not set
-
-#
-# Hardware I/O ports
-#
-# CONFIG_SERIO is not set
-# CONFIG_GAMEPORT is not set
-
-#
-# Character devices
-#
-# CONFIG_VT is not set
-# CONFIG_SERIAL_NONSTANDARD is not set
-
-#
-# Serial drivers
-#
-# CONFIG_SERIAL_8250 is not set
-
-#
-# Non-8250 serial port support
-#
-CONFIG_SERIAL_CORE=y
-CONFIG_SERIAL_CORE_CONSOLE=y
-CONFIG_SERIAL_CPM=y
-CONFIG_SERIAL_CPM_CONSOLE=y
-# CONFIG_SERIAL_CPM_SCC1 is not set
-# CONFIG_SERIAL_CPM_SCC2 is not set
-# CONFIG_SERIAL_CPM_SCC3 is not set
-# CONFIG_SERIAL_CPM_SCC4 is not set
-CONFIG_SERIAL_CPM_SMC1=y
-CONFIG_SERIAL_CPM_SMC2=y
-CONFIG_UNIX98_PTYS=y
-# CONFIG_LEGACY_PTYS is not set
-
-#
-# IPMI
-#
-# CONFIG_IPMI_HANDLER is not set
-
-#
-# Watchdog Cards
-#
-# CONFIG_WATCHDOG is not set
-# CONFIG_NVRAM is not set
-# CONFIG_GEN_RTC is not set
-# CONFIG_DTLK is not set
-# CONFIG_R3964 is not set
-
-#
-# Ftape, the floppy tape device driver
-#
-# CONFIG_AGP is not set
-# CONFIG_DRM is not set
-# CONFIG_RAW_DRIVER is not set
-
-#
-# TPM devices
-#
-
-#
-# I2C support
-#
-# CONFIG_I2C is not set
-
-#
-# Dallas's 1-wire bus
-#
-# CONFIG_W1 is not set
-
-#
-# Misc devices
-#
-
-#
-# Multimedia devices
-#
-# CONFIG_VIDEO_DEV is not set
-
-#
-# Digital Video Broadcasting Devices
-#
-# CONFIG_DVB is not set
-
-#
-# Graphics support
-#
-# CONFIG_FB is not set
-
-#
-# Sound
-#
-# CONFIG_SOUND is not set
-
-#
-# USB support
-#
-# CONFIG_USB_ARCH_HAS_HCD is not set
-# CONFIG_USB_ARCH_HAS_OHCI is not set
-
-#
-# USB Gadget Support
-#
-# CONFIG_USB_GADGET is not set
-
-#
-# MMC/SD Card support
-#
-# CONFIG_MMC is not set
-
-#
-# InfiniBand support
-#
-# CONFIG_INFINIBAND is not set
-
-#
-# File systems
-#
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-# CONFIG_EXT2_FS_POSIX_ACL is not set
-# CONFIG_EXT2_FS_SECURITY is not set
-CONFIG_EXT3_FS=y
-CONFIG_EXT3_FS_XATTR=y
-# CONFIG_EXT3_FS_POSIX_ACL is not set
-# CONFIG_EXT3_FS_SECURITY is not set
-CONFIG_JBD=y
-# CONFIG_JBD_DEBUG is not set
-CONFIG_FS_MBCACHE=y
-# CONFIG_REISERFS_FS is not set
-# CONFIG_JFS_FS is not set
-
-#
-# XFS support
-#
-# CONFIG_XFS_FS is not set
-# CONFIG_MINIX_FS is not set
-# CONFIG_ROMFS_FS is not set
-# CONFIG_QUOTA is not set
-# CONFIG_DNOTIFY is not set
-# CONFIG_AUTOFS_FS is not set
-# CONFIG_AUTOFS4_FS is not set
-
-#
-# CD-ROM/DVD Filesystems
-#
-# CONFIG_ISO9660_FS is not set
-# CONFIG_UDF_FS is not set
-
-#
-# DOS/FAT/NT Filesystems
-#
-# CONFIG_MSDOS_FS is not set
-# CONFIG_VFAT_FS is not set
-# CONFIG_NTFS_FS is not set
-
-#
-# Pseudo filesystems
-#
-CONFIG_PROC_FS=y
-# CONFIG_PROC_KCORE is not set
-CONFIG_SYSFS=y
-# CONFIG_DEVFS_FS is not set
-# CONFIG_DEVPTS_FS_XATTR is not set
-# CONFIG_TMPFS is not set
-# CONFIG_HUGETLBFS is not set
-# CONFIG_HUGETLB_PAGE is not set
-CONFIG_RAMFS=y
-
-#
-# Miscellaneous filesystems
-#
-# CONFIG_ADFS_FS is not set
-# CONFIG_AFFS_FS is not set
-# CONFIG_HFS_FS is not set
-# CONFIG_HFSPLUS_FS is not set
-# CONFIG_BEFS_FS is not set
-# CONFIG_BFS_FS is not set
-# CONFIG_EFS_FS is not set
-# CONFIG_CRAMFS is not set
-# CONFIG_VXFS_FS is not set
-# CONFIG_HPFS_FS is not set
-# CONFIG_QNX4FS_FS is not set
-# CONFIG_SYSV_FS is not set
-# CONFIG_UFS_FS is not set
-
-#
-# Network File Systems
-#
-CONFIG_NFS_FS=y
-# CONFIG_NFS_V3 is not set
-# CONFIG_NFS_V4 is not set
-# CONFIG_NFS_DIRECTIO is not set
-# CONFIG_NFSD is not set
-CONFIG_ROOT_NFS=y
-CONFIG_LOCKD=y
-CONFIG_SUNRPC=y
-# CONFIG_RPCSEC_GSS_KRB5 is not set
-# CONFIG_RPCSEC_GSS_SPKM3 is not set
-# CONFIG_SMB_FS is not set
-# CONFIG_CIFS is not set
-# CONFIG_NCP_FS is not set
-# CONFIG_CODA_FS is not set
-# CONFIG_AFS_FS is not set
-
-#
-# Partition Types
-#
-CONFIG_PARTITION_ADVANCED=y
-# CONFIG_ACORN_PARTITION is not set
-# CONFIG_OSF_PARTITION is not set
-# CONFIG_AMIGA_PARTITION is not set
-# CONFIG_ATARI_PARTITION is not set
-# CONFIG_MAC_PARTITION is not set
-CONFIG_MSDOS_PARTITION=y
-# CONFIG_BSD_DISKLABEL is not set
-# CONFIG_MINIX_SUBPARTITION is not set
-# CONFIG_SOLARIS_X86_PARTITION is not set
-# CONFIG_UNIXWARE_DISKLABEL is not set
-# CONFIG_LDM_PARTITION is not set
-# CONFIG_SGI_PARTITION is not set
-# CONFIG_ULTRIX_PARTITION is not set
-# CONFIG_SUN_PARTITION is not set
-# CONFIG_EFI_PARTITION is not set
-
-#
-# Native Language Support
-#
-# CONFIG_NLS is not set
-
-#
-# MPC8xx CPM Options
-#
-CONFIG_SCC_ENET=y
-# CONFIG_SCC1_ENET is not set
-# CONFIG_SCC2_ENET is not set
-CONFIG_SCC3_ENET=y
-# CONFIG_FEC_ENET is not set
-# CONFIG_ENET_BIG_BUFFERS is not set
-
-#
-# Generic MPC8xx Options
-#
-CONFIG_8xx_COPYBACK=y
-CONFIG_8xx_CPU6=y
-CONFIG_NO_UCODE_PATCH=y
-# CONFIG_USB_SOF_UCODE_PATCH is not set
-# CONFIG_I2C_SPI_UCODE_PATCH is not set
-# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set
-
-#
-# Library routines
-#
-CONFIG_CRC_CCITT=y
-# CONFIG_CRC32 is not set
-# CONFIG_LIBCRC32C is not set
-CONFIG_ZLIB_INFLATE=y
-CONFIG_ZLIB_DEFLATE=y
-
-#
-# Profiling support
-#
-# CONFIG_PROFILING is not set
-
-#
-# Kernel hacking
-#
-# CONFIG_PRINTK_TIME is not set
-# CONFIG_DEBUG_KERNEL is not set
-CONFIG_LOG_BUF_SHIFT=14
-
-#
-# Security options
-#
-# CONFIG_KEYS is not set
-# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
-# CONFIG_CRYPTO is not set
-
-#
-# Hardware crypto devices
-#
diff --git a/arch/ppc/kernel/ppc_ksyms.c b/arch/ppc/kernel/ppc_ksyms.c
index 2ba659f401be..d9036ef0b658 100644
--- a/arch/ppc/kernel/ppc_ksyms.c
+++ b/arch/ppc/kernel/ppc_ksyms.c
@@ -88,6 +88,7 @@ EXPORT_SYMBOL(strncpy);
 EXPORT_SYMBOL(strcat);
 EXPORT_SYMBOL(strlen);
 EXPORT_SYMBOL(strcmp);
+EXPORT_SYMBOL(strncmp);
 
 EXPORT_SYMBOL(csum_partial);
 EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S
index 84ed33ab4c2d..927253bfc826 100644
--- a/arch/ppc/lib/string.S
+++ b/arch/ppc/lib/string.S
@@ -121,6 +121,20 @@ _GLOBAL(strcmp)
 	beq	1b
 	blr
 
+_GLOBAL(strncmp)
+	PPC_LCMPI r5,0
+	beqlr
+	mtctr	r5
+	addi	r5,r3,-1
+	addi	r4,r4,-1
+1:	lbzu	r3,1(r5)
+	cmpwi	1,r3,0
+	lbzu	r0,1(r4)
+	subf.	r3,r0,r3
+	beqlr	1
+	bdnzt	eq,1b
+	blr
+
 _GLOBAL(strlen)
 	addi	r4,r3,-1
 1:	lbzu	r0,1(r4)
diff --git a/arch/ppc/platforms/Makefile b/arch/ppc/platforms/Makefile
index 40f53fbe6d35..6260231987cb 100644
--- a/arch/ppc/platforms/Makefile
+++ b/arch/ppc/platforms/Makefile
@@ -4,7 +4,6 @@
 
 obj-$(CONFIG_PPC_PREP)		+= prep_pci.o prep_setup.o
 obj-$(CONFIG_PREP_RESIDUAL)	+= residual.o
-obj-$(CONFIG_PQ2ADS)		+= pq2ads.o
 obj-$(CONFIG_TQM8260)		+= tqm8260_setup.o
 obj-$(CONFIG_CPCI690)		+= cpci690.o
 obj-$(CONFIG_EV64260)		+= ev64260.o
@@ -24,6 +23,3 @@ obj-$(CONFIG_SBC82xx)		+= sbc82xx.o
 obj-$(CONFIG_SPRUCE)		+= spruce.o
 obj-$(CONFIG_LITE5200)		+= lite5200.o
 obj-$(CONFIG_EV64360)		+= ev64360.o
-obj-$(CONFIG_MPC86XADS)		+= mpc866ads_setup.o
-obj-$(CONFIG_MPC885ADS)		+= mpc885ads_setup.o
-obj-$(CONFIG_ADS8272)		+= mpc8272ads_setup.o
diff --git a/arch/ppc/platforms/fads.h b/arch/ppc/platforms/fads.h
index 2f9f0f60e3f7..5219366667b3 100644
--- a/arch/ppc/platforms/fads.h
+++ b/arch/ppc/platforms/fads.h
@@ -22,29 +22,6 @@
 
 #include <asm/ppcboot.h>
 
-#if defined(CONFIG_MPC86XADS)
-
-#define BOARD_CHIP_NAME "MPC86X"
-
-/* U-Boot maps BCSR to 0xff080000 */
-#define BCSR_ADDR		((uint)0xff080000)
-
-/* MPC86XADS has one more CPLD and an additional BCSR.
- */
-#define CFG_PHYDEV_ADDR		((uint)0xff0a0000)
-#define BCSR5			((uint)(CFG_PHYDEV_ADDR + 0x300))
-
-#define BCSR5_T1_RST		0x10
-#define BCSR5_ATM155_RST	0x08
-#define BCSR5_ATM25_RST		0x04
-#define BCSR5_MII1_EN		0x02
-#define BCSR5_MII1_RST		0x01
-
-/* There is no PHY link change interrupt */
-#define PHY_INTERRUPT	(-1)
-
-#else /* FADS */
-
 /* Memory map is configured by the PROM startup.
  * I tried to follow the FADS manual, although the startup PROM
  * dictates this and we simply have to move some of the physical
@@ -55,8 +32,6 @@
 /* PHY link change interrupt */
 #define PHY_INTERRUPT	SIU_IRQ2
 
-#endif /* CONFIG_MPC86XADS */
-
 #define BCSR_SIZE		((uint)(64 * 1024))
 #define BCSR0			((uint)(BCSR_ADDR + 0x00))
 #define BCSR1			((uint)(BCSR_ADDR + 0x04))
diff --git a/arch/ppc/platforms/mpc8272ads_setup.c b/arch/ppc/platforms/mpc8272ads_setup.c
deleted file mode 100644
index 47f4b38edb5f..000000000000
--- a/arch/ppc/platforms/mpc8272ads_setup.c
+++ /dev/null
@@ -1,367 +0,0 @@
-/*
- * arch/ppc/platforms/mpc8272ads_setup.c
- *
- * MPC82xx Board-specific PlatformDevice descriptions
- *
- * 2005 (c) MontaVista Software, Inc.
- * Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/ioport.h>
-#include <linux/fs_enet_pd.h>
-#include <linux/platform_device.h>
-#include <linux/phy.h>
-
-#include <asm/io.h>
-#include <asm/mpc8260.h>
-#include <asm/cpm2.h>
-#include <asm/immap_cpm2.h>
-#include <asm/irq.h>
-#include <asm/ppc_sys.h>
-#include <asm/ppcboot.h>
-#include <linux/fs_uart_pd.h>
-
-#include "pq2ads_pd.h"
-
-static void init_fcc1_ioports(struct fs_platform_info*);
-static void init_fcc2_ioports(struct fs_platform_info*);
-static void init_scc1_uart_ioports(struct fs_uart_platform_info*);
-static void init_scc4_uart_ioports(struct fs_uart_platform_info*);
-
-static struct fs_uart_platform_info mpc8272_uart_pdata[] = {
-	[fsid_scc1_uart] = {
-		.init_ioports 	= init_scc1_uart_ioports,
-		.fs_no		= fsid_scc1_uart,
-		.brg		= 1,
-		.tx_num_fifo	= 4,
-		.tx_buf_size	= 32,
-		.rx_num_fifo	= 4,
-		.rx_buf_size	= 32,
-	},
-	[fsid_scc4_uart] = {
-		.init_ioports 	= init_scc4_uart_ioports,
-		.fs_no		= fsid_scc4_uart,
-		.brg		= 4,
-		.tx_num_fifo	= 4,
-		.tx_buf_size	= 32,
-		.rx_num_fifo	= 4,
-		.rx_buf_size	= 32,
-	},
-};
-
-static struct fs_mii_bb_platform_info m82xx_mii_bb_pdata = {
-	.mdio_dat.bit	= 18,
-	.mdio_dir.bit	= 18,
-	.mdc_dat.bit	= 19,
-	.delay		= 1,
-};
-
-static struct fs_platform_info mpc82xx_enet_pdata[] = {
-	[fsid_fcc1] = {
-		.fs_no		= fsid_fcc1,
-		.cp_page	= CPM_CR_FCC1_PAGE,
-		.cp_block 	= CPM_CR_FCC1_SBLOCK,
-
-		.clk_trx 	= (PC_F1RXCLK | PC_F1TXCLK),
-		.clk_route	= CMX1_CLK_ROUTE,
-		.clk_mask	= CMX1_CLK_MASK,
-		.init_ioports 	= init_fcc1_ioports,
-
-		.mem_offset	= FCC1_MEM_OFFSET,
-
-		.rx_ring	= 32,
-		.tx_ring	= 32,
-		.rx_copybreak	= 240,
-		.use_napi	= 0,
-		.napi_weight	= 17,
-		.bus_id		= "0:00",
-	},
-	[fsid_fcc2] = {
-		.fs_no		= fsid_fcc2,
-		.cp_page	= CPM_CR_FCC2_PAGE,
-		.cp_block 	= CPM_CR_FCC2_SBLOCK,
-		.clk_trx 	= (PC_F2RXCLK | PC_F2TXCLK),
-		.clk_route	= CMX2_CLK_ROUTE,
-		.clk_mask	= CMX2_CLK_MASK,
-		.init_ioports	= init_fcc2_ioports,
-
-		.mem_offset	= FCC2_MEM_OFFSET,
-
-		.rx_ring	= 32,
-		.tx_ring	= 32,
-		.rx_copybreak	= 240,
-		.use_napi	= 0,
-		.napi_weight	= 17,
-		.bus_id		= "0:03",
-	},
-};
-
-static void init_fcc1_ioports(struct fs_platform_info* pdata)
-{
-	struct io_port *io;
-	u32 tempval;
-	cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
-	u32 *bcsr = ioremap(BCSR_ADDR+4, sizeof(u32));
-
-	io = &immap->im_ioport;
-
-	/* Enable the PHY */
-	clrbits32(bcsr, BCSR1_FETHIEN);
-	setbits32(bcsr, BCSR1_FETH_RST);
-
-	/* FCC1 pins are on port A/C. */
-	/* Configure port A and C pins for FCC1 Ethernet. */
-
-	tempval = in_be32(&io->iop_pdira);
-	tempval &= ~PA1_DIRA0;
-	tempval |= PA1_DIRA1;
-	out_be32(&io->iop_pdira, tempval);
-
-	tempval = in_be32(&io->iop_psora);
-	tempval &= ~PA1_PSORA0;
-	tempval |= PA1_PSORA1;
-	out_be32(&io->iop_psora, tempval);
-
-	setbits32(&io->iop_ppara,PA1_DIRA0 | PA1_DIRA1);
-
-	/* Alter clocks */
-	tempval = PC_F1TXCLK|PC_F1RXCLK;
-
-	clrbits32(&io->iop_psorc, tempval);
-	clrbits32(&io->iop_pdirc, tempval);
-	setbits32(&io->iop_pparc, tempval);
-
-	clrbits32(&immap->im_cpmux.cmx_fcr, CMX1_CLK_MASK);
-	setbits32(&immap->im_cpmux.cmx_fcr, CMX1_CLK_ROUTE);
-	iounmap(bcsr);
-	iounmap(immap);
-}
-
-static void init_fcc2_ioports(struct fs_platform_info* pdata)
-{
-	cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
-	u32 *bcsr = ioremap(BCSR_ADDR+12, sizeof(u32));
-
-	struct io_port *io;
-	u32 tempval;
-
-	immap = cpm2_immr;
-
-	io = &immap->im_ioport;
-
-	/* Enable the PHY */
-	clrbits32(bcsr, BCSR3_FETHIEN2);
-	setbits32(bcsr, BCSR3_FETH2_RST);
-
-	/* FCC2 are port B/C. */
-	/* Configure port A and C pins for FCC2 Ethernet. */
-
-	tempval = in_be32(&io->iop_pdirb);
-	tempval &= ~PB2_DIRB0;
-	tempval |= PB2_DIRB1;
-	out_be32(&io->iop_pdirb, tempval);
-
-	tempval = in_be32(&io->iop_psorb);
-	tempval &= ~PB2_PSORB0;
-	tempval |= PB2_PSORB1;
-	out_be32(&io->iop_psorb, tempval);
-
-	setbits32(&io->iop_pparb,PB2_DIRB0 | PB2_DIRB1);
-
-	tempval = PC_F2RXCLK|PC_F2TXCLK;
-
-	/* Alter clocks */
-	clrbits32(&io->iop_psorc,tempval);
-	clrbits32(&io->iop_pdirc,tempval);
-	setbits32(&io->iop_pparc,tempval);
-
-	clrbits32(&immap->im_cpmux.cmx_fcr, CMX2_CLK_MASK);
-	setbits32(&immap->im_cpmux.cmx_fcr, CMX2_CLK_ROUTE);
-
-	iounmap(bcsr);
-	iounmap(immap);
-}
-
-
-static void __init mpc8272ads_fixup_enet_pdata(struct platform_device *pdev,
-					      int idx)
-{
-	bd_t* bi = (void*)__res;
-	int fs_no = fsid_fcc1+pdev->id-1;
-
-	if(fs_no >= ARRAY_SIZE(mpc82xx_enet_pdata)) {
-		return;
-	}
-
-	mpc82xx_enet_pdata[fs_no].dpram_offset=
-			(u32)cpm2_immr->im_dprambase;
-	mpc82xx_enet_pdata[fs_no].fcc_regs_c =
-			(u32)cpm2_immr->im_fcc_c;
-	memcpy(&mpc82xx_enet_pdata[fs_no].macaddr,bi->bi_enetaddr,6);
-
-	/* prevent dup mac */
-	if(fs_no == fsid_fcc2)
-		mpc82xx_enet_pdata[fs_no].macaddr[5] ^= 1;
-
-	pdev->dev.platform_data = &mpc82xx_enet_pdata[fs_no];
-}
-
-static void mpc8272ads_fixup_uart_pdata(struct platform_device *pdev,
-					      int idx)
-{
-	bd_t *bd = (bd_t *) __res;
-	struct fs_uart_platform_info *pinfo;
-	int num = ARRAY_SIZE(mpc8272_uart_pdata);
-	int id = fs_uart_id_scc2fsid(idx);
-
-	/* no need to alter anything if console */
-	if ((id < num) && (!pdev->dev.platform_data)) {
-		pinfo = &mpc8272_uart_pdata[id];
-		pinfo->uart_clk = bd->bi_intfreq;
-		pdev->dev.platform_data = pinfo;
-	}
-}
-
-static void init_scc1_uart_ioports(struct fs_uart_platform_info* pdata)
-{
-	cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
-
-        /* SCC1 is only on port D */
-	setbits32(&immap->im_ioport.iop_ppard,0x00000003);
-	clrbits32(&immap->im_ioport.iop_psord,0x00000001);
-	setbits32(&immap->im_ioport.iop_psord,0x00000002);
-	clrbits32(&immap->im_ioport.iop_pdird,0x00000001);
-	setbits32(&immap->im_ioport.iop_pdird,0x00000002);
-
-        /* Wire BRG1 to SCC1 */
-	clrbits32(&immap->im_cpmux.cmx_scr,0x00ffffff);
-
-	iounmap(immap);
-}
-
-static void init_scc4_uart_ioports(struct fs_uart_platform_info* pdata)
-{
-	cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
-
-	setbits32(&immap->im_ioport.iop_ppard,0x00000600);
-	clrbits32(&immap->im_ioport.iop_psord,0x00000600);
-	clrbits32(&immap->im_ioport.iop_pdird,0x00000200);
-	setbits32(&immap->im_ioport.iop_pdird,0x00000400);
-
-        /* Wire BRG4 to SCC4 */
-	clrbits32(&immap->im_cpmux.cmx_scr,0x000000ff);
-	setbits32(&immap->im_cpmux.cmx_scr,0x0000001b);
-
-	iounmap(immap);
-}
-
-static void __init mpc8272ads_fixup_mdio_pdata(struct platform_device *pdev,
-					      int idx)
-{
-	m82xx_mii_bb_pdata.irq[0] = PHY_INTERRUPT;
-	m82xx_mii_bb_pdata.irq[1] = PHY_POLL;
-	m82xx_mii_bb_pdata.irq[2] = PHY_POLL;
-	m82xx_mii_bb_pdata.irq[3] = PHY_INTERRUPT;
-	m82xx_mii_bb_pdata.irq[31] = PHY_POLL;
-
-
-	m82xx_mii_bb_pdata.mdio_dat.offset =
-				(u32)&cpm2_immr->im_ioport.iop_pdatc;
-
-	m82xx_mii_bb_pdata.mdio_dir.offset =
-				(u32)&cpm2_immr->im_ioport.iop_pdirc;
-
-	m82xx_mii_bb_pdata.mdc_dat.offset =
-				(u32)&cpm2_immr->im_ioport.iop_pdatc;
-
-
-	pdev->dev.platform_data = &m82xx_mii_bb_pdata;
-}
-
-static int mpc8272ads_platform_notify(struct device *dev)
-{
-	static const struct platform_notify_dev_map dev_map[] = {
-		{
-			.bus_id = "fsl-cpm-fcc",
-			.rtn = mpc8272ads_fixup_enet_pdata,
-		},
-		{
-			.bus_id = "fsl-cpm-scc:uart",
-			.rtn = mpc8272ads_fixup_uart_pdata,
-		},
-		{
-			.bus_id = "fsl-bb-mdio",
-			.rtn = mpc8272ads_fixup_mdio_pdata,
-		},
-		{
-			.bus_id = NULL
-		}
-	};
-	platform_notify_map(dev_map,dev);
-
-	return 0;
-
-}
-
-int __init mpc8272ads_init(void)
-{
-	printk(KERN_NOTICE "mpc8272ads: Init\n");
-
-	platform_notify = mpc8272ads_platform_notify;
-
-	ppc_sys_device_initfunc();
-
-	ppc_sys_device_disable_all();
-	ppc_sys_device_enable(MPC82xx_CPM_FCC1);
-	ppc_sys_device_enable(MPC82xx_CPM_FCC2);
-
-	/* to be ready for console, let's attach pdata here */
-#ifdef CONFIG_SERIAL_CPM_SCC1
-	ppc_sys_device_setfunc(MPC82xx_CPM_SCC1, PPC_SYS_FUNC_UART);
-	ppc_sys_device_enable(MPC82xx_CPM_SCC1);
-
-#endif
-
-#ifdef CONFIG_SERIAL_CPM_SCC4
-	ppc_sys_device_setfunc(MPC82xx_CPM_SCC4, PPC_SYS_FUNC_UART);
-	ppc_sys_device_enable(MPC82xx_CPM_SCC4);
-#endif
-
-	ppc_sys_device_enable(MPC82xx_MDIO_BB);
-
-	return 0;
-}
-
-/*
-   To prevent confusion, console selection is gross:
-   by 0 assumed SCC1 and by 1 assumed SCC4
- */
-struct platform_device* early_uart_get_pdev(int index)
-{
-	bd_t *bd = (bd_t *) __res;
-	struct fs_uart_platform_info *pinfo;
-
-	struct platform_device* pdev = NULL;
-	if(index) { /*assume SCC4 here*/
-		pdev = &ppc_sys_platform_devices[MPC82xx_CPM_SCC4];
-		pinfo = &mpc8272_uart_pdata[fsid_scc4_uart];
-	} else { /*over SCC1*/
-		pdev = &ppc_sys_platform_devices[MPC82xx_CPM_SCC1];
-		pinfo = &mpc8272_uart_pdata[fsid_scc1_uart];
-	}
-
-	pinfo->uart_clk = bd->bi_intfreq;
-	pdev->dev.platform_data = pinfo;
-	ppc_sys_fixup_mem_resource(pdev, CPM_MAP_ADDR);
-	return NULL;
-}
-
-arch_initcall(mpc8272ads_init);
diff --git a/arch/ppc/platforms/mpc885ads.h b/arch/ppc/platforms/mpc885ads.h
deleted file mode 100644
index d3bbbb3c9a1f..000000000000
--- a/arch/ppc/platforms/mpc885ads.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * A collection of structures, addresses, and values associated with
- * the Freescale MPC885ADS board.
- * Copied from the FADS stuff.
- *
- * Author: MontaVista Software, Inc.
- *         source@mvista.com
- *
- * 2005 (c) MontaVista Software, Inc.  This file is licensed under the
- * terms of the GNU General Public License version 2.  This program is licensed
- * "as is" without any warranty of any kind, whether express or implied.
- */
-
-#ifdef __KERNEL__
-#ifndef __ASM_MPC885ADS_H__
-#define __ASM_MPC885ADS_H__
-
-
-#include <asm/ppcboot.h>
-
-/* U-Boot maps BCSR to 0xff080000 */
-#define BCSR_ADDR		((uint)0xff080000)
-#define BCSR_SIZE		((uint)32)
-#define BCSR0			((uint)(BCSR_ADDR + 0x00))
-#define BCSR1			((uint)(BCSR_ADDR + 0x04))
-#define BCSR2			((uint)(BCSR_ADDR + 0x08))
-#define BCSR3			((uint)(BCSR_ADDR + 0x0c))
-#define BCSR4			((uint)(BCSR_ADDR + 0x10))
-
-#define CFG_PHYDEV_ADDR		((uint)0xff0a0000)
-#define BCSR5			((uint)(CFG_PHYDEV_ADDR + 0x300))
-
-#define IMAP_ADDR		((uint)0xff000000)
-#define IMAP_SIZE		((uint)(64 * 1024))
-
-#define PCMCIA_MEM_ADDR		((uint)0xff020000)
-#define PCMCIA_MEM_SIZE		((uint)(64 * 1024))
-
-/* Bits of interest in the BCSRs.
- */
-#define BCSR1_ETHEN		((uint)0x20000000)
-#define BCSR1_IRDAEN		((uint)0x10000000)
-#define BCSR1_RS232EN_1		((uint)0x01000000)
-#define BCSR1_PCCEN		((uint)0x00800000)
-#define BCSR1_PCCVCC0		((uint)0x00400000)
-#define BCSR1_PCCVPP0		((uint)0x00200000)
-#define BCSR1_PCCVPP1		((uint)0x00100000)
-#define BCSR1_PCCVPP_MASK	(BCSR1_PCCVPP0 | BCSR1_PCCVPP1)
-#define BCSR1_RS232EN_2		((uint)0x00040000)
-#define BCSR1_PCCVCC1		((uint)0x00010000)
-#define BCSR1_PCCVCC_MASK	(BCSR1_PCCVCC0 | BCSR1_PCCVCC1)
-
-#define BCSR4_ETH10_RST		((uint)0x80000000)	/* 10Base-T PHY reset*/
-#define BCSR4_USB_LO_SPD	((uint)0x04000000)
-#define BCSR4_USB_VCC		((uint)0x02000000)
-#define BCSR4_USB_FULL_SPD	((uint)0x00040000)
-#define BCSR4_USB_EN		((uint)0x00020000)
-
-#define BCSR5_MII2_EN		0x40
-#define BCSR5_MII2_RST		0x20
-#define BCSR5_T1_RST		0x10
-#define BCSR5_ATM155_RST	0x08
-#define BCSR5_ATM25_RST		0x04
-#define BCSR5_MII1_EN		0x02
-#define BCSR5_MII1_RST		0x01
-
-/* Interrupt level assignments */
-#define PHY_INTERRUPT	SIU_IRQ7	/* PHY link change interrupt */
-#define SIU_INT_FEC1	SIU_LEVEL1	/* FEC1 interrupt */
-#define SIU_INT_FEC2	SIU_LEVEL3	/* FEC2 interrupt */
-#define FEC_INTERRUPT	SIU_INT_FEC1	/* FEC interrupt */
-
-/* We don't use the 8259 */
-#define NR_8259_INTS	0
-
-/* CPM Ethernet through SCC3 */
-#define PA_ENET_RXD	((ushort)0x0040)
-#define PA_ENET_TXD	((ushort)0x0080)
-#define PE_ENET_TCLK	((uint)0x00004000)
-#define PE_ENET_RCLK	((uint)0x00008000)
-#define PE_ENET_TENA	((uint)0x00000010)
-#define PC_ENET_CLSN	((ushort)0x0400)
-#define PC_ENET_RENA	((ushort)0x0800)
-
-/* Control bits in the SICR to route TCLK (CLK5) and RCLK (CLK6) to
- * SCC3.  Also, make sure GR3 (bit 8) and SC3 (bit 9) are zero */
-#define SICR_ENET_MASK	((uint)0x00ff0000)
-#define SICR_ENET_CLKRT	((uint)0x002c0000)
-
-#define BOARD_CHIP_NAME "MPC885"
-
-#endif /* __ASM_MPC885ADS_H__ */
-#endif /* __KERNEL__ */
diff --git a/arch/ppc/platforms/mpc885ads_setup.c b/arch/ppc/platforms/mpc885ads_setup.c
deleted file mode 100644
index ba06cc08cdab..000000000000
--- a/arch/ppc/platforms/mpc885ads_setup.c
+++ /dev/null
@@ -1,476 +0,0 @@
-/*arch/ppc/platforms/mpc885ads_setup.c
- *
- * Platform setup for the Freescale mpc885ads board
- *
- * Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * Copyright 2005 MontaVista Software Inc.
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/ioport.h>
-#include <linux/device.h>
-
-#include <linux/fs_enet_pd.h>
-#include <linux/fs_uart_pd.h>
-#include <linux/mii.h>
-
-#include <asm/delay.h>
-#include <asm/io.h>
-#include <asm/machdep.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/time.h>
-#include <asm/ppcboot.h>
-#include <asm/8xx_immap.h>
-#include <asm/cpm1.h>
-#include <asm/ppc_sys.h>
-
-extern unsigned char __res[];
-static void setup_smc1_ioports(struct fs_uart_platform_info*);
-static void setup_smc2_ioports(struct fs_uart_platform_info*);
-
-static struct fs_mii_fec_platform_info	mpc8xx_mdio_fec_pdata;
-static void setup_fec1_ioports(struct fs_platform_info*);
-static void setup_fec2_ioports(struct fs_platform_info*);
-static void setup_scc3_ioports(struct fs_platform_info*);
-
-static struct fs_uart_platform_info mpc885_uart_pdata[] = {
-	[fsid_smc1_uart] = {
-		.brg		= 1,
- 		.fs_no 		= fsid_smc1_uart,
- 		.init_ioports	= setup_smc1_ioports,
-		.tx_num_fifo	= 4,
-		.tx_buf_size	= 32,
-		.rx_num_fifo	= 4,
-		.rx_buf_size	= 32,
- 	},
- 	[fsid_smc2_uart] = {
- 		.brg		= 2,
- 		.fs_no 		= fsid_smc2_uart,
- 		.init_ioports	= setup_smc2_ioports,
-		.tx_num_fifo	= 4,
-		.tx_buf_size	= 32,
-		.rx_num_fifo	= 4,
-		.rx_buf_size	= 32,
- 	},
-};
-
-static struct fs_platform_info mpc8xx_enet_pdata[] = {
-	[fsid_fec1] = {
-	 .rx_ring = 128,
-	 .tx_ring = 16,
-	 .rx_copybreak = 240,
-
-	 .use_napi = 1,
-	 .napi_weight = 17,
-
-	 .init_ioports = setup_fec1_ioports,
-
-          .bus_id = "0:00",
-          .has_phy = 1,
-	 },
-	[fsid_fec2] = {
-	     .rx_ring = 128,
-	     .tx_ring = 16,
-	     .rx_copybreak = 240,
-
-	     .use_napi = 1,
-	     .napi_weight = 17,
-
-	     .init_ioports = setup_fec2_ioports,
-
- 	     .bus_id = "0:01",
- 	     .has_phy = 1,
-	     },
-	[fsid_scc3] = {
-		.rx_ring = 64,
-		.tx_ring = 8,
-		.rx_copybreak = 240,
-
-		.use_napi = 1,
-		.napi_weight = 17,
-
-		.init_ioports = setup_scc3_ioports,
-#ifdef CONFIG_FIXED_MII_10_FDX
-		.bus_id = "fixed@100:1",
-#else
-		.bus_id = "0:02",
- #endif
-	},
-};
-
-void __init board_init(void)
-{
-	cpm8xx_t *cp = cpmp;
- 	unsigned int *bcsr_io;
-
-#ifdef CONFIG_FS_ENET
-	immap_t *immap = (immap_t *) IMAP_ADDR;
-#endif
-	bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
-
-	if (bcsr_io == NULL) {
-		printk(KERN_CRIT "Could not remap BCSR\n");
-		return;
-	}
-#ifdef CONFIG_SERIAL_CPM_SMC1
-	cp->cp_simode &= ~(0xe0000000 >> 17);	/* brg1 */
-	clrbits32(bcsr_io, BCSR1_RS232EN_1);
-        cp->cp_smc[0].smc_smcm |= (SMCM_RX | SMCM_TX);
-        cp->cp_smc[0].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN);
-#else
-	setbits32(bcsr_io,BCSR1_RS232EN_1);
-	cp->cp_smc[0].smc_smcmr = 0;
-	cp->cp_smc[0].smc_smce = 0;
-#endif
-
-#ifdef CONFIG_SERIAL_CPM_SMC2
-	cp->cp_simode &= ~(0xe0000000 >> 1);
-	cp->cp_simode |= (0x20000000 >> 1);	/* brg2 */
-	clrbits32(bcsr_io,BCSR1_RS232EN_2);
-        cp->cp_smc[1].smc_smcm |= (SMCM_RX | SMCM_TX);
-        cp->cp_smc[1].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN);
-#else
-	setbits32(bcsr_io,BCSR1_RS232EN_2);
-	cp->cp_smc[1].smc_smcmr = 0;
-	cp->cp_smc[1].smc_smce = 0;
-#endif
-	iounmap(bcsr_io);
-
-#ifdef CONFIG_FS_ENET
-	/* use MDC for MII (common) */
-	setbits16(&immap->im_ioport.iop_pdpar, 0x0080);
-	clrbits16(&immap->im_ioport.iop_pddir, 0x0080);
-	bcsr_io = ioremap(BCSR5, sizeof(unsigned long));
-	clrbits32(bcsr_io,BCSR5_MII1_EN);
-	clrbits32(bcsr_io,BCSR5_MII1_RST);
-#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
-	clrbits32(bcsr_io,BCSR5_MII2_EN);
-	clrbits32(bcsr_io,BCSR5_MII2_RST);
-#endif
-	iounmap(bcsr_io);
-#endif
-}
-
-static void setup_fec1_ioports(struct fs_platform_info* pdata)
-{
-	immap_t *immap = (immap_t *) IMAP_ADDR;
-
-	/* configure FEC1 pins  */
-	setbits16(&immap->im_ioport.iop_papar, 0xf830);
-	setbits16(&immap->im_ioport.iop_padir, 0x0830);
-	clrbits16(&immap->im_ioport.iop_padir, 0xf000);
-	setbits32(&immap->im_cpm.cp_pbpar, 0x00001001);
-
-	clrbits32(&immap->im_cpm.cp_pbdir, 0x00001001);
-	setbits16(&immap->im_ioport.iop_pcpar, 0x000c);
-	clrbits16(&immap->im_ioport.iop_pcdir, 0x000c);
-	setbits32(&immap->im_cpm.cp_pepar, 0x00000003);
-
-	setbits32(&immap->im_cpm.cp_pedir, 0x00000003);
-	clrbits32(&immap->im_cpm.cp_peso, 0x00000003);
-	clrbits32(&immap->im_cpm.cp_cptr, 0x00000100);
-}
-
-static void setup_fec2_ioports(struct fs_platform_info* pdata)
-{
-	immap_t *immap = (immap_t *) IMAP_ADDR;
-
-	/* configure FEC2 pins */
-	setbits32(&immap->im_cpm.cp_pepar, 0x0003fffc);
-	setbits32(&immap->im_cpm.cp_pedir, 0x0003fffc);
-	clrbits32(&immap->im_cpm.cp_peso, 0x000087fc);
-	setbits32(&immap->im_cpm.cp_peso, 0x00037800);
-	clrbits32(&immap->im_cpm.cp_cptr, 0x00000080);
-}
-
-static void setup_scc3_ioports(struct fs_platform_info* pdata)
-{
-	immap_t *immap = (immap_t *) IMAP_ADDR;
-	unsigned *bcsr_io;
-
-	bcsr_io = ioremap(BCSR_ADDR, BCSR_SIZE);
-
-	if (bcsr_io == NULL) {
-		printk(KERN_CRIT "Could not remap BCSR\n");
-		return;
-	}
-
-	/* Enable the PHY.
-	 */
-	clrbits32(bcsr_io+4, BCSR4_ETH10_RST);
-	udelay(1000);
-	setbits32(bcsr_io+4, BCSR4_ETH10_RST);
-	/* Configure port A pins for Txd and Rxd.
-	 */
-	setbits16(&immap->im_ioport.iop_papar, PA_ENET_RXD | PA_ENET_TXD);
-	clrbits16(&immap->im_ioport.iop_padir, PA_ENET_RXD | PA_ENET_TXD);
-
-	/* Configure port C pins to enable CLSN and RENA.
-	 */
-	clrbits16(&immap->im_ioport.iop_pcpar, PC_ENET_CLSN | PC_ENET_RENA);
-	clrbits16(&immap->im_ioport.iop_pcdir, PC_ENET_CLSN | PC_ENET_RENA);
-	setbits16(&immap->im_ioport.iop_pcso, PC_ENET_CLSN | PC_ENET_RENA);
-
-	/* Configure port E for TCLK and RCLK.
-	 */
-	setbits32(&immap->im_cpm.cp_pepar, PE_ENET_TCLK | PE_ENET_RCLK);
-	clrbits32(&immap->im_cpm.cp_pepar, PE_ENET_TENA);
-	clrbits32(&immap->im_cpm.cp_pedir,
-		  PE_ENET_TCLK | PE_ENET_RCLK | PE_ENET_TENA);
-	clrbits32(&immap->im_cpm.cp_peso, PE_ENET_TCLK | PE_ENET_RCLK);
-	setbits32(&immap->im_cpm.cp_peso, PE_ENET_TENA);
-
-	/* Configure Serial Interface clock routing.
-	 * First, clear all SCC bits to zero, then set the ones we want.
-	 */
-	clrbits32(&immap->im_cpm.cp_sicr, SICR_ENET_MASK);
-	setbits32(&immap->im_cpm.cp_sicr, SICR_ENET_CLKRT);
-
-	/* Disable Rx and Tx. SMC1 sshould be stopped if SCC3 eternet are used.
-	 */
-	immap->im_cpm.cp_smc[0].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN);
-	/* On the MPC885ADS SCC ethernet PHY is initialized in the full duplex mode
-	 * by H/W setting after reset. SCC ethernet controller support only half duplex.
-	 * This discrepancy of modes causes a lot of carrier lost errors.
-	 */
-
-	/* In the original SCC enet driver the following code is placed at
-	   the end of the initialization */
-	setbits32(&immap->im_cpm.cp_pepar, PE_ENET_TENA);
-	clrbits32(&immap->im_cpm.cp_pedir, PE_ENET_TENA);
-	setbits32(&immap->im_cpm.cp_peso, PE_ENET_TENA);
-
-	setbits32(bcsr_io+4, BCSR1_ETHEN);
-	iounmap(bcsr_io);
-}
-
-static int mac_count = 0;
-
-static void mpc885ads_fixup_enet_pdata(struct platform_device *pdev, int fs_no)
-{
- 	struct fs_platform_info *fpi;
-	bd_t *bd = (bd_t *) __res;
-	char *e;
-	int i;
-
-	if(fs_no >= ARRAY_SIZE(mpc8xx_enet_pdata)) {
-		printk(KERN_ERR"No network-suitable #%d device on bus", fs_no);
-		return;
-	}
-
-	fpi = &mpc8xx_enet_pdata[fs_no];
-
-	switch (fs_no) {
-	case fsid_fec1:
-		fpi->init_ioports = &setup_fec1_ioports;
-		break;
-	case fsid_fec2:
-		fpi->init_ioports = &setup_fec2_ioports;
-		break;
-	case fsid_scc3:
-		fpi->init_ioports = &setup_scc3_ioports;
-		break;
-	default:
-    	        printk(KERN_WARNING "Device %s is not supported!\n", pdev->name);
-	        return;
-	}
-
-	pdev->dev.platform_data = fpi;
-	fpi->fs_no = fs_no;
-
-	e = (unsigned char *)&bd->bi_enetaddr;
-	for (i = 0; i < 6; i++)
-		fpi->macaddr[i] = *e++;
-
-	fpi->macaddr[5] += mac_count++;
-
-}
-
-static void mpc885ads_fixup_fec_enet_pdata(struct platform_device *pdev,
-					   int idx)
-{
-	/* This is for FEC devices only */
-	if (!pdev || !pdev->name || (!strstr(pdev->name, "fsl-cpm-fec")))
-		return;
-	mpc885ads_fixup_enet_pdata(pdev, fsid_fec1 + pdev->id - 1);
-}
-
-static void __init mpc885ads_fixup_scc_enet_pdata(struct platform_device *pdev,
-						  int idx)
-{
-	/* This is for SCC devices only */
-	if (!pdev || !pdev->name || (!strstr(pdev->name, "fsl-cpm-scc")))
-		return;
-
-	mpc885ads_fixup_enet_pdata(pdev, fsid_scc1 + pdev->id - 1);
-}
-
-static void setup_smc1_ioports(struct fs_uart_platform_info* pdata)
-{
-        immap_t *immap = (immap_t *) IMAP_ADDR;
-        unsigned *bcsr_io;
-        unsigned int iobits = 0x000000c0;
-
-        bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
-
-        if (bcsr_io == NULL) {
-                printk(KERN_CRIT "Could not remap BCSR1\n");
-                return;
-        }
-        clrbits32(bcsr_io,BCSR1_RS232EN_1);
-        iounmap(bcsr_io);
-
-        setbits32(&immap->im_cpm.cp_pbpar, iobits);
-        clrbits32(&immap->im_cpm.cp_pbdir, iobits);
-        clrbits16(&immap->im_cpm.cp_pbodr, iobits);
-}
-
-static void setup_smc2_ioports(struct fs_uart_platform_info* pdata)
-{
-        immap_t *immap = (immap_t *) IMAP_ADDR;
-        unsigned *bcsr_io;
-        unsigned int iobits = 0x00000c00;
-
-        bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
-
-        if (bcsr_io == NULL) {
-                printk(KERN_CRIT "Could not remap BCSR1\n");
-                return;
-        }
-        clrbits32(bcsr_io,BCSR1_RS232EN_2);
-        iounmap(bcsr_io);
-
-#ifndef CONFIG_SERIAL_CPM_ALT_SMC2
-        setbits32(&immap->im_cpm.cp_pbpar, iobits);
-        clrbits32(&immap->im_cpm.cp_pbdir, iobits);
-        clrbits16(&immap->im_cpm.cp_pbodr, iobits);
-#else
-        setbits16(&immap->im_ioport.iop_papar, iobits);
-        clrbits16(&immap->im_ioport.iop_padir, iobits);
-        clrbits16(&immap->im_ioport.iop_paodr, iobits);
-#endif
-}
-
-static void __init mpc885ads_fixup_uart_pdata(struct platform_device *pdev,
-                                              int idx)
-{
-	bd_t *bd = (bd_t *) __res;
-	struct fs_uart_platform_info *pinfo;
-	int num = ARRAY_SIZE(mpc885_uart_pdata);
-
-	int id = fs_uart_id_smc2fsid(idx);
-
-	/* no need to alter anything if console */
-	if ((id < num) && (!pdev->dev.platform_data)) {
-		pinfo = &mpc885_uart_pdata[id];
-		pinfo->uart_clk = bd->bi_intfreq;
-		pdev->dev.platform_data = pinfo;
-	}
-}
-
-
-static int mpc885ads_platform_notify(struct device *dev)
-{
-
-	static const struct platform_notify_dev_map dev_map[] = {
-		{
-			.bus_id = "fsl-cpm-fec",
-			.rtn = mpc885ads_fixup_fec_enet_pdata,
-		},
-		{
-			.bus_id = "fsl-cpm-scc",
-			.rtn = mpc885ads_fixup_scc_enet_pdata,
-		},
-		{
-			.bus_id = "fsl-cpm-smc:uart",
-			.rtn = mpc885ads_fixup_uart_pdata
-		},
-		{
-			.bus_id = NULL
-		}
-	};
-
-	platform_notify_map(dev_map,dev);
-
-	return 0;
-}
-
-int __init mpc885ads_init(void)
-{
-	struct fs_mii_fec_platform_info* fmpi;
-	bd_t *bd = (bd_t *) __res;
-
-	printk(KERN_NOTICE "mpc885ads: Init\n");
-
-	platform_notify = mpc885ads_platform_notify;
-
-	ppc_sys_device_initfunc();
-	ppc_sys_device_disable_all();
-
-	ppc_sys_device_enable(MPC8xx_CPM_FEC1);
-
-	ppc_sys_device_enable(MPC8xx_MDIO_FEC);
-	fmpi = ppc_sys_platform_devices[MPC8xx_MDIO_FEC].dev.platform_data =
-		&mpc8xx_mdio_fec_pdata;
-
-	fmpi->mii_speed = ((((bd->bi_intfreq + 4999999) / 2500000) / 2) & 0x3F) << 1;
-
-	/* No PHY interrupt line here */
-	fmpi->irq[0xf] = SIU_IRQ7;
-
-#ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3
-	ppc_sys_device_enable(MPC8xx_CPM_SCC3);
-
-#endif
-#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2
-	ppc_sys_device_enable(MPC8xx_CPM_FEC2);
-#endif
-
-#ifdef CONFIG_SERIAL_CPM_SMC1
-	ppc_sys_device_enable(MPC8xx_CPM_SMC1);
-	ppc_sys_device_setfunc(MPC8xx_CPM_SMC1, PPC_SYS_FUNC_UART);
-#endif
-
-#ifdef CONFIG_SERIAL_CPM_SMC2
-	ppc_sys_device_enable(MPC8xx_CPM_SMC2);
-	ppc_sys_device_setfunc(MPC8xx_CPM_SMC2, PPC_SYS_FUNC_UART);
-#endif
-	return 0;
-}
-
-arch_initcall(mpc885ads_init);
-
-/*
-   To prevent confusion, console selection is gross:
-   by 0 assumed SMC1 and by 1 assumed SMC2
- */
-struct platform_device* early_uart_get_pdev(int index)
-{
-	bd_t *bd = (bd_t *) __res;
-	struct fs_uart_platform_info *pinfo;
-
-	struct platform_device* pdev = NULL;
-	if(index) { /*assume SMC2 here*/
-		pdev = &ppc_sys_platform_devices[MPC8xx_CPM_SMC2];
-		pinfo = &mpc885_uart_pdata[1];
-	} else { /*over SMC1*/
-		pdev = &ppc_sys_platform_devices[MPC8xx_CPM_SMC1];
-		pinfo = &mpc885_uart_pdata[0];
-	}
-
-	pinfo->uart_clk = bd->bi_intfreq;
-	pdev->dev.platform_data = pinfo;
-	ppc_sys_fixup_mem_resource(pdev, IMAP_ADDR);
-	return NULL;
-}
-
diff --git a/arch/ppc/platforms/pq2ads.c b/arch/ppc/platforms/pq2ads.c
deleted file mode 100644
index 7fc2e02f5246..000000000000
--- a/arch/ppc/platforms/pq2ads.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * PQ2ADS platform support
- *
- * Author: Kumar Gala <galak@kernel.crashing.org>
- * Derived from: est8260_setup.c by Allen Curtis
- *
- * Copyright 2004 Freescale Semiconductor, Inc.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/init.h>
-
-#include <asm/io.h>
-#include <asm/mpc8260.h>
-#include <asm/cpm2.h>
-#include <asm/immap_cpm2.h>
-
-void __init
-m82xx_board_setup(void)
-{
-	cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t));
-	u32 *bcsr = ioremap(BCSR_ADDR+4, sizeof(u32));
-
-	/* Enable the 2nd UART port */
-	clrbits32(bcsr, BCSR1_RS232_EN2);
-
-#ifdef CONFIG_SERIAL_CPM_SCC1
-	clrbits32((u32*)&immap->im_scc[0].scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
-	clrbits32((u32*)&immap->im_scc[0].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
-#endif
-
-#ifdef CONFIG_SERIAL_CPM_SCC2
-	clrbits32((u32*)&immap->im_scc[1].scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
-	clrbits32((u32*)&immap->im_scc[1].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
-#endif
-
-#ifdef CONFIG_SERIAL_CPM_SCC3
-	clrbits32((u32*)&immap->im_scc[2].scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
-	clrbits32((u32*)&immap->im_scc[2].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
-#endif
-
-#ifdef CONFIG_SERIAL_CPM_SCC4
-	clrbits32((u32*)&immap->im_scc[3].scc_sccm, UART_SCCM_TX | UART_SCCM_RX);
-	clrbits32((u32*)&immap->im_scc[3].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT);
-#endif
-
-	iounmap(bcsr);
-	iounmap(immap);
-}
diff --git a/arch/ppc/platforms/pq2ads.h b/arch/ppc/platforms/pq2ads.h
deleted file mode 100644
index 2b287f4e0ca3..000000000000
--- a/arch/ppc/platforms/pq2ads.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * A collection of structures, addresses, and values associated with
- * the Motorola MPC8260ADS/MPC8266ADS-PCI boards.
- * Copied from the RPX-Classic and SBS8260 stuff.
- *
- * Copyright (c) 2001 Dan Malek (dan@mvista.com)
- */
-#ifdef __KERNEL__
-#ifndef __MACH_ADS8260_DEFS
-#define __MACH_ADS8260_DEFS
-
-
-#include <asm/ppcboot.h>
-
-#if defined(CONFIG_ADS8272)
-#define BOARD_CHIP_NAME "8272"
-#endif
-
-/* Memory map is configured by the PROM startup.
- * We just map a few things we need.  The CSR is actually 4 byte-wide
- * registers that can be accessed as 8-, 16-, or 32-bit values.
- */
-#define CPM_MAP_ADDR		((uint)0xf0000000)
-#define BCSR_ADDR		((uint)0xf4500000)
-#define BCSR_SIZE		((uint)(32 * 1024))
-
-#define BOOTROM_RESTART_ADDR	((uint)0xff000104)
-
-/* For our show_cpuinfo hooks. */
-#define CPUINFO_VENDOR		"Motorola"
-#define CPUINFO_MACHINE		"PQ2 ADS PowerPC"
-
-/* The ADS8260 has 16, 32-bit wide control/status registers, accessed
- * only on word boundaries.
- * Not all are used (yet), or are interesting to us (yet).
- */
-
-/* Things of interest in the CSR.
-*/
-#define BCSR0_LED0		((uint)0x02000000)	/* 0 == on */
-#define BCSR0_LED1		((uint)0x01000000)	/* 0 == on */
-#define BCSR1_FETHIEN		((uint)0x08000000)	/* 0 == enable */
-#define BCSR1_FETH_RST		((uint)0x04000000)	/* 0 == reset */
-#define BCSR1_RS232_EN1		((uint)0x02000000)	/* 0 == enable */
-#define BCSR1_RS232_EN2		((uint)0x01000000)	/* 0 == enable */
-#define BCSR3_FETHIEN2		((uint)0x10000000)	/* 0 == enable */
-#define BCSR3_FETH2_RST 	((uint)0x80000000)	/* 0 == reset */
-
-#define PHY_INTERRUPT	SIU_INT_IRQ7
-
-#ifdef CONFIG_PCI
-/* PCI interrupt controller */
-#define PCI_INT_STAT_REG	0xF8200000
-#define PCI_INT_MASK_REG	0xF8200004
-#define PIRQA			(NR_CPM_INTS + 0)
-#define PIRQB			(NR_CPM_INTS + 1)
-#define PIRQC			(NR_CPM_INTS + 2)
-#define PIRQD			(NR_CPM_INTS + 3)
-
-/*
- * PCI memory map definitions for MPC8266ADS-PCI.
- *
- * processor view
- *	local address		PCI address		target
- *	0x80000000-0x9FFFFFFF	0x80000000-0x9FFFFFFF	PCI mem with prefetch
- *	0xA0000000-0xBFFFFFFF	0xA0000000-0xBFFFFFFF	PCI mem w/o prefetch
- *	0xF4000000-0xF7FFFFFF	0x00000000-0x03FFFFFF	PCI IO
- *
- * PCI master view
- *	local address		PCI address		target
- *	0x00000000-0x1FFFFFFF	0x00000000-0x1FFFFFFF	MPC8266 local memory
- */
-
-/* All the other PCI memory map definitions reside at syslib/m82xx_pci.h
-   Here we should redefine what is unique for this board */
-#define M82xx_PCI_SLAVE_MEM_LOCAL	0x00000000	/* Local base */
-#define M82xx_PCI_SLAVE_MEM_BUS		0x00000000	/* PCI base */
-#define M82xx_PCI_SLAVE_MEM_SIZE	0x10000000	/* 256 Mb */
-
-#define M82xx_PCI_SLAVE_SEC_WND_SIZE	~(0x40000000 - 1U)	/* 2 x 512Mb  */
-#define M82xx_PCI_SLAVE_SEC_WND_BASE	0x80000000		/* PCI Memory base */
-
-#if defined(CONFIG_ADS8272)
-#define PCI_INT_TO_SIU 	SIU_INT_IRQ2
-#elif defined(CONFIG_PQ2FADS)
-#define PCI_INT_TO_SIU 	SIU_INT_IRQ6
-#else
-#warning PCI Bridge will be without interrupts support
-#endif
-
-#endif /* CONFIG_PCI */
-
-#endif /* __MACH_ADS8260_DEFS */
-#endif /* __KERNEL__ */
diff --git a/arch/ppc/platforms/pq2ads_pd.h b/arch/ppc/platforms/pq2ads_pd.h
deleted file mode 100644
index 672483df8079..000000000000
--- a/arch/ppc/platforms/pq2ads_pd.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef __PQ2ADS_PD_H
-#define __PQ2ADS_PD_H
-/*
- * arch/ppc/platforms/82xx/pq2ads_pd.h
- *
- * Some defines for MPC82xx board-specific PlatformDevice descriptions
- *
- * 2005 (c) MontaVista Software, Inc.
- * Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * This file is licensed under the terms of the GNU General Public License
- * version 2. This program is licensed "as is" without any warranty of any
- * kind, whether express or implied.
- */
-
-/* FCC1 Clock Source Configuration.  These can be redefined in the board specific file.
-   Can only choose from CLK9-12 */
-
-#define F1_RXCLK	11
-#define F1_TXCLK	10
-
-/* FCC2 Clock Source Configuration.  These can be redefined in the board specific file.
-   Can only choose from CLK13-16 */
-#define F2_RXCLK	15
-#define F2_TXCLK	16
-
-/* FCC3 Clock Source Configuration.  These can be redefined in the board specific file.
-   Can only choose from CLK13-16 */
-#define F3_RXCLK	13
-#define F3_TXCLK	14
-
-#endif
diff --git a/arch/ppc/syslib/m8260_setup.c b/arch/ppc/syslib/m8260_setup.c
index 46588fa94381..b40583724de3 100644
--- a/arch/ppc/syslib/m8260_setup.c
+++ b/arch/ppc/syslib/m8260_setup.c
@@ -175,12 +175,6 @@ m8260_init_IRQ(void)
 	 * in case the boot rom changed something on us.
 	 */
 	cpm2_immr->im_intctl.ic_siprr = 0x05309770;
-
-#if defined(CONFIG_PCI) && (defined(CONFIG_ADS8272) || defined(CONFIG_PQ2FADS))
- 	/* Initialize stuff for the 82xx CPLD IC and install demux  */
- 	pq2pci_init_irq();
-#endif
-
 }
 
 /*
diff --git a/arch/ppc/syslib/m82xx_pci.c b/arch/ppc/syslib/m82xx_pci.c
index fe860d52e2e4..657a1c25a2ab 100644
--- a/arch/ppc/syslib/m82xx_pci.c
+++ b/arch/ppc/syslib/m82xx_pci.c
@@ -150,14 +150,6 @@ pq2pci_init_irq(void)
 {
 	int irq;
 	volatile cpm2_map_t *immap = cpm2_immr;
-#if defined CONFIG_ADS8272
-	/* configure chip select for PCI interrupt controller */
-	immap->im_memctl.memc_br3 = PCI_INT_STAT_REG | 0x00001801;
-	immap->im_memctl.memc_or3 = 0xffff8010;
-#elif defined CONFIG_PQ2FADS
-	immap->im_memctl.memc_br8 = PCI_INT_STAT_REG | 0x00001801;
-	immap->im_memctl.memc_or8 = 0xffff8010;
-#endif
 	for (irq = NR_CPM_INTS; irq < NR_CPM_INTS + 4; irq++)
 		irq_desc[irq].chip = &pq2pci_ic;
 
@@ -222,26 +214,6 @@ pq2ads_setup_pci(struct pci_controller *hose)
 	immap->im_memctl.memc_pcibr1  = M82xx_PCI_SEC_WND_BASE | PCIBR_ENABLE;
 #endif
 
-#if defined CONFIG_ADS8272
-	immap->im_siu_conf.siu_82xx.sc_siumcr =
-		(immap->im_siu_conf.siu_82xx.sc_siumcr &
-		~(SIUMCR_BBD | SIUMCR_ESE | SIUMCR_PBSE |
-		SIUMCR_CDIS | SIUMCR_DPPC11 | SIUMCR_L2CPC11 |
-		SIUMCR_LBPC11 | SIUMCR_APPC11 |
-		SIUMCR_CS10PC11 | SIUMCR_BCTLC11 | SIUMCR_MMR11)) |
-		SIUMCR_DPPC11 | SIUMCR_L2CPC01 | SIUMCR_LBPC00 |
-		SIUMCR_APPC10 | SIUMCR_CS10PC00 |
-		SIUMCR_BCTLC00 | SIUMCR_MMR11 ;
-
-#elif defined CONFIG_PQ2FADS
-	/*
-	 * Setting required to enable IRQ1-IRQ7 (SIUMCR [DPPC]),
-	 * and local bus for PCI (SIUMCR [LBPC]).
-	 */
-	immap->im_siu_conf.siu_82xx.sc_siumcr = (immap->im_siu_conf.siu_82xx.sc_siumcr &
-				~(SIUMCR_L2CPC11 | SIUMCR_LBPC11 | SIUMCR_CS10PC11 | SIUMCR_APPC11) |
-				SIUMCR_BBD | SIUMCR_LBPC01 | SIUMCR_DPPC11 | SIUMCR_APPC10);
-#endif
 	/* Enable PCI  */
 	immap->im_pci.pci_gcr = cpu_to_le32(PCIGCR_PCI_BUS_EN);
 
@@ -284,12 +256,6 @@ pq2ads_setup_pci(struct pci_controller *hose)
 	immap->im_pci.pci_pibar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_BUS  >> PITA_ADDR_SHIFT);
 	immap->im_pci.pci_pitar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_LOCAL>> PITA_ADDR_SHIFT);
 
-#if defined CONFIG_ADS8272
-	/* PCI int highest prio */
-	immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x01236745;
-#elif defined CONFIG_PQ2FADS
-	immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x03124567;
-#endif
 	/* park bus on PCI */
 	immap->im_siu_conf.siu_82xx.sc_ppc_acr = PPC_ACR_BUS_PARK_PCI;
 
@@ -320,10 +286,6 @@ void __init pq2_find_bridges(void)
 	hose->bus_offset = 0;
 	hose->last_busno = 0xff;
 
-#ifdef CONFIG_ADS8272
-	hose->set_cfg_type = 1;
-#endif
-
 	setup_m8260_indirect_pci(hose,
 				 (unsigned long)&cpm2_immr->im_pci.pci_cfg_addr,
 				 (unsigned long)&cpm2_immr->im_pci.pci_cfg_data);
diff --git a/arch/ppc/syslib/m8xx_setup.c b/arch/ppc/syslib/m8xx_setup.c
index 19749e9bcf91..18da720fc1b0 100644
--- a/arch/ppc/syslib/m8xx_setup.c
+++ b/arch/ppc/syslib/m8xx_setup.c
@@ -141,16 +141,6 @@ m8xx_setup_arch(void)
 #endif
 #endif
 
-#if defined (CONFIG_MPC86XADS) || defined (CONFIG_MPC885ADS)
-#if defined(CONFIG_MTD_PHYSMAP)
-       physmap_configure(binfo->bi_flashstart, binfo->bi_flashsize,
-                                               MPC8xxADS_BANK_WIDTH, NULL);
-#ifdef CONFIG_MTD_PARTITIONS
-       physmap_set_partitions(mpc8xxads_partitions, mpc8xxads_part_num);
-#endif /* CONFIG_MTD_PARTITIONS */
-#endif /* CONFIG_MTD_PHYSMAP */
-#endif
-
 	board_init();
 }
 
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index f6a68e178fc5..8f5f02160ffc 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
+config PGSTE
+	bool
+	default y if KVM
+
 mainmenu "Linux Kernel Configuration"
 
 config S390
@@ -69,6 +73,7 @@ config S390
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_KVM if 64BIT
 
 source "init/Kconfig"
 
@@ -515,6 +520,13 @@ config ZFCPDUMP
 	  Select this option if you want to build an zfcpdump enabled kernel.
 	  Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
 
+config S390_GUEST
+bool "s390 guest support (EXPERIMENTAL)"
+	depends on 64BIT && EXPERIMENTAL
+	select VIRTIO
+	select VIRTIO_RING
+	help
+	  Select this option if you want to run the kernel under s390 linux
 endmenu
 
 source "net/Kconfig"
@@ -536,3 +548,5 @@ source "security/Kconfig"
 source "crypto/Kconfig"
 
 source "lib/Kconfig"
+
+source "arch/s390/kvm/Kconfig"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index f708be367b03..792a4e7743ce 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start
 head-y		:= arch/s390/kernel/head.o arch/s390/kernel/init_task.o
 
 core-y		+= arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \
-		   arch/s390/appldata/ arch/s390/hypfs/
+		   arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/
 libs-y		+= arch/s390/lib/
 drivers-y	+= drivers/s390/
 drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 540a67f979b6..68ec4083bf73 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -144,6 +144,10 @@ static noinline __init void detect_machine_type(void)
 	/* Running on a P/390 ? */
 	if (cpuinfo->cpu_id.machine == 0x7490)
 		machine_flags |= 4;
+
+	/* Running under KVM ? */
+	if (cpuinfo->cpu_id.version == 0xfe)
+		machine_flags |= 64;
 }
 
 #ifdef CONFIG_64BIT
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 7141147e6b63..a9d18aafa5f4 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p)
 early_param("ipldelay", early_parse_ipldelay);
 
 #ifdef CONFIG_S390_SWITCH_AMODE
+#ifdef CONFIG_PGSTE
+unsigned int switch_amode = 1;
+#else
 unsigned int switch_amode = 0;
+#endif
 EXPORT_SYMBOL_GPL(switch_amode);
 
 static void set_amode_and_uaccess(unsigned long user_amode,
@@ -797,9 +801,13 @@ setup_arch(char **cmdline_p)
 	       "This machine has an IEEE fpu\n" :
 	       "This machine has no IEEE fpu\n");
 #else /* CONFIG_64BIT */
-	printk((MACHINE_IS_VM) ?
-	       "We are running under VM (64 bit mode)\n" :
-	       "We are running native (64 bit mode)\n");
+	if (MACHINE_IS_VM)
+		printk("We are running under VM (64 bit mode)\n");
+	else if (MACHINE_IS_KVM) {
+		printk("We are running under KVM (64 bit mode)\n");
+		add_preferred_console("ttyS", 1, NULL);
+	} else
+		printk("We are running native (64 bit mode)\n");
 #endif /* CONFIG_64BIT */
 
 	/* Save unparsed command line copy for /proc/cmdline */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c5f05b3fb2c3..ca90ee3f930e 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk)
 	S390_lowcore.steal_clock -= cputime << 12;
 	account_system_time(tsk, 0, cputime);
 }
+EXPORT_SYMBOL_GPL(account_system_vtime);
 
 static inline void set_vtimer(__u64 expires)
 {
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
new file mode 100644
index 000000000000..1761b74d639b
--- /dev/null
+++ b/arch/s390/kvm/Kconfig
@@ -0,0 +1,46 @@
+#
+# KVM configuration
+#
+config HAVE_KVM
+       bool
+
+menuconfig VIRTUALIZATION
+	bool "Virtualization"
+	default y
+	---help---
+	  Say Y here to get to see options for using your Linux host to run other
+	  operating systems inside virtual machines (guests).
+	  This option alone does not add any kernel code.
+
+	  If you say N, all options in this submenu will be skipped and disabled.
+
+if VIRTUALIZATION
+
+config KVM
+	tristate "Kernel-based Virtual Machine (KVM) support"
+	depends on HAVE_KVM && EXPERIMENTAL
+	select PREEMPT_NOTIFIERS
+	select ANON_INODES
+	select S390_SWITCH_AMODE
+	select PREEMPT
+	---help---
+	  Support hosting paravirtualized guest machines using the SIE
+	  virtualization capability on the mainframe. This should work
+	  on any 64bit machine.
+
+	  This module provides access to the hardware capabilities through
+	  a character device node named /dev/kvm.
+
+	  To compile this as a module, choose M here: the module
+	  will be called kvm.
+
+	  If unsure, say N.
+
+config KVM_TRACE
+       bool
+
+# OK, it's a little counter-intuitive to do this, but it puts it neatly under
+# the virtualization menu.
+source drivers/virtio/Kconfig
+
+endif # VIRTUALIZATION
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
new file mode 100644
index 000000000000..e5221ec0b8e3
--- /dev/null
+++ b/arch/s390/kvm/Makefile
@@ -0,0 +1,14 @@
+# Makefile for kernel virtual machines on s390
+#
+# Copyright IBM Corp. 2008
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License (version 2 only)
+# as published by the Free Software Foundation.
+
+common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o)
+
+EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm
+
+kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o
+obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
new file mode 100644
index 000000000000..f639a152869f
--- /dev/null
+++ b/arch/s390/kvm/diag.c
@@ -0,0 +1,67 @@
+/*
+ * diag.c - handling diagnose instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+
+static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
+	vcpu->stat.diagnose_44++;
+	vcpu_put(vcpu);
+	schedule();
+	vcpu_load(vcpu);
+	return 0;
+}
+
+static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
+{
+	unsigned int reg = vcpu->arch.sie_block->ipa & 0xf;
+	unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff;
+
+	VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
+	switch (subcode) {
+	case 3:
+		vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+		break;
+	case 4:
+		vcpu->run->s390_reset_flags = 0;
+		break;
+	default:
+		return -ENOTSUPP;
+	}
+
+	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
+	vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
+	vcpu->run->exit_reason = KVM_EXIT_S390_RESET;
+	VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx",
+	  vcpu->run->s390_reset_flags);
+	return -EREMOTE;
+}
+
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
+{
+	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+
+	switch (code) {
+	case 0x44:
+		return __diag_time_slice_end(vcpu);
+	case 0x308:
+		return __diag_ipl_functions(vcpu);
+	default:
+		return -ENOTSUPP;
+	}
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
new file mode 100644
index 000000000000..4e0633c413f3
--- /dev/null
+++ b/arch/s390/kvm/gaccess.h
@@ -0,0 +1,274 @@
+/*
+ * gaccess.h -  access guest memory
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#ifndef __KVM_S390_GACCESS_H
+#define __KVM_S390_GACCESS_H
+
+#include <linux/compiler.h>
+#include <linux/kvm_host.h>
+#include <asm/uaccess.h>
+
+static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu,
+					       u64 guestaddr)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestaddr < 2 * PAGE_SIZE)
+		guestaddr += prefix;
+	else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE))
+		guestaddr -= prefix;
+
+	if (guestaddr > memsize)
+		return (void __user __force *) ERR_PTR(-EFAULT);
+
+	guestaddr += origin;
+
+	return (void __user *) guestaddr;
+}
+
+static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u64 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 7);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u64 __user *) uptr);
+}
+
+static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u32 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 3);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u32 __user *) uptr);
+}
+
+static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u16 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 1);
+
+	if (IS_ERR(uptr))
+		return PTR_ERR(uptr);
+
+	return get_user(*result, (u16 __user *) uptr);
+}
+
+static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+			       u8 *result)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return get_user(*result, (u8 __user *) uptr);
+}
+
+static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u64 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 7);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u64 __user *) uptr);
+}
+
+static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u32 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 3);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u32 __user *) uptr);
+}
+
+static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr,
+				u16 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	BUG_ON(guestaddr & 1);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u16 __user *) uptr);
+}
+
+static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr,
+			       u8 value)
+{
+	void __user *uptr = __guestaddr_to_user(vcpu, guestaddr);
+
+	if (IS_ERR((void __force *) uptr))
+		return PTR_ERR((void __force *) uptr);
+
+	return put_user(value, (u8 __user *) uptr);
+}
+
+
+static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest,
+				       const void *from, unsigned long n)
+{
+	int rc;
+	unsigned long i;
+	const u8 *data = from;
+
+	for (i = 0; i < n; i++) {
+		rc = put_guest_u8(vcpu, guestdest++, *(data++));
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest,
+				const void *from, unsigned long n)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if ((guestdest < prefix) && (guestdest + n > prefix))
+		goto slowpath;
+
+	if ((guestdest < prefix + 2 * PAGE_SIZE)
+	    && (guestdest + n > prefix + 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if (guestdest < 2 * PAGE_SIZE)
+		guestdest += prefix;
+	else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE))
+		guestdest -= prefix;
+
+	if (guestdest + n > memsize)
+		return -EFAULT;
+
+	if (guestdest + n < guestdest)
+		return -EFAULT;
+
+	guestdest += origin;
+
+	return copy_to_user((void __user *) guestdest, from, n);
+slowpath:
+	return __copy_to_guest_slow(vcpu, guestdest, from, n);
+}
+
+static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to,
+					 u64 guestsrc, unsigned long n)
+{
+	int rc;
+	unsigned long i;
+	u8 *data = to;
+
+	for (i = 0; i < n; i++) {
+		rc = get_guest_u8(vcpu, guestsrc++, data++);
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to,
+				  u64 guestsrc, unsigned long n)
+{
+	u64 prefix  = vcpu->arch.sie_block->prefix;
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if ((guestsrc < prefix) && (guestsrc + n > prefix))
+		goto slowpath;
+
+	if ((guestsrc < prefix + 2 * PAGE_SIZE)
+	    && (guestsrc + n > prefix + 2 * PAGE_SIZE))
+		goto slowpath;
+
+	if (guestsrc < 2 * PAGE_SIZE)
+		guestsrc += prefix;
+	else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE))
+		guestsrc -= prefix;
+
+	if (guestsrc + n > memsize)
+		return -EFAULT;
+
+	if (guestsrc + n < guestsrc)
+		return -EFAULT;
+
+	guestsrc += origin;
+
+	return copy_from_user(to, (void __user *) guestsrc, n);
+slowpath:
+	return __copy_from_guest_slow(vcpu, to, guestsrc, n);
+}
+
+static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest,
+					 const void *from, unsigned long n)
+{
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestdest + n > memsize)
+		return -EFAULT;
+
+	if (guestdest + n < guestdest)
+		return -EFAULT;
+
+	guestdest += origin;
+
+	return copy_to_user((void __user *) guestdest, from, n);
+}
+
+static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to,
+					   u64 guestsrc, unsigned long n)
+{
+	u64 origin  = vcpu->kvm->arch.guest_origin;
+	u64 memsize = vcpu->kvm->arch.guest_memsize;
+
+	if (guestsrc + n > memsize)
+		return -EFAULT;
+
+	if (guestsrc + n < guestsrc)
+		return -EFAULT;
+
+	guestsrc += origin;
+
+	return copy_from_user(to, (void __user *) guestsrc, n);
+}
+#endif
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
new file mode 100644
index 000000000000..349581a26103
--- /dev/null
+++ b/arch/s390/kvm/intercept.c
@@ -0,0 +1,216 @@
+/*
+ * intercept.c - in-kernel handling for sie intercepts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+
+#include <asm/kvm_host.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int handle_lctg(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
+			((vcpu->arch.sie_block->ipb & 0xff00) << 4);
+	u64 useraddr;
+	int reg, rc;
+
+	vcpu->stat.instruction_lctg++;
+	if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f)
+		return -ENOTSUPP;
+
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+
+	reg = reg1;
+
+	VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+		   disp2);
+
+	do {
+		rc = get_guest_u64(vcpu, useraddr,
+				   &vcpu->arch.sie_block->gcr[reg]);
+		if (rc == -EFAULT) {
+			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+			break;
+		}
+		useraddr += 8;
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	return 0;
+}
+
+static int handle_lctl(struct kvm_vcpu *vcpu)
+{
+	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 useraddr;
+	u32 val = 0;
+	int reg, rc;
+
+	vcpu->stat.instruction_lctl++;
+
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+
+	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
+		   disp2);
+
+	reg = reg1;
+	do {
+		rc = get_guest_u32(vcpu, useraddr, &val);
+		if (rc == -EFAULT) {
+			kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+			break;
+		}
+		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
+		vcpu->arch.sie_block->gcr[reg] |= val;
+		useraddr += 4;
+		if (reg == reg3)
+			break;
+		reg = (reg + 1) % 16;
+	} while (1);
+	return 0;
+}
+
+static intercept_handler_t instruction_handlers[256] = {
+	[0x83] = kvm_s390_handle_diag,
+	[0xae] = kvm_s390_handle_sigp,
+	[0xb2] = kvm_s390_handle_priv,
+	[0xb7] = handle_lctl,
+	[0xeb] = handle_lctg,
+};
+
+static int handle_noop(struct kvm_vcpu *vcpu)
+{
+	switch (vcpu->arch.sie_block->icptcode) {
+	case 0x10:
+		vcpu->stat.exit_external_request++;
+		break;
+	case 0x14:
+		vcpu->stat.exit_external_interrupt++;
+		break;
+	default:
+		break; /* nothing */
+	}
+	return 0;
+}
+
+static int handle_stop(struct kvm_vcpu *vcpu)
+{
+	int rc;
+
+	vcpu->stat.exit_stop_request++;
+	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
+		vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
+		rc = __kvm_s390_vcpu_store_status(vcpu,
+						  KVM_S390_STORE_STATUS_NOADDR);
+		if (rc >= 0)
+			rc = -ENOTSUPP;
+	}
+
+	if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
+		vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
+		VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
+		rc = -ENOTSUPP;
+	} else
+		rc = 0;
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	return rc;
+}
+
+static int handle_validity(struct kvm_vcpu *vcpu)
+{
+	int viwhy = vcpu->arch.sie_block->ipb >> 16;
+	vcpu->stat.exit_validity++;
+	if (viwhy == 0x37) {
+		fault_in_pages_writeable((char __user *)
+					 vcpu->kvm->arch.guest_origin +
+					 vcpu->arch.sie_block->prefix,
+					 PAGE_SIZE);
+		return 0;
+	}
+	VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
+		   viwhy);
+	return -ENOTSUPP;
+}
+
+static int handle_instruction(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler;
+
+	vcpu->stat.exit_instruction++;
+	handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8];
+	if (handler)
+		return handler(vcpu);
+	return -ENOTSUPP;
+}
+
+static int handle_prog(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.exit_program_interruption++;
+	return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc);
+}
+
+static int handle_instruction_and_prog(struct kvm_vcpu *vcpu)
+{
+	int rc, rc2;
+
+	vcpu->stat.exit_instr_and_program++;
+	rc = handle_instruction(vcpu);
+	rc2 = handle_prog(vcpu);
+
+	if (rc == -ENOTSUPP)
+		vcpu->arch.sie_block->icptcode = 0x04;
+	if (rc)
+		return rc;
+	return rc2;
+}
+
+static const intercept_handler_t intercept_funcs[0x48 >> 2] = {
+	[0x00 >> 2] = handle_noop,
+	[0x04 >> 2] = handle_instruction,
+	[0x08 >> 2] = handle_prog,
+	[0x0C >> 2] = handle_instruction_and_prog,
+	[0x10 >> 2] = handle_noop,
+	[0x14 >> 2] = handle_noop,
+	[0x1C >> 2] = kvm_s390_handle_wait,
+	[0x20 >> 2] = handle_validity,
+	[0x28 >> 2] = handle_stop,
+};
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t func;
+	u8 code = vcpu->arch.sie_block->icptcode;
+
+	if (code & 3 || code > 0x48)
+		return -ENOTSUPP;
+	func = intercept_funcs[code >> 2];
+	if (func)
+		return func(vcpu);
+	return -ENOTSUPP;
+}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
new file mode 100644
index 000000000000..fcd1ed8015c1
--- /dev/null
+++ b/arch/s390/kvm/interrupt.c
@@ -0,0 +1,592 @@
+/*
+ * interrupt.c - handling kvm guest interrupts
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ */
+
+#include <asm/lowcore.h>
+#include <asm/uaccess.h>
+#include <linux/kvm_host.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+static int psw_extint_disabled(struct kvm_vcpu *vcpu)
+{
+	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
+}
+
+static int psw_interrupts_disabled(struct kvm_vcpu *vcpu)
+{
+	if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) ||
+	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) ||
+	    (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT))
+		return 0;
+	return 1;
+}
+
+static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
+				      struct interrupt_info *inti)
+{
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
+			return 1;
+		return 0;
+	case KVM_S390_INT_SERVICE:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+			return 1;
+		return 0;
+	case KVM_S390_INT_VIRTIO:
+		if (psw_extint_disabled(vcpu))
+			return 0;
+		if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
+			return 1;
+		return 0;
+	case KVM_S390_PROGRAM_INT:
+	case KVM_S390_SIGP_STOP:
+	case KVM_S390_SIGP_SET_PREFIX:
+	case KVM_S390_RESTART:
+		return 1;
+	default:
+		BUG();
+	}
+	return 0;
+}
+
+static void __set_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __unset_cpu_idle(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1);
+	atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
+	clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask);
+}
+
+static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
+{
+	atomic_clear_mask(CPUSTAT_ECALL_PEND |
+		CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT,
+		&vcpu->arch.sie_block->cpuflags);
+	vcpu->arch.sie_block->lctl = 0x0000;
+}
+
+static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
+{
+	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
+}
+
+static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
+				      struct interrupt_info *inti)
+{
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+	case KVM_S390_INT_SERVICE:
+	case KVM_S390_INT_VIRTIO:
+		if (psw_extint_disabled(vcpu))
+			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+		else
+			vcpu->arch.sie_block->lctl |= LCTL_CR0;
+		break;
+	case KVM_S390_SIGP_STOP:
+		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+		break;
+	default:
+		BUG();
+	}
+}
+
+static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
+				   struct interrupt_info *inti)
+{
+	const unsigned short table[] = { 2, 4, 4, 6 };
+	int rc, exception = 0;
+
+	switch (inti->type) {
+	case KVM_S390_INT_EMERGENCY:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+		vcpu->stat.deliver_emergency_signal++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_INT_SERVICE:
+		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+			   inti->ext.ext_params);
+		vcpu->stat.deliver_service_signal++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_INT_VIRTIO:
+		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx",
+			   inti->ext.ext_params, inti->ext.ext_params2);
+		vcpu->stat.deliver_virtio_interrupt++;
+		rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_EXT_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM,
+			inti->ext.ext_params2);
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_SIGP_STOP:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+		vcpu->stat.deliver_stop_signal++;
+		__set_intercept_indicator(vcpu, inti);
+		break;
+
+	case KVM_S390_SIGP_SET_PREFIX:
+		VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
+			   inti->prefix.address);
+		vcpu->stat.deliver_prefix_signal++;
+		vcpu->arch.sie_block->prefix = inti->prefix.address;
+		vcpu->arch.sie_block->ihcpu = 0xffff;
+		break;
+
+	case KVM_S390_RESTART:
+		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+		vcpu->stat.deliver_restart_signal++;
+		rc = copy_to_guest(vcpu, offsetof(struct _lowcore,
+		  restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			offsetof(struct _lowcore, restart_psw), sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	case KVM_S390_PROGRAM_INT:
+		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+			   inti->pgm.code,
+			   table[vcpu->arch.sie_block->ipa >> 14]);
+		vcpu->stat.deliver_program_int++;
+		rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = put_guest_u16(vcpu, __LC_PGM_ILC,
+			table[vcpu->arch.sie_block->ipa >> 14]);
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW,
+			 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+
+		rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+			__LC_PGM_NEW_PSW, sizeof(psw_t));
+		if (rc == -EFAULT)
+			exception = 1;
+		break;
+
+	default:
+		BUG();
+	}
+
+	if (exception) {
+		VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering"
+			   " interrupt");
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		if (inti->type == KVM_S390_PROGRAM_INT) {
+			printk(KERN_WARNING "kvm: recursive program check\n");
+			BUG();
+		}
+	}
+}
+
+static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+{
+	int rc, exception = 0;
+
+	if (psw_extint_disabled(vcpu))
+		return 0;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		return 0;
+	rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004);
+	if (rc == -EFAULT)
+		exception = 1;
+	rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW,
+		 &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	if (rc == -EFAULT)
+		exception = 1;
+	rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw,
+		__LC_EXT_NEW_PSW, sizeof(psw_t));
+	if (rc == -EFAULT)
+		exception = 1;
+
+	if (exception) {
+		VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \
+			   " ckc interrupt");
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		return 0;
+	}
+
+	return 1;
+}
+
+int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct interrupt_info  *inti;
+	int rc = 0;
+
+	if (atomic_read(&li->active)) {
+		spin_lock_bh(&li->lock);
+		list_for_each_entry(inti, &li->list, list)
+			if (__interrupt_is_deliverable(vcpu, inti)) {
+				rc = 1;
+				break;
+			}
+		spin_unlock_bh(&li->lock);
+	}
+
+	if ((!rc) && atomic_read(&fi->active)) {
+		spin_lock_bh(&fi->lock);
+		list_for_each_entry(inti, &fi->list, list)
+			if (__interrupt_is_deliverable(vcpu, inti)) {
+				rc = 1;
+				break;
+			}
+		spin_unlock_bh(&fi->lock);
+	}
+
+	if ((!rc) && (vcpu->arch.sie_block->ckc <
+		get_clock() + vcpu->arch.sie_block->epoch)) {
+		if ((!psw_extint_disabled(vcpu)) &&
+			(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+			rc = 1;
+	}
+
+	return rc;
+}
+
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
+{
+	u64 now, sltime;
+	DECLARE_WAITQUEUE(wait, current);
+
+	vcpu->stat.exit_wait_state++;
+	if (kvm_cpu_has_interrupt(vcpu))
+		return 0;
+
+	if (psw_interrupts_disabled(vcpu)) {
+		VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
+		__unset_cpu_idle(vcpu);
+		return -ENOTSUPP; /* disabled wait */
+	}
+
+	if (psw_extint_disabled(vcpu) ||
+	    (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) {
+		VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
+		goto no_timer;
+	}
+
+	now = get_clock() + vcpu->arch.sie_block->epoch;
+	if (vcpu->arch.sie_block->ckc < now) {
+		__unset_cpu_idle(vcpu);
+		return 0;
+	}
+
+	sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
+
+	vcpu->arch.ckc_timer.expires = jiffies + sltime;
+
+	add_timer(&vcpu->arch.ckc_timer);
+	VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime);
+no_timer:
+	spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	__set_cpu_idle(vcpu);
+	vcpu->arch.local_int.timer_due = 0;
+	add_wait_queue(&vcpu->arch.local_int.wq, &wait);
+	while (list_empty(&vcpu->arch.local_int.list) &&
+		list_empty(&vcpu->arch.local_int.float_int->list) &&
+		(!vcpu->arch.local_int.timer_due) &&
+		!signal_pending(current)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+		spin_unlock_bh(&vcpu->arch.local_int.lock);
+		spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+		vcpu_put(vcpu);
+		schedule();
+		vcpu_load(vcpu);
+		spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
+		spin_lock_bh(&vcpu->arch.local_int.lock);
+	}
+	__unset_cpu_idle(vcpu);
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(&vcpu->wq, &wait);
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+	spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
+	del_timer(&vcpu->arch.ckc_timer);
+	return 0;
+}
+
+void kvm_s390_idle_wakeup(unsigned long data)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+
+	spin_lock_bh(&vcpu->arch.local_int.lock);
+	vcpu->arch.local_int.timer_due = 1;
+	if (waitqueue_active(&vcpu->arch.local_int.wq))
+		wake_up_interruptible(&vcpu->arch.local_int.wq);
+	spin_unlock_bh(&vcpu->arch.local_int.lock);
+}
+
+
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct float_interrupt *fi = vcpu->arch.local_int.float_int;
+	struct interrupt_info  *n, *inti = NULL;
+	int deliver;
+
+	__reset_intercept_indicators(vcpu);
+	if (atomic_read(&li->active)) {
+		do {
+			deliver = 0;
+			spin_lock_bh(&li->lock);
+			list_for_each_entry_safe(inti, n, &li->list, list) {
+				if (__interrupt_is_deliverable(vcpu, inti)) {
+					list_del(&inti->list);
+					deliver = 1;
+					break;
+				}
+				__set_intercept_indicator(vcpu, inti);
+			}
+			if (list_empty(&li->list))
+				atomic_set(&li->active, 0);
+			spin_unlock_bh(&li->lock);
+			if (deliver) {
+				__do_deliver_interrupt(vcpu, inti);
+				kfree(inti);
+			}
+		} while (deliver);
+	}
+
+	if ((vcpu->arch.sie_block->ckc <
+		get_clock() + vcpu->arch.sie_block->epoch))
+		__try_deliver_ckc_interrupt(vcpu);
+
+	if (atomic_read(&fi->active)) {
+		do {
+			deliver = 0;
+			spin_lock_bh(&fi->lock);
+			list_for_each_entry_safe(inti, n, &fi->list, list) {
+				if (__interrupt_is_deliverable(vcpu, inti)) {
+					list_del(&inti->list);
+					deliver = 1;
+					break;
+				}
+				__set_intercept_indicator(vcpu, inti);
+			}
+			if (list_empty(&fi->list))
+				atomic_set(&fi->active, 0);
+			spin_unlock_bh(&fi->lock);
+			if (deliver) {
+				__do_deliver_interrupt(vcpu, inti);
+				kfree(inti);
+			}
+		} while (deliver);
+	}
+}
+
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
+{
+	struct local_interrupt *li = &vcpu->arch.local_int;
+	struct interrupt_info *inti;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_PROGRAM_INT;;
+	inti->pgm.code = code;
+
+	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
+	spin_lock_bh(&li->lock);
+	list_add(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	BUG_ON(waitqueue_active(&li->wq));
+	spin_unlock_bh(&li->lock);
+	return 0;
+}
+
+int kvm_s390_inject_vm(struct kvm *kvm,
+		       struct kvm_s390_interrupt *s390int)
+{
+	struct local_interrupt *li;
+	struct float_interrupt *fi;
+	struct interrupt_info *inti;
+	int sigcpu;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	switch (s390int->type) {
+	case KVM_S390_INT_VIRTIO:
+		VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx",
+			 s390int->parm, s390int->parm64);
+		inti->type = s390int->type;
+		inti->ext.ext_params = s390int->parm;
+		inti->ext.ext_params2 = s390int->parm64;
+		break;
+	case KVM_S390_INT_SERVICE:
+		VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm);
+		inti->type = s390int->type;
+		inti->ext.ext_params = s390int->parm;
+		break;
+	case KVM_S390_PROGRAM_INT:
+	case KVM_S390_SIGP_STOP:
+	case KVM_S390_INT_EMERGENCY:
+	default:
+		kfree(inti);
+		return -EINVAL;
+	}
+
+	mutex_lock(&kvm->lock);
+	fi = &kvm->arch.float_int;
+	spin_lock_bh(&fi->lock);
+	list_add_tail(&inti->list, &fi->list);
+	atomic_set(&fi->active, 1);
+	sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
+	if (sigcpu == KVM_MAX_VCPUS) {
+		do {
+			sigcpu = fi->next_rr_cpu++;
+			if (sigcpu == KVM_MAX_VCPUS)
+				sigcpu = fi->next_rr_cpu = 0;
+		} while (fi->local_int[sigcpu] == NULL);
+	}
+	li = fi->local_int[sigcpu];
+	spin_lock_bh(&li->lock);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	spin_unlock_bh(&fi->lock);
+	mutex_unlock(&kvm->lock);
+	return 0;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+			 struct kvm_s390_interrupt *s390int)
+{
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	switch (s390int->type) {
+	case KVM_S390_PROGRAM_INT:
+		if (s390int->parm & 0xffff0000) {
+			kfree(inti);
+			return -EINVAL;
+		}
+		inti->type = s390int->type;
+		inti->pgm.code = s390int->parm;
+		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
+			   s390int->parm);
+		break;
+	case KVM_S390_SIGP_STOP:
+	case KVM_S390_RESTART:
+	case KVM_S390_SIGP_SET_PREFIX:
+	case KVM_S390_INT_EMERGENCY:
+		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
+		inti->type = s390int->type;
+		break;
+	case KVM_S390_INT_VIRTIO:
+	case KVM_S390_INT_SERVICE:
+	default:
+		kfree(inti);
+		return -EINVAL;
+	}
+
+	mutex_lock(&vcpu->kvm->lock);
+	li = &vcpu->arch.local_int;
+	spin_lock_bh(&li->lock);
+	if (inti->type == KVM_S390_PROGRAM_INT)
+		list_add(&inti->list, &li->list);
+	else
+		list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	if (inti->type == KVM_S390_SIGP_STOP)
+		li->action_bits |= ACTION_STOP_ON_STOP;
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&vcpu->arch.local_int.wq);
+	spin_unlock_bh(&li->lock);
+	mutex_unlock(&vcpu->kvm->lock);
+	return 0;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
new file mode 100644
index 000000000000..98d1e73e01f1
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.c
@@ -0,0 +1,685 @@
+/*
+ * s390host.c --  hosting zSeries kernel virtual machines
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ *               Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <asm/lowcore.h>
+#include <asm/pgtable.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+	{ "userspace_handled", VCPU_STAT(exit_userspace) },
+	{ "exit_validity", VCPU_STAT(exit_validity) },
+	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
+	{ "exit_external_request", VCPU_STAT(exit_external_request) },
+	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
+	{ "exit_instruction", VCPU_STAT(exit_instruction) },
+	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
+	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+	{ "instruction_lctg", VCPU_STAT(instruction_lctg) },
+	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
+	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
+	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
+	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
+	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
+	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
+	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
+	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
+	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
+	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
+	{ "instruction_spx", VCPU_STAT(instruction_spx) },
+	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
+	{ "instruction_stap", VCPU_STAT(instruction_stap) },
+	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
+	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
+	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
+	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
+	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
+	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
+	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
+	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
+	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+	{ "diagnose_44", VCPU_STAT(diagnose_44) },
+	{ NULL }
+};
+
+
+/* Section: not file related */
+void kvm_arch_hardware_enable(void *garbage)
+{
+	/* every s390 is virtualization enabled ;-) */
+}
+
+void kvm_arch_hardware_disable(void *garbage)
+{
+}
+
+void decache_vcpus_on_cpu(int cpu)
+{
+}
+
+int kvm_arch_hardware_setup(void)
+{
+	return 0;
+}
+
+void kvm_arch_hardware_unsetup(void)
+{
+}
+
+void kvm_arch_check_processor_compat(void *rtn)
+{
+}
+
+int kvm_arch_init(void *opaque)
+{
+	return 0;
+}
+
+void kvm_arch_exit(void)
+{
+}
+
+/* Section: device related */
+long kvm_arch_dev_ioctl(struct file *filp,
+			unsigned int ioctl, unsigned long arg)
+{
+	if (ioctl == KVM_S390_ENABLE_SIE)
+		return s390_enable_sie();
+	return -EINVAL;
+}
+
+int kvm_dev_ioctl_check_extension(long ext)
+{
+	return 0;
+}
+
+/* Section: vm related */
+/*
+ * Get (and clear) the dirty memory log for a memory slot.
+ */
+int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
+			       struct kvm_dirty_log *log)
+{
+	return 0;
+}
+
+long kvm_arch_vm_ioctl(struct file *filp,
+		       unsigned int ioctl, unsigned long arg)
+{
+	struct kvm *kvm = filp->private_data;
+	void __user *argp = (void __user *)arg;
+	int r;
+
+	switch (ioctl) {
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			break;
+		r = kvm_s390_inject_vm(kvm, &s390int);
+		break;
+	}
+	default:
+		r = -EINVAL;
+	}
+
+	return r;
+}
+
+struct kvm *kvm_arch_create_vm(void)
+{
+	struct kvm *kvm;
+	int rc;
+	char debug_name[16];
+
+	rc = s390_enable_sie();
+	if (rc)
+		goto out_nokvm;
+
+	rc = -ENOMEM;
+	kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL);
+	if (!kvm)
+		goto out_nokvm;
+
+	kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
+	if (!kvm->arch.sca)
+		goto out_nosca;
+
+	sprintf(debug_name, "kvm-%u", current->pid);
+
+	kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
+	if (!kvm->arch.dbf)
+		goto out_nodbf;
+
+	spin_lock_init(&kvm->arch.float_int.lock);
+	INIT_LIST_HEAD(&kvm->arch.float_int.list);
+
+	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
+	VM_EVENT(kvm, 3, "%s", "vm created");
+
+	try_module_get(THIS_MODULE);
+
+	return kvm;
+out_nodbf:
+	free_page((unsigned long)(kvm->arch.sca));
+out_nosca:
+	kfree(kvm);
+out_nokvm:
+	return ERR_PTR(rc);
+}
+
+void kvm_arch_destroy_vm(struct kvm *kvm)
+{
+	debug_unregister(kvm->arch.dbf);
+	free_page((unsigned long)(kvm->arch.sca));
+	kfree(kvm);
+	module_put(THIS_MODULE);
+}
+
+/* Section: vcpu related */
+int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but does'nt call it */
+	BUG();
+}
+
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+	save_fp_regs(&vcpu->arch.host_fpregs);
+	save_access_regs(vcpu->arch.host_acrs);
+	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
+	restore_fp_regs(&vcpu->arch.guest_fpregs);
+	restore_access_regs(vcpu->arch.guest_acrs);
+
+	if (signal_pending(current))
+		atomic_set_mask(CPUSTAT_STOP_INT,
+			&vcpu->arch.sie_block->cpuflags);
+}
+
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+{
+	save_fp_regs(&vcpu->arch.guest_fpregs);
+	save_access_regs(vcpu->arch.guest_acrs);
+	restore_fp_regs(&vcpu->arch.host_fpregs);
+	restore_access_regs(vcpu->arch.host_acrs);
+}
+
+static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
+{
+	/* this equals initial cpu reset in pop, but we don't switch to ESA */
+	vcpu->arch.sie_block->gpsw.mask = 0UL;
+	vcpu->arch.sie_block->gpsw.addr = 0UL;
+	vcpu->arch.sie_block->prefix    = 0UL;
+	vcpu->arch.sie_block->ihcpu     = 0xffff;
+	vcpu->arch.sie_block->cputm     = 0UL;
+	vcpu->arch.sie_block->ckc       = 0UL;
+	vcpu->arch.sie_block->todpr     = 0;
+	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
+	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
+	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
+	vcpu->arch.guest_fpregs.fpc = 0;
+	asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
+	vcpu->arch.sie_block->gbea = 1;
+}
+
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH);
+	vcpu->arch.sie_block->gmslm = 0xffffffffffUL;
+	vcpu->arch.sie_block->gmsor = 0x000000000000;
+	vcpu->arch.sie_block->ecb   = 2;
+	vcpu->arch.sie_block->eca   = 0xC1002001U;
+	setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
+		 (unsigned long) vcpu);
+	get_cpu_id(&vcpu->arch.cpu_id);
+	vcpu->arch.cpu_id.version = 0xfe;
+	return 0;
+}
+
+struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
+				      unsigned int id)
+{
+	struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL);
+	int rc = -ENOMEM;
+
+	if (!vcpu)
+		goto out_nomem;
+
+	vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL);
+
+	if (!vcpu->arch.sie_block)
+		goto out_free_cpu;
+
+	vcpu->arch.sie_block->icpua = id;
+	BUG_ON(!kvm->arch.sca);
+	BUG_ON(kvm->arch.sca->cpu[id].sda);
+	kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
+	vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
+	vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
+
+	spin_lock_init(&vcpu->arch.local_int.lock);
+	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
+	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
+	spin_lock_bh(&kvm->arch.float_int.lock);
+	kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
+	init_waitqueue_head(&vcpu->arch.local_int.wq);
+	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
+	spin_unlock_bh(&kvm->arch.float_int.lock);
+
+	rc = kvm_vcpu_init(vcpu, kvm, id);
+	if (rc)
+		goto out_free_cpu;
+	VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
+		 vcpu->arch.sie_block);
+
+	try_module_get(THIS_MODULE);
+
+	return vcpu;
+out_free_cpu:
+	kfree(vcpu);
+out_nomem:
+	return ERR_PTR(rc);
+}
+
+void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 3, "%s", "destroy cpu");
+	free_page((unsigned long)(vcpu->arch.sie_block));
+	kfree(vcpu);
+	module_put(THIS_MODULE);
+}
+
+int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
+{
+	/* kvm common code refers to this, but never calls it */
+	BUG();
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
+{
+	vcpu_load(vcpu);
+	kvm_s390_vcpu_initial_reset(vcpu);
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_gprs, &regs->gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+	vcpu_load(vcpu);
+	memcpy(&regs->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs));
+	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+				  struct kvm_sregs *sregs)
+{
+	vcpu_load(vcpu);
+	memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs));
+	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	vcpu_load(vcpu);
+	memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
+	vcpu->arch.guest_fpregs.fpc = fpu->fpc;
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+	vcpu_load(vcpu);
+	memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
+	fpu->fpc = vcpu->arch.guest_fpregs.fpc;
+	vcpu_put(vcpu);
+	return 0;
+}
+
+static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
+{
+	int rc = 0;
+
+	vcpu_load(vcpu);
+	if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING)
+		rc = -EBUSY;
+	else
+		vcpu->arch.sie_block->gpsw = psw;
+	vcpu_put(vcpu);
+	return rc;
+}
+
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+				  struct kvm_translation *tr)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu,
+				    struct kvm_debug_guest *dbg)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	return -EINVAL; /* not implemented yet */
+}
+
+static void __vcpu_run(struct kvm_vcpu *vcpu)
+{
+	memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16);
+
+	if (need_resched())
+		schedule();
+
+	vcpu->arch.sie_block->icptcode = 0;
+	local_irq_disable();
+	kvm_guest_enter();
+	local_irq_enable();
+	VCPU_EVENT(vcpu, 6, "entering sie flags %x",
+		   atomic_read(&vcpu->arch.sie_block->cpuflags));
+	sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs);
+	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
+		   vcpu->arch.sie_block->icptcode);
+	local_irq_disable();
+	kvm_guest_exit();
+	local_irq_enable();
+
+	memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16);
+}
+
+int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	int rc;
+	sigset_t sigsaved;
+
+	vcpu_load(vcpu);
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
+	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
+
+	BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
+
+	switch (kvm_run->exit_reason) {
+	case KVM_EXIT_S390_SIEIC:
+		vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask;
+		vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr;
+		break;
+	case KVM_EXIT_UNKNOWN:
+	case KVM_EXIT_S390_RESET:
+		break;
+	default:
+		BUG();
+	}
+
+	might_sleep();
+
+	do {
+		kvm_s390_deliver_pending_interrupts(vcpu);
+		__vcpu_run(vcpu);
+		rc = kvm_handle_sie_intercept(vcpu);
+	} while (!signal_pending(current) && !rc);
+
+	if (signal_pending(current) && !rc)
+		rc = -EINTR;
+
+	if (rc == -ENOTSUPP) {
+		/* intercept cannot be handled in-kernel, prepare kvm-run */
+		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
+		kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
+		kvm_run->s390_sieic.mask     = vcpu->arch.sie_block->gpsw.mask;
+		kvm_run->s390_sieic.addr     = vcpu->arch.sie_block->gpsw.addr;
+		kvm_run->s390_sieic.ipa      = vcpu->arch.sie_block->ipa;
+		kvm_run->s390_sieic.ipb      = vcpu->arch.sie_block->ipb;
+		rc = 0;
+	}
+
+	if (rc == -EREMOTE) {
+		/* intercept was handled, but userspace support is needed
+		 * kvm_run has been prepared by the handler */
+		rc = 0;
+	}
+
+	if (vcpu->sigset_active)
+		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+
+	vcpu_put(vcpu);
+
+	vcpu->stat.exit_userspace++;
+	return rc;
+}
+
+static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from,
+		       unsigned long n, int prefix)
+{
+	if (prefix)
+		return copy_to_guest(vcpu, guestdest, from, n);
+	else
+		return copy_to_guest_absolute(vcpu, guestdest, from, n);
+}
+
+/*
+ * store status at address
+ * we use have two special cases:
+ * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
+ * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
+ */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	const unsigned char archmode = 1;
+	int prefix;
+
+	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
+		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
+			return -EFAULT;
+		addr = SAVE_AREA_BASE;
+		prefix = 0;
+	} else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
+		if (copy_to_guest(vcpu, 163ul, &archmode, 1))
+			return -EFAULT;
+		addr = SAVE_AREA_BASE;
+		prefix = 1;
+	} else
+		prefix = 0;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs),
+			vcpu->arch.guest_fpregs.fprs, 128, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs),
+			vcpu->arch.guest_gprs, 128, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw),
+			&vcpu->arch.sie_block->gpsw, 16, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg),
+			&vcpu->arch.sie_block->prefix, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu,
+			addr + offsetof(struct save_area_s390x, fp_ctrl_reg),
+			&vcpu->arch.guest_fpregs.fpc, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg),
+			&vcpu->arch.sie_block->todpr, 4, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer),
+			&vcpu->arch.sie_block->cputm, 8, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp),
+			&vcpu->arch.sie_block->ckc, 8, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs),
+			&vcpu->arch.guest_acrs, 64, prefix))
+		return -EFAULT;
+
+	if (__guestcopy(vcpu,
+			addr + offsetof(struct save_area_s390x, ctrl_regs),
+			&vcpu->arch.sie_block->gcr, 128, prefix))
+		return -EFAULT;
+	return 0;
+}
+
+static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	int rc;
+
+	vcpu_load(vcpu);
+	rc = __kvm_s390_vcpu_store_status(vcpu, addr);
+	vcpu_put(vcpu);
+	return rc;
+}
+
+long kvm_arch_vcpu_ioctl(struct file *filp,
+			 unsigned int ioctl, unsigned long arg)
+{
+	struct kvm_vcpu *vcpu = filp->private_data;
+	void __user *argp = (void __user *)arg;
+
+	switch (ioctl) {
+	case KVM_S390_INTERRUPT: {
+		struct kvm_s390_interrupt s390int;
+
+		if (copy_from_user(&s390int, argp, sizeof(s390int)))
+			return -EFAULT;
+		return kvm_s390_inject_vcpu(vcpu, &s390int);
+	}
+	case KVM_S390_STORE_STATUS:
+		return kvm_s390_vcpu_store_status(vcpu, arg);
+	case KVM_S390_SET_INITIAL_PSW: {
+		psw_t psw;
+
+		if (copy_from_user(&psw, argp, sizeof(psw)))
+			return -EFAULT;
+		return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
+	}
+	case KVM_S390_INITIAL_RESET:
+		return kvm_arch_vcpu_ioctl_initial_reset(vcpu);
+	default:
+		;
+	}
+	return -EINVAL;
+}
+
+/* Section: memory related */
+int kvm_arch_set_memory_region(struct kvm *kvm,
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old,
+				int user_alloc)
+{
+	/* A few sanity checks. We can have exactly one memory slot which has
+	   to start at guest virtual zero and which has to be located at a
+	   page boundary in userland and which has to end at a page boundary.
+	   The memory in userland is ok to be fragmented into various different
+	   vmas. It is okay to mmap() and munmap() stuff in this slot after
+	   doing this call at any time */
+
+	if (mem->slot)
+		return -EINVAL;
+
+	if (mem->guest_phys_addr)
+		return -EINVAL;
+
+	if (mem->userspace_addr & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	if (mem->memory_size & (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	kvm->arch.guest_origin = mem->userspace_addr;
+	kvm->arch.guest_memsize = mem->memory_size;
+
+	/* FIXME: we do want to interrupt running CPUs and update their memory
+	   configuration now to avoid race conditions. But hey, changing the
+	   memory layout while virtual CPUs are running is usually bad
+	   programming practice. */
+
+	return 0;
+}
+
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+{
+	return gfn;
+}
+
+static int __init kvm_s390_init(void)
+{
+	return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE);
+}
+
+static void __exit kvm_s390_exit(void)
+{
+	kvm_exit();
+}
+
+module_init(kvm_s390_init);
+module_exit(kvm_s390_exit);
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
new file mode 100644
index 000000000000..3893cf12eacf
--- /dev/null
+++ b/arch/s390/kvm/kvm-s390.h
@@ -0,0 +1,64 @@
+/*
+ * kvm_s390.h -  definition for kvm on s390
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#ifndef ARCH_S390_KVM_S390_H
+#define ARCH_S390_KVM_S390_H
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+
+typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
+
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
+#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \
+	  d_args); \
+} while (0)
+
+#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\
+do { \
+	debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \
+	  "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \
+	  d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\
+	  d_args); \
+} while (0)
+
+static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
+{
+	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
+}
+
+int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_idle_wakeup(unsigned long data);
+void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
+int kvm_s390_inject_vm(struct kvm *kvm,
+		struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
+		struct kvm_s390_interrupt *s390int);
+int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
+
+/* implemented in priv.c */
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu);
+
+/* implemented in sigp.c */
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
+
+/* implemented in kvm-s390.c */
+int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
+				 unsigned long addr);
+/* implemented in diag.c */
+int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
new file mode 100644
index 000000000000..1465946325c5
--- /dev/null
+++ b/arch/s390/kvm/priv.c
@@ -0,0 +1,323 @@
+/*
+ * priv.c - handling privileged instructions
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/errno.h>
+#include <asm/current.h>
+#include <asm/debug.h>
+#include <asm/ebcdic.h>
+#include <asm/sysinfo.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+static int handle_set_prefix(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	u32 address = 0;
+	u8 tmp;
+
+	vcpu->stat.instruction_spx++;
+
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	/* must be word boundary */
+	if (operand2 & 3) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	/* get the value */
+	if (get_guest_u32(vcpu, operand2, &address)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	address = address & 0x7fffe000u;
+
+	/* make sure that the new value is valid memory */
+	if (copy_from_guest_absolute(vcpu, &tmp, address, 1) ||
+	   (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	vcpu->arch.sie_block->prefix = address;
+	vcpu->arch.sie_block->ihcpu = 0xffff;
+
+	VCPU_EVENT(vcpu, 5, "setting prefix to %x", address);
+out:
+	return 0;
+}
+
+static int handle_store_prefix(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	u32 address;
+
+	vcpu->stat.instruction_stpx++;
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	/* must be word boundary */
+	if (operand2 & 3) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	address = vcpu->arch.sie_block->prefix;
+	address = address & 0x7fffe000u;
+
+	/* get the value */
+	if (put_guest_u32(vcpu, operand2, address)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "storing prefix to %x", address);
+out:
+	return 0;
+}
+
+static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 useraddr;
+	int rc;
+
+	vcpu->stat.instruction_stap++;
+	useraddr = disp2;
+	if (base2)
+		useraddr += vcpu->arch.guest_gprs[base2];
+
+	if (useraddr & 1) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id);
+	if (rc == -EFAULT) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr);
+out:
+	return 0;
+}
+
+static int handle_skey(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_storage_key++;
+	vcpu->arch.sie_block->gpsw.addr -= 4;
+	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+	return 0;
+}
+
+static int handle_stsch(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_stsch++;
+	VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3");
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+	return 0;
+}
+
+static int handle_chsc(struct kvm_vcpu *vcpu)
+{
+	vcpu->stat.instruction_chsc++;
+	VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3");
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
+	return 0;
+}
+
+static unsigned int kvm_stfl(void)
+{
+	asm volatile(
+		"	.insn	s,0xb2b10000,0(0)\n" /* stfl */
+		"0:\n"
+		EX_TABLE(0b, 0b));
+	return S390_lowcore.stfl_fac_list;
+}
+
+static int handle_stfl(struct kvm_vcpu *vcpu)
+{
+	unsigned int facility_list = kvm_stfl();
+	int rc;
+
+	vcpu->stat.instruction_stfl++;
+	facility_list &= ~(1UL<<24); /* no stfle */
+
+	rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
+			   &facility_list, sizeof(facility_list));
+	if (rc == -EFAULT)
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	else
+		VCPU_EVENT(vcpu, 5, "store facility list value %x",
+			   facility_list);
+	return 0;
+}
+
+static int handle_stidp(struct kvm_vcpu *vcpu)
+{
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	int rc;
+
+	vcpu->stat.instruction_stidp++;
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	if (operand2 & 7) {
+		kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+		goto out;
+	}
+
+	rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data);
+	if (rc == -EFAULT) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out;
+	}
+
+	VCPU_EVENT(vcpu, 5, "%s", "store cpu id");
+out:
+	return 0;
+}
+
+static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	int cpus = 0;
+	int n;
+
+	spin_lock_bh(&fi->lock);
+	for (n = 0; n < KVM_MAX_VCPUS; n++)
+		if (fi->local_int[n])
+			cpus++;
+	spin_unlock_bh(&fi->lock);
+
+	/* deal with other level 3 hypervisors */
+	if (stsi(mem, 3, 2, 2) == -ENOSYS)
+		mem->count = 0;
+	if (mem->count < 8)
+		mem->count++;
+	for (n = mem->count - 1; n > 0 ; n--)
+		memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+
+	mem->vm[0].cpus_total = cpus;
+	mem->vm[0].cpus_configured = cpus;
+	mem->vm[0].cpus_standby = 0;
+	mem->vm[0].cpus_reserved = 0;
+	mem->vm[0].caf = 1000;
+	memcpy(mem->vm[0].name, "KVMguest", 8);
+	ASCEBC(mem->vm[0].name, 8);
+	memcpy(mem->vm[0].cpi, "KVM/Linux       ", 16);
+	ASCEBC(mem->vm[0].cpi, 16);
+}
+
+static int handle_stsi(struct kvm_vcpu *vcpu)
+{
+	int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28;
+	int sel1 = vcpu->arch.guest_gprs[0] & 0xff;
+	int sel2 = vcpu->arch.guest_gprs[1] & 0xffff;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u64 operand2;
+	unsigned long mem;
+
+	vcpu->stat.instruction_stsi++;
+	VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
+
+	operand2 = disp2;
+	if (base2)
+		operand2 += vcpu->arch.guest_gprs[base2];
+
+	if (operand2 & 0xfff && fc > 0)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+	switch (fc) {
+	case 0:
+		vcpu->arch.guest_gprs[0] = 3 << 28;
+		vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+		return 0;
+	case 1: /* same handling for 1 and 2 */
+	case 2:
+		mem = get_zeroed_page(GFP_KERNEL);
+		if (!mem)
+			goto out_fail;
+		if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS)
+			goto out_mem;
+		break;
+	case 3:
+		if (sel1 != 2 || sel2 != 2)
+			goto out_fail;
+		mem = get_zeroed_page(GFP_KERNEL);
+		if (!mem)
+			goto out_fail;
+		handle_stsi_3_2_2(vcpu, (void *) mem);
+		break;
+	default:
+		goto out_fail;
+	}
+
+	if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) {
+		kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+		goto out_mem;
+	}
+	free_page(mem);
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.guest_gprs[0] = 0;
+	return 0;
+out_mem:
+	free_page(mem);
+out_fail:
+	/* condition code 3 */
+	vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
+	return 0;
+}
+
+static intercept_handler_t priv_handlers[256] = {
+	[0x02] = handle_stidp,
+	[0x10] = handle_set_prefix,
+	[0x11] = handle_store_prefix,
+	[0x12] = handle_store_cpu_address,
+	[0x29] = handle_skey,
+	[0x2a] = handle_skey,
+	[0x2b] = handle_skey,
+	[0x34] = handle_stsch,
+	[0x5f] = handle_chsc,
+	[0x7d] = handle_stsi,
+	[0xb1] = handle_stfl,
+};
+
+int kvm_s390_handle_priv(struct kvm_vcpu *vcpu)
+{
+	intercept_handler_t handler;
+
+	handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff];
+	if (handler)
+		return handler(vcpu);
+	return -ENOTSUPP;
+}
diff --git a/arch/s390/kvm/sie64a.S b/arch/s390/kvm/sie64a.S
new file mode 100644
index 000000000000..934fd6a885f6
--- /dev/null
+++ b/arch/s390/kvm/sie64a.S
@@ -0,0 +1,47 @@
+/*
+ * sie64a.S - low level sie call
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/errno.h>
+#include <asm/asm-offsets.h>
+
+SP_R5 =	5 * 8	# offset into stackframe
+SP_R6 =	6 * 8
+
+/*
+ * sie64a calling convention:
+ * %r2 pointer to sie control block
+ * %r3 guest register save area
+ */
+	.globl	sie64a
+sie64a:
+	lgr	%r5,%r3
+	stmg	%r5,%r14,SP_R5(%r15)	# save register on entry
+	lgr	%r14,%r2		# pointer to sie control block
+	lmg	%r0,%r13,0(%r3)		# load guest gprs 0-13
+sie_inst:
+	sie	0(%r14)
+	lg	%r14,SP_R5(%r15)
+	stmg	%r0,%r13,0(%r14)	# save guest gprs 0-13
+	lghi	%r2,0
+	lmg	%r6,%r14,SP_R6(%r15)
+	br	%r14
+
+sie_err:
+	lg	%r14,SP_R5(%r15)
+	stmg	%r0,%r13,0(%r14)	# save guest gprs 0-13
+	lghi	%r2,-EFAULT
+	lmg	%r6,%r14,SP_R6(%r15)
+	br	%r14
+
+	.section __ex_table,"a"
+	.quad	sie_inst,sie_err
+	.previous
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
new file mode 100644
index 000000000000..0a236acfb5f6
--- /dev/null
+++ b/arch/s390/kvm/sigp.c
@@ -0,0 +1,288 @@
+/*
+ * sigp.c - handlinge interprocessor communication
+ *
+ * Copyright IBM Corp. 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ *    Author(s): Carsten Otte <cotte@de.ibm.com>
+ *               Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include "gaccess.h"
+#include "kvm-s390.h"
+
+/* sigp order codes */
+#define SIGP_SENSE             0x01
+#define SIGP_EXTERNAL_CALL     0x02
+#define SIGP_EMERGENCY         0x03
+#define SIGP_START             0x04
+#define SIGP_STOP              0x05
+#define SIGP_RESTART           0x06
+#define SIGP_STOP_STORE_STATUS 0x09
+#define SIGP_INITIAL_CPU_RESET 0x0b
+#define SIGP_CPU_RESET         0x0c
+#define SIGP_SET_PREFIX        0x0d
+#define SIGP_STORE_STATUS_ADDR 0x0e
+#define SIGP_SET_ARCH          0x12
+
+/* cpu status bits */
+#define SIGP_STAT_EQUIPMENT_CHECK   0x80000000UL
+#define SIGP_STAT_INCORRECT_STATE   0x00000200UL
+#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL
+#define SIGP_STAT_EXT_CALL_PENDING  0x00000080UL
+#define SIGP_STAT_STOPPED           0x00000040UL
+#define SIGP_STAT_OPERATOR_INTERV   0x00000020UL
+#define SIGP_STAT_CHECK_STOP        0x00000010UL
+#define SIGP_STAT_INOPERATIVE       0x00000004UL
+#define SIGP_STAT_INVALID_ORDER     0x00000002UL
+#define SIGP_STAT_RECEIVER_CHECK    0x00000001UL
+
+
+static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	spin_lock_bh(&fi->lock);
+	if (fi->local_int[cpu_addr] == NULL)
+		rc = 3; /* not operational */
+	else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
+		 & CPUSTAT_RUNNING) {
+		*reg &= 0xffffffff00000000UL;
+		rc = 1; /* status stored */
+	} else {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STAT_STOPPED;
+		rc = 1; /* status stored */
+	}
+	spin_unlock_bh(&fi->lock);
+
+	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+	return rc;
+}
+
+static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_INT_EMERGENCY;
+
+	spin_lock_bh(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		kfree(inti);
+		goto unlock;
+	}
+	spin_lock_bh(&li->lock);
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	rc = 0; /* order accepted */
+unlock:
+	spin_unlock_bh(&fi->lock);
+	VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+	return rc;
+}
+
+static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+
+	if (cpu_addr >= KVM_MAX_VCPUS)
+		return 3; /* not operational */
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return -ENOMEM;
+
+	inti->type = KVM_S390_SIGP_STOP;
+
+	spin_lock_bh(&fi->lock);
+	li = fi->local_int[cpu_addr];
+	if (li == NULL) {
+		rc = 3; /* not operational */
+		kfree(inti);
+		goto unlock;
+	}
+	spin_lock_bh(&li->lock);
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+	if (store)
+		li->action_bits |= ACTION_STORE_ON_STOP;
+	li->action_bits |= ACTION_STOP_ON_STOP;
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	spin_unlock_bh(&li->lock);
+	rc = 0; /* order accepted */
+unlock:
+	spin_unlock_bh(&fi->lock);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+	return rc;
+}
+
+static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
+{
+	int rc;
+
+	switch (parameter & 0xff) {
+	case 0:
+		printk(KERN_WARNING "kvm: request to switch to ESA/390 mode"
+							" not supported");
+		rc = 3; /* not operational */
+		break;
+	case 1:
+	case 2:
+		rc = 0; /* order accepted */
+		break;
+	default:
+		rc = -ENOTSUPP;
+	}
+	return rc;
+}
+
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
+			     u64 *reg)
+{
+	struct float_interrupt *fi = &vcpu->kvm->arch.float_int;
+	struct local_interrupt *li;
+	struct interrupt_info *inti;
+	int rc;
+	u8 tmp;
+
+	/* make sure that the new value is valid memory */
+	address = address & 0x7fffe000u;
+	if ((copy_from_guest(vcpu, &tmp,
+		(u64) (address + vcpu->kvm->arch.guest_origin) , 1)) ||
+	   (copy_from_guest(vcpu, &tmp, (u64) (address +
+			vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) {
+		*reg |= SIGP_STAT_INVALID_PARAMETER;
+		return 1; /* invalid parameter */
+	}
+
+	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
+	if (!inti)
+		return 2; /* busy */
+
+	spin_lock_bh(&fi->lock);
+	li = fi->local_int[cpu_addr];
+
+	if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
+		rc = 1; /* incorrect state */
+		*reg &= SIGP_STAT_INCORRECT_STATE;
+		kfree(inti);
+		goto out_fi;
+	}
+
+	spin_lock_bh(&li->lock);
+	/* cpu must be in stopped state */
+	if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
+		rc = 1; /* incorrect state */
+		*reg &= SIGP_STAT_INCORRECT_STATE;
+		kfree(inti);
+		goto out_li;
+	}
+
+	inti->type = KVM_S390_SIGP_SET_PREFIX;
+	inti->prefix.address = address;
+
+	list_add_tail(&inti->list, &li->list);
+	atomic_set(&li->active, 1);
+	if (waitqueue_active(&li->wq))
+		wake_up_interruptible(&li->wq);
+	rc = 0; /* order accepted */
+
+	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+out_li:
+	spin_unlock_bh(&li->lock);
+out_fi:
+	spin_unlock_bh(&fi->lock);
+	return rc;
+}
+
+int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
+{
+	int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
+	int r3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int base2 = vcpu->arch.sie_block->ipb >> 28;
+	int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+	u32 parameter;
+	u16 cpu_addr = vcpu->arch.guest_gprs[r3];
+	u8 order_code;
+	int rc;
+
+	order_code = disp2;
+	if (base2)
+		order_code += vcpu->arch.guest_gprs[base2];
+
+	if (r1 % 2)
+		parameter = vcpu->arch.guest_gprs[r1];
+	else
+		parameter = vcpu->arch.guest_gprs[r1 + 1];
+
+	switch (order_code) {
+	case SIGP_SENSE:
+		vcpu->stat.instruction_sigp_sense++;
+		rc = __sigp_sense(vcpu, cpu_addr,
+				  &vcpu->arch.guest_gprs[r1]);
+		break;
+	case SIGP_EMERGENCY:
+		vcpu->stat.instruction_sigp_emergency++;
+		rc = __sigp_emergency(vcpu, cpu_addr);
+		break;
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, cpu_addr, 0);
+		break;
+	case SIGP_STOP_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, cpu_addr, 1);
+		break;
+	case SIGP_SET_ARCH:
+		vcpu->stat.instruction_sigp_arch++;
+		rc = __sigp_set_arch(vcpu, parameter);
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
+				       &vcpu->arch.guest_gprs[r1]);
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		/* user space must know about restart */
+	default:
+		return -ENOTSUPP;
+	}
+
+	if (rc < 0)
+		return rc;
+
+	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+	vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+	return 0;
+}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index fd072013f88c..5c1aea97cd12 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -30,11 +30,27 @@
 #define TABLES_PER_PAGE	4
 #define FRAG_MASK	15UL
 #define SECOND_HALVES	10UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 256, 0, PAGE_SIZE/4);
+	clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
+	memset(table + 768, 0, PAGE_SIZE/4);
+}
+
 #else
 #define ALLOC_ORDER	2
 #define TABLES_PER_PAGE	2
 #define FRAG_MASK	3UL
 #define SECOND_HALVES	2UL
+
+void clear_table_pgstes(unsigned long *table)
+{
+	clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
+	memset(table + 256, 0, PAGE_SIZE/2);
+}
+
 #endif
 
 unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
@@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 	unsigned long *table;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	spin_lock(&mm->page_table_lock);
 	page = NULL;
 	if (!list_empty(&mm->context.pgtable_list)) {
@@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
 		pgtable_page_ctor(page);
 		page->flags &= ~FRAG_MASK;
 		table = (unsigned long *) page_to_phys(page);
-		clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
+		if (mm->context.pgstes)
+			clear_table_pgstes(table);
+		else
+			clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
 		spin_lock(&mm->page_table_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
 	}
@@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	struct page *page;
 	unsigned long bits;
 
-	bits = mm->context.noexec ? 3UL : 1UL;
+	bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL;
 	bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
 	spin_lock(&mm->page_table_lock);
@@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
 	mm->context.noexec = 0;
 	update_mm(mm, tsk);
 }
+
+/*
+ * switch on pgstes for its userspace process (for kvm)
+ */
+int s390_enable_sie(void)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm;
+	int rc;
+
+	task_lock(tsk);
+
+	rc = 0;
+	if (tsk->mm->context.pgstes)
+		goto unlock;
+
+	rc = -EINVAL;
+	if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+	    tsk->mm != tsk->active_mm || tsk->mm->ioctx_list)
+		goto unlock;
+
+	tsk->mm->context.pgstes = 1;	/* dirty little tricks .. */
+	mm = dup_mm(tsk);
+	tsk->mm->context.pgstes = 0;
+
+	rc = -ENOMEM;
+	if (!mm)
+		goto unlock;
+	mmput(tsk->mm);
+	tsk->mm = tsk->active_mm = mm;
+	preempt_disable();
+	update_mm(mm, tsk);
+	cpu_set(smp_processor_id(), mm->cpu_vm_mask);
+	preempt_enable();
+	rc = 0;
+unlock:
+	task_unlock(tsk);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(s390_enable_sie);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 53dde0607362..d7df26bd1e54 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -307,15 +307,6 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-void online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	init_page_count(page);
-	__free_page(page);
-	totalram_pages++;
-	num_physpages++;
-}
-
 int arch_add_memory(int nid, u64 start, u64 size)
 {
 	pg_data_t *pgdat;
diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
index 70c0dd22491d..e7f35198ae34 100644
--- a/arch/sparc/kernel/process.c
+++ b/arch/sparc/kernel/process.c
@@ -357,8 +357,6 @@ void flush_thread(void)
 {
 	current_thread_info()->w_saved = 0;
 
-	/* No new signal delivery by default */
-	current->thread.new_signal = 0;
 #ifndef CONFIG_SMP
 	if(last_task_used_math == current) {
 #else
diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c
index 3e849e8e3480..3c312290c3c2 100644
--- a/arch/sparc/kernel/signal.c
+++ b/arch/sparc/kernel/signal.c
@@ -1,5 +1,4 @@
-/*  $Id: signal.c,v 1.110 2002/02/08 03:57:14 davem Exp $
- *  linux/arch/sparc/kernel/signal.c
+/*  linux/arch/sparc/kernel/signal.c
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -32,37 +31,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
 		   void *fpqueue, unsigned long *fpqdepth);
 extern void fpload(unsigned long *fpregs, unsigned long *fsr);
 
-/* Signal frames: the original one (compatible with SunOS):
- *
- * Set up a signal frame... Make the stack look the way SunOS
- * expects it to look which is basically:
- *
- * ---------------------------------- <-- %sp at signal time
- * Struct sigcontext
- * Signal address
- * Ptr to sigcontext area above
- * Signal code
- * The signal number itself
- * One register window
- * ---------------------------------- <-- New %sp
- */
-struct signal_sframe {
-	struct reg_window	sig_window;
-	int			sig_num;
-	int			sig_code;
-	struct sigcontext __user *sig_scptr;
-	int			sig_address;
-	struct sigcontext	sig_context;
-	unsigned int		extramask[_NSIG_WORDS - 1];
-};
-
-/* 
- * And the new one, intended to be used for Linux applications only
- * (we have enough in there to work with clone).
- * All the interesting bits are in the info field.
- */
-
-struct new_signal_frame {
+struct signal_frame {
 	struct sparc_stackf	ss;
 	__siginfo_t		info;
 	__siginfo_fpu_t __user	*fpu_save;
@@ -85,8 +54,7 @@ struct rt_signal_frame {
 };
 
 /* Align macros */
-#define SF_ALIGNEDSZ  (((sizeof(struct signal_sframe) + 7) & (~7)))
-#define NF_ALIGNEDSZ  (((sizeof(struct new_signal_frame) + 7) & (~7)))
+#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame) + 7) & (~7)))
 #define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame) + 7) & (~7)))
 
 static int _sigpause_common(old_sigset_t set)
@@ -141,15 +109,20 @@ restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
 	return err;
 }
 
-static inline void do_new_sigreturn (struct pt_regs *regs)
+asmlinkage void do_sigreturn(struct pt_regs *regs)
 {
-	struct new_signal_frame __user *sf;
+	struct signal_frame __user *sf;
 	unsigned long up_psr, pc, npc;
 	sigset_t set;
 	__siginfo_fpu_t __user *fpu_save;
 	int err;
 
-	sf = (struct new_signal_frame __user *) regs->u_regs[UREG_FP];
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	synchronize_user_stack();
+
+	sf = (struct signal_frame __user *) regs->u_regs[UREG_FP];
 
 	/* 1. Make sure we are not getting garbage from the user */
 	if (!access_ok(VERIFY_READ, sf, sizeof(*sf)))
@@ -198,73 +171,6 @@ segv_and_exit:
 	force_sig(SIGSEGV, current);
 }
 
-asmlinkage void do_sigreturn(struct pt_regs *regs)
-{
-	struct sigcontext __user *scptr;
-	unsigned long pc, npc, psr;
-	sigset_t set;
-	int err;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-	synchronize_user_stack();
-
-	if (current->thread.new_signal) {
-		do_new_sigreturn(regs);
-		return;
-	}
-
-	scptr = (struct sigcontext __user *) regs->u_regs[UREG_I0];
-
-	/* Check sanity of the user arg. */
-	if (!access_ok(VERIFY_READ, scptr, sizeof(struct sigcontext)) ||
-	    (((unsigned long) scptr) & 3))
-		goto segv_and_exit;
-
-	err = __get_user(pc, &scptr->sigc_pc);
-	err |= __get_user(npc, &scptr->sigc_npc);
-
-	if ((pc | npc) & 3)
-		goto segv_and_exit;
-
-	/* This is pretty much atomic, no amount locking would prevent
-	 * the races which exist anyways.
-	 */
-	err |= __get_user(set.sig[0], &scptr->sigc_mask);
-	/* Note that scptr + 1 points to extramask */
-	err |= __copy_from_user(&set.sig[1], scptr + 1,
-				(_NSIG_WORDS - 1) * sizeof(unsigned int));
-	
-	if (err)
-		goto segv_and_exit;
-
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-
-	regs->pc = pc;
-	regs->npc = npc;
-
-	err = __get_user(regs->u_regs[UREG_FP], &scptr->sigc_sp);
-	err |= __get_user(regs->u_regs[UREG_I0], &scptr->sigc_o0);
-	err |= __get_user(regs->u_regs[UREG_G1], &scptr->sigc_g1);
-
-	/* User can only change condition codes in %psr. */
-	err |= __get_user(psr, &scptr->sigc_psr);
-	if (err)
-		goto segv_and_exit;
-		
-	regs->psr &= ~(PSR_ICC);
-	regs->psr |= (psr & PSR_ICC);
-	return;
-
-segv_and_exit:
-	force_sig(SIGSEGV, current);
-}
-
 asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 {
 	struct rt_signal_frame __user *sf;
@@ -351,128 +257,6 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re
 	return (void __user *)(sp - framesize);
 }
 
-static inline void
-setup_frame(struct sigaction *sa, struct pt_regs *regs, int signr, sigset_t *oldset, siginfo_t *info)
-{
-	struct signal_sframe __user *sframep;
-	struct sigcontext __user *sc;
-	int window = 0, err;
-	unsigned long pc = regs->pc;
-	unsigned long npc = regs->npc;
-	struct thread_info *tp = current_thread_info();
-	void __user *sig_address;
-	int sig_code;
-
-	synchronize_user_stack();
-	sframep = (struct signal_sframe __user *)
-		get_sigframe(sa, regs, SF_ALIGNEDSZ);
-	if (invalid_frame_pointer(sframep, sizeof(*sframep))){
-		/* Don't change signal code and address, so that
-		 * post mortem debuggers can have a look.
-		 */
-		goto sigill_and_return;
-	}
-
-	sc = &sframep->sig_context;
-
-	/* We've already made sure frame pointer isn't in kernel space... */
-	err  = __put_user((sas_ss_flags(regs->u_regs[UREG_FP]) == SS_ONSTACK),
-			 &sc->sigc_onstack);
-	err |= __put_user(oldset->sig[0], &sc->sigc_mask);
-	err |= __copy_to_user(sframep->extramask, &oldset->sig[1],
-			      (_NSIG_WORDS - 1) * sizeof(unsigned int));
-	err |= __put_user(regs->u_regs[UREG_FP], &sc->sigc_sp);
-	err |= __put_user(pc, &sc->sigc_pc);
-	err |= __put_user(npc, &sc->sigc_npc);
-	err |= __put_user(regs->psr, &sc->sigc_psr);
-	err |= __put_user(regs->u_regs[UREG_G1], &sc->sigc_g1);
-	err |= __put_user(regs->u_regs[UREG_I0], &sc->sigc_o0);
-	err |= __put_user(tp->w_saved, &sc->sigc_oswins);
-	if (tp->w_saved)
-		for (window = 0; window < tp->w_saved; window++) {
-			put_user((char *)tp->rwbuf_stkptrs[window],
-				 &sc->sigc_spbuf[window]);
-			err |= __copy_to_user(&sc->sigc_wbuf[window],
-					      &tp->reg_window[window],
-					      sizeof(struct reg_window));
-		}
-	else
-		err |= __copy_to_user(sframep, (char *) regs->u_regs[UREG_FP],
-				      sizeof(struct reg_window));
-
-	tp->w_saved = 0; /* So process is allowed to execute. */
-
-	err |= __put_user(signr, &sframep->sig_num);
-	sig_address = NULL;
-	sig_code = 0;
-	if (SI_FROMKERNEL (info) && (info->si_code & __SI_MASK) == __SI_FAULT) {
-		sig_address = info->si_addr;
-		switch (signr) {
-		case SIGSEGV:
-			switch (info->si_code) {
-			case SEGV_MAPERR: sig_code = SUBSIG_NOMAPPING; break;
-			default: sig_code = SUBSIG_PROTECTION; break;
-			}
-			break;
-		case SIGILL:
-			switch (info->si_code) {
-			case ILL_ILLOPC: sig_code = SUBSIG_ILLINST; break;
-			case ILL_PRVOPC: sig_code = SUBSIG_PRIVINST; break;
-			case ILL_ILLTRP: sig_code = SUBSIG_BADTRAP(info->si_trapno); break;
-			default: sig_code = SUBSIG_STACK; break;
-			}
-			break;
-		case SIGFPE:
-			switch (info->si_code) {
-			case FPE_INTDIV: sig_code = SUBSIG_IDIVZERO; break;
-			case FPE_INTOVF: sig_code = SUBSIG_FPINTOVFL; break;
-			case FPE_FLTDIV: sig_code = SUBSIG_FPDIVZERO; break;
-			case FPE_FLTOVF: sig_code = SUBSIG_FPOVFLOW; break;
-			case FPE_FLTUND: sig_code = SUBSIG_FPUNFLOW; break;
-			case FPE_FLTRES: sig_code = SUBSIG_FPINEXACT; break;
-			case FPE_FLTINV: sig_code = SUBSIG_FPOPERROR; break;
-			default: sig_code = SUBSIG_FPERROR; break;
-			}
-			break;
-		case SIGBUS:
-			switch (info->si_code) {
-			case BUS_ADRALN: sig_code = SUBSIG_ALIGNMENT; break;
-			case BUS_ADRERR: sig_code = SUBSIG_MISCERROR; break;
-			default: sig_code = SUBSIG_BUSTIMEOUT; break;
-			}
-			break;
-		case SIGEMT:
-			switch (info->si_code) {
-			case EMT_TAGOVF: sig_code = SUBSIG_TAG; break;
-			}
-			break;
-		case SIGSYS:
-			if (info->si_code == (__SI_FAULT|0x100)) {
-				sig_code = info->si_trapno;
-				break;
-			}
-		default:
-			sig_address = NULL;
-		}
-	}
-	err |= __put_user((unsigned long)sig_address, &sframep->sig_address);
-	err |= __put_user(sig_code, &sframep->sig_code);
-	err |= __put_user(sc, &sframep->sig_scptr);
-	if (err)
-		goto sigsegv;
-
-	regs->u_regs[UREG_FP] = (unsigned long) sframep;
-	regs->pc = (unsigned long) sa->sa_handler;
-	regs->npc = (regs->pc + 4);
-	return;
-
-sigill_and_return:
-	do_exit(SIGILL);
-sigsegv:
-	force_sigsegv(signr, current);
-}
-
-
 static inline int
 save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
 {
@@ -508,21 +292,20 @@ save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
 	return err;
 }
 
-static inline void
-new_setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
-		int signo, sigset_t *oldset)
+static void setup_frame(struct k_sigaction *ka, struct pt_regs *regs,
+			int signo, sigset_t *oldset)
 {
-	struct new_signal_frame __user *sf;
+	struct signal_frame __user *sf;
 	int sigframe_size, err;
 
 	/* 1. Make sure everything is clean */
 	synchronize_user_stack();
 
-	sigframe_size = NF_ALIGNEDSZ;
+	sigframe_size = SF_ALIGNEDSZ;
 	if (!used_math())
 		sigframe_size -= sizeof(__siginfo_fpu_t);
 
-	sf = (struct new_signal_frame __user *)
+	sf = (struct signal_frame __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
 
 	if (invalid_frame_pointer(sf, sigframe_size))
@@ -586,9 +369,8 @@ sigsegv:
 	force_sigsegv(signo, current);
 }
 
-static inline void
-new_setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
-		   int signo, sigset_t *oldset, siginfo_t *info)
+static void setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
+			   int signo, sigset_t *oldset, siginfo_t *info)
 {
 	struct rt_signal_frame __user *sf;
 	int sigframe_size;
@@ -674,11 +456,9 @@ handle_signal(unsigned long signr, struct k_sigaction *ka,
 	      siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
 {
 	if (ka->sa.sa_flags & SA_SIGINFO)
-		new_setup_rt_frame(ka, regs, signr, oldset, info);
-	else if (current->thread.new_signal)
-		new_setup_frame(ka, regs, signr, oldset);
+		setup_rt_frame(ka, regs, signr, oldset, info);
 	else
-		setup_frame(&ka->sa, regs, signr, oldset, info);
+		setup_frame(ka, regs, signr, oldset);
 
 	spin_lock_irq(&current->sighand->siglock);
 	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
index 42bf09db9a81..f188b5dc9fd0 100644
--- a/arch/sparc/kernel/sys_sparc.c
+++ b/arch/sparc/kernel/sys_sparc.c
@@ -1,5 +1,4 @@
-/* $Id: sys_sparc.c,v 1.70 2001/04/14 01:12:02 davem Exp $
- * linux/arch/sparc/kernel/sys_sparc.c
+/* linux/arch/sparc/kernel/sys_sparc.c
  *
  * This file contains various random system calls that
  * have a non-standard calling sequence on the Linux/sparc
@@ -395,10 +394,8 @@ sparc_sigaction (int sig, const struct old_sigaction __user *act,
 	struct k_sigaction new_ka, old_ka;
 	int ret;
 
-	if (sig < 0) {
-		current->thread.new_signal = 1;
-		sig = -sig;
-	}
+	WARN_ON_ONCE(sig >= 0);
+	sig = -sig;
 
 	if (act) {
 		unsigned long mask;
@@ -446,11 +443,6 @@ sys_rt_sigaction(int sig,
 	if (sigsetsize != sizeof(sigset_t))
 		return -EINVAL;
 
-	/* All tasks which use RT signals (effectively) use
-	 * new style signals.
-	 */
-	current->thread.new_signal = 1;
-
 	if (act) {
 		new_ka.ka_restorer = restorer;
 		if (copy_from_user(&new_ka.sa, act, sizeof(*act)))
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 8acc5cc38621..edbe71e3fab9 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -1,9 +1,5 @@
-# $Id: config.in,v 1.158 2002/01/24 22:14:44 davem Exp $
-# For a description of the syntax of this configuration file,
-# see the Configure script.
-#
-
-mainmenu "Linux/UltraSPARC Kernel Configuration"
+# sparc64 configuration
+mainmenu "Linux Kernel Configuration for 64-bit SPARC"
 
 config SPARC
 	bool
@@ -17,12 +13,6 @@ config SPARC64
 	default y
 	select HAVE_IDE
 	select HAVE_LMB
-	help
-	  SPARC is a family of RISC microprocessors designed and marketed by
-	  Sun Microsystems, incorporated.  This port covers the newer 64-bit
-	  UltraSPARC.  The UltraLinux project maintains both the SPARC32 and
-	  SPARC64 ports; its web page is available at
-	  <http://www.ultralinux.org/>.
 
 config GENERIC_TIME
 	bool
@@ -97,7 +87,7 @@ config SPARC64_PAGE_SIZE_8KB
 	help
 	  This lets you select the page size of the kernel.
 
-	  8KB and 64KB work quite well, since Sparc ELF sections
+	  8KB and 64KB work quite well, since SPARC ELF sections
 	  provide for up to 64KB alignment.
 
 	  Therefore, 512KB and 4MB are for expert hackers only.
@@ -138,7 +128,7 @@ config HOTPLUG_CPU
 	bool "Support for hot-pluggable CPUs"
 	depends on SMP
 	select HOTPLUG
-	---help---
+	help
 	  Say Y here to experiment with turning CPUs off and on.  CPUs
 	  can be controlled through /sys/devices/system/cpu/cpu#.
 	  Say N if you want to disable CPU hotplug.
@@ -155,23 +145,16 @@ source "kernel/time/Kconfig"
 
 config SMP
 	bool "Symmetric multi-processing support"
-	---help---
+	help
 	  This enables support for systems with more than one CPU. If you have
 	  a system with only one CPU, say N. If you have a system with more than
 	  one CPU, say Y.
 
 	  If you say N here, the kernel will run on single and multiprocessor
 	  machines, but will use only one CPU of a multiprocessor machine. If
-	  you say Y here, the kernel will run on many, but not all,
-	  singleprocessor machines. On a singleprocessor machine, the kernel
-	  will run faster if you say N here.
-
-	  People using multiprocessor machines who say Y here should also say
-	  Y to "Enhanced Real Time Clock Support", below. The "Advanced Power
-	  Management" code will be disabled if you say Y here.
-
-	  See also <file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO
-	  available at <http://www.tldp.org/docs.html#howto>.
+	  you say Y here, the kernel will run on single-processor machines.
+	  On a single-processor machine, the kernel will run faster if you say
+	  N here.
 
 	  If you don't know what to do here, say N.
 
@@ -284,50 +267,19 @@ source "mm/Kconfig"
 
 config ISA
 	bool
-	help
-	  Find out whether you have ISA slots on your motherboard.  ISA is the
-	  name of a bus system, i.e. the way the CPU talks to the other stuff
-	  inside your box.  Other bus systems are PCI, EISA, MicroChannel
-	  (MCA) or VESA.  ISA is an older system, now being displaced by PCI;
-	  newer boards don't support it.  If you have ISA, say Y, otherwise N.
 
 config ISAPNP
 	bool
-	help
-	  Say Y here if you would like support for ISA Plug and Play devices.
-	  Some information is in <file:Documentation/isapnp.txt>.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called isapnp.
-
-	  If unsure, say Y.
 
 config EISA
 	bool
-	---help---
-	  The Extended Industry Standard Architecture (EISA) bus was
-	  developed as an open alternative to the IBM MicroChannel bus.
-
-	  The EISA bus provided some of the features of the IBM MicroChannel
-	  bus while maintaining backward compatibility with cards made for
-	  the older ISA bus.  The EISA bus saw limited use between 1988 and
-	  1995 when it was made obsolete by the PCI bus.
-
-	  Say Y here if you are building a kernel for an EISA-based machine.
-
-	  Otherwise, say N.
 
 config MCA
 	bool
-	help
-	  MicroChannel Architecture is found in some IBM PS/2 machines and
-	  laptops.  It is a bus system similar to PCI or ISA. See
-	  <file:Documentation/mca.txt> (and especially the web page given
-	  there) before attempting to build an MCA bus kernel.
 
 config PCMCIA
 	tristate
-	---help---
+	help
 	  Say Y here if you want to attach PCMCIA- or PC-cards to your Linux
 	  computer.  These are credit-card size devices such as network cards,
 	  modems or hard drives often used with laptops computers.  There are
@@ -369,10 +321,10 @@ config PCI
 	bool "PCI support"
 	select ARCH_SUPPORTS_MSI
 	help
-	  Find out whether you have a PCI motherboard. PCI is the name of a
-	  bus system, i.e. the way the CPU talks to the other stuff inside
-	  your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
-	  VESA. If you have PCI, say Y, otherwise N.
+	  Find out whether your system includes a PCI bus. PCI is the name of
+	  a bus system, i.e. the way the CPU talks to the other stuff inside
+	  your box.  If you say Y here, the kernel will include drivers and
+	  infrastructure code to support PCI bus devices.
 
 config PCI_DOMAINS
 	def_bool PCI
@@ -396,15 +348,8 @@ menu "Executable file formats"
 
 source "fs/Kconfig.binfmt"
 
-config SPARC32_COMPAT
-	bool "Kernel support for Linux/Sparc 32bit binary compatibility"
-	help
-	  This allows you to run 32-bit binaries on your Ultra.
-	  Everybody wants this; say Y.
-
 config COMPAT
 	bool
-	depends on SPARC32_COMPAT
 	default y
 	select COMPAT_BINFMT_ELF
 
@@ -421,8 +366,8 @@ config SCHED_SMT
 	default y
 	help
 	  SMT scheduler support improves the CPU scheduler's decision making
-	  when dealing with UltraSPARC cpus at a cost of slightly increased
-	  overhead in some places. If unsure say N here.
+	  when dealing with SPARC cpus at a cost of slightly increased overhead
+	  in some places. If unsure say N here.
 
 config SCHED_MC
 	bool "Multi-core scheduler support"
diff --git a/arch/sparc64/defconfig b/arch/sparc64/defconfig
index 92f79680f70d..aff93c9d13f4 100644
--- a/arch/sparc64/defconfig
+++ b/arch/sparc64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.25-numa
-# Wed Apr 23 04:49:08 2008
+# Linux kernel version: 2.6.25
+# Sat Apr 26 03:11:06 2008
 #
 CONFIG_SPARC=y
 CONFIG_SPARC64=y
@@ -152,7 +152,9 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y
 CONFIG_HUGETLB_PAGE_SIZE_4MB=y
 # CONFIG_HUGETLB_PAGE_SIZE_512K is not set
 # CONFIG_HUGETLB_PAGE_SIZE_64K is not set
-# CONFIG_NUMA is not set
+CONFIG_NUMA=y
+CONFIG_NODES_SHIFT=4
+CONFIG_NODES_SPAN_OTHER_NODES=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_ARCH_SELECT_MEMORY_MODEL=y
 CONFIG_ARCH_SPARSEMEM_ENABLE=y
@@ -162,12 +164,14 @@ CONFIG_SELECT_MEMORY_MODEL=y
 # CONFIG_DISCONTIGMEM_MANUAL is not set
 CONFIG_SPARSEMEM_MANUAL=y
 CONFIG_SPARSEMEM=y
+CONFIG_NEED_MULTIPLE_NODES=y
 CONFIG_HAVE_MEMORY_PRESENT=y
 # CONFIG_SPARSEMEM_STATIC is not set
 CONFIG_SPARSEMEM_EXTREME=y
 CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y
 CONFIG_SPARSEMEM_VMEMMAP=y
 CONFIG_SPLIT_PTLOCK_CPUS=4
+CONFIG_MIGRATION=y
 CONFIG_RESOURCES_64BIT=y
 CONFIG_ZONE_DMA_FLAG=0
 CONFIG_NR_QUICK=1
@@ -191,7 +195,6 @@ CONFIG_SUN_OPENPROMFS=m
 CONFIG_BINFMT_ELF=y
 CONFIG_COMPAT_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=m
-CONFIG_SPARC32_COMPAT=y
 CONFIG_COMPAT=y
 CONFIG_SYSVIPC_COMPAT=y
 CONFIG_SCHED_SMT=y
@@ -746,13 +749,7 @@ CONFIG_DEVPORT=y
 CONFIG_I2C=y
 CONFIG_I2C_BOARDINFO=y
 # CONFIG_I2C_CHARDEV is not set
-
-#
-# I2C Algorithms
-#
 CONFIG_I2C_ALGOBIT=y
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
 
 #
 # I2C Hardware Bus support
@@ -780,6 +777,7 @@ CONFIG_I2C_ALGOBIT=y
 # CONFIG_I2C_VIA is not set
 # CONFIG_I2C_VIAPRO is not set
 # CONFIG_I2C_VOODOO3 is not set
+# CONFIG_I2C_PCA_PLATFORM is not set
 
 #
 # Miscellaneous I2C Chip support
@@ -1026,6 +1024,7 @@ CONFIG_SND_ALI5451=m
 # CONFIG_SND_AU8810 is not set
 # CONFIG_SND_AU8820 is not set
 # CONFIG_SND_AU8830 is not set
+# CONFIG_SND_AW2 is not set
 # CONFIG_SND_AZT3328 is not set
 # CONFIG_SND_BT87X is not set
 # CONFIG_SND_CA0106 is not set
@@ -1097,10 +1096,6 @@ CONFIG_SND_SUN_CS4231=m
 # CONFIG_SND_SOC is not set
 
 #
-# SoC Audio support for SuperH
-#
-
-#
 # ALSA SoC audio for Freescale SOCs
 #
 
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index 63c6ae0dd273..2bd0340b743d 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -15,17 +15,17 @@ obj-y		:= process.o setup.o cpu.o idprom.o \
 		   visemul.o prom.o of_device.o hvapi.o sstate.o mdesc.o
 
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
-obj-$(CONFIG_PCI)	 += ebus.o isa.o pci_common.o \
+obj-$(CONFIG_PCI)	 += ebus.o pci_common.o \
 			    pci_psycho.o pci_sabre.o pci_schizo.o \
 			    pci_sun4v.o pci_sun4v_asm.o pci_fire.o
 obj-$(CONFIG_PCI_MSI)	+= pci_msi.o
 obj-$(CONFIG_SMP)	 += smp.o trampoline.o hvtramp.o
-obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o
+obj-$(CONFIG_COMPAT) += sys32.o sys_sparc32.o signal32.o
 obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
 obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
 obj-$(CONFIG_AUDIT) += audit.o
-obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o
+obj-$(CONFIG_AUDIT)$(CONFIG_COMPAT) += compat_audit.o
 obj-y += $(obj-yy)
diff --git a/arch/sparc64/kernel/audit.c b/arch/sparc64/kernel/audit.c
index 24d7f4b4178a..8fff0ac63d56 100644
--- a/arch/sparc64/kernel/audit.c
+++ b/arch/sparc64/kernel/audit.c
@@ -30,7 +30,7 @@ static unsigned signal_class[] = {
 
 int audit_classify_arch(int arch)
 {
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
 	if (arch == AUDIT_ARCH_SPARC)
 		return 1;
 #endif
@@ -39,7 +39,7 @@ int audit_classify_arch(int arch)
 
 int audit_classify_syscall(int abi, unsigned syscall)
 {
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
 	extern int sparc32_classify_syscall(unsigned);
 	if (abi == AUDIT_ARCH_SPARC)
 		return sparc32_classify_syscall(syscall);
@@ -60,7 +60,7 @@ int audit_classify_syscall(int abi, unsigned syscall)
 
 static int __init audit_classes_init(void)
 {
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
 	extern __u32 sparc32_dir_class[];
 	extern __u32 sparc32_write_class[];
 	extern __u32 sparc32_read_class[];
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index eb88bd6e674e..b441a26b73b0 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -1,6 +1,6 @@
 /* irq.c: UltraSparc IRQ handling/init/registry.
  *
- * Copyright (C) 1997, 2007  David S. Miller  (davem@davemloft.net)
+ * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
  * Copyright (C) 1998  Eddie C. Dost    (ecd@skynet.be)
  * Copyright (C) 1998  Jakub Jelinek    (jj@ultra.linux.cz)
  */
@@ -308,6 +308,7 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 			 IMAP_AID_SAFARI | IMAP_NID_SAFARI);
 		val |= tid | IMAP_VALID;
 		upa_writeq(val, imap);
+		upa_writeq(ICLR_IDLE, data->iclr);
 	}
 }
 
diff --git a/arch/sparc64/kernel/isa.c b/arch/sparc64/kernel/isa.c
deleted file mode 100644
index a2af5ed784c9..000000000000
--- a/arch/sparc64/kernel/isa.c
+++ /dev/null
@@ -1,191 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <asm/oplib.h>
-#include <asm/prom.h>
-#include <asm/of_device.h>
-#include <asm/isa.h>
-
-struct sparc_isa_bridge *isa_chain;
-
-static void __init fatal_err(const char *reason)
-{
-	prom_printf("ISA: fatal error, %s.\n", reason);
-}
-
-static void __init report_dev(struct sparc_isa_device *isa_dev, int child)
-{
-	if (child)
-		printk(" (%s)", isa_dev->prom_node->name);
-	else
-		printk(" [%s", isa_dev->prom_node->name);
-}
-
-static void __init isa_dev_get_resource(struct sparc_isa_device *isa_dev)
-{
-	struct of_device *op = of_find_device_by_node(isa_dev->prom_node);
-
-	memcpy(&isa_dev->resource, &op->resource[0], sizeof(struct resource));
-}
-
-static void __init isa_dev_get_irq(struct sparc_isa_device *isa_dev)
-{
-	struct of_device *op = of_find_device_by_node(isa_dev->prom_node);
-
-	if (!op || !op->num_irqs) {
-		isa_dev->irq = PCI_IRQ_NONE;
-	} else {
-		isa_dev->irq = op->irqs[0];
-	}
-}
-
-static void __init isa_fill_children(struct sparc_isa_device *parent_isa_dev)
-{
-	struct device_node *dp = parent_isa_dev->prom_node->child;
-
-	if (!dp)
-		return;
-
-	printk(" ->");
-	while (dp) {
-		struct sparc_isa_device *isa_dev;
-
-		isa_dev = kzalloc(sizeof(*isa_dev), GFP_KERNEL);
-		if (!isa_dev) {
-			fatal_err("cannot allocate child isa_dev");
-			prom_halt();
-		}
-
-		/* Link it in to parent. */
-		isa_dev->next = parent_isa_dev->child;
-		parent_isa_dev->child = isa_dev;
-
-		isa_dev->bus = parent_isa_dev->bus;
-		isa_dev->prom_node = dp;
-
-		isa_dev_get_resource(isa_dev);
-		isa_dev_get_irq(isa_dev);
-
-		report_dev(isa_dev, 1);
-
-		dp = dp->sibling;
-	}
-}
-
-static void __init isa_fill_devices(struct sparc_isa_bridge *isa_br)
-{
-	struct device_node *dp = isa_br->prom_node->child;
-
-	while (dp) {
-		struct sparc_isa_device *isa_dev;
-		struct dev_archdata *sd;
-
-		isa_dev = kzalloc(sizeof(*isa_dev), GFP_KERNEL);
-		if (!isa_dev) {
-			printk(KERN_DEBUG "ISA: cannot allocate isa_dev");
-			return;
-		}
-
-		sd = &isa_dev->ofdev.dev.archdata;
-		sd->prom_node = dp;
-		sd->op = &isa_dev->ofdev;
-		sd->iommu = isa_br->ofdev.dev.parent->archdata.iommu;
-		sd->stc = isa_br->ofdev.dev.parent->archdata.stc;
-		sd->numa_node = isa_br->ofdev.dev.parent->archdata.numa_node;
-
-		isa_dev->ofdev.node = dp;
-		isa_dev->ofdev.dev.parent = &isa_br->ofdev.dev;
-		isa_dev->ofdev.dev.bus = &isa_bus_type;
-		sprintf(isa_dev->ofdev.dev.bus_id, "isa[%08x]", dp->node);
-
-		/* Register with core */
-		if (of_device_register(&isa_dev->ofdev) != 0) {
-			printk(KERN_DEBUG "isa: device registration error for %s!\n",
-			       dp->path_component_name);
-			kfree(isa_dev);
-			goto next_sibling;
-		}
-
-		/* Link it in. */
-		isa_dev->next = NULL;
-		if (isa_br->devices == NULL) {
-			isa_br->devices = isa_dev;
-		} else {
-			struct sparc_isa_device *tmp = isa_br->devices;
-
-			while (tmp->next)
-				tmp = tmp->next;
-
-			tmp->next = isa_dev;
-		}
-
-		isa_dev->bus = isa_br;
-		isa_dev->prom_node = dp;
-
-		isa_dev_get_resource(isa_dev);
-		isa_dev_get_irq(isa_dev);
-
-		report_dev(isa_dev, 0);
-
-		isa_fill_children(isa_dev);
-
-		printk("]");
-
-	next_sibling:
-		dp = dp->sibling;
-	}
-}
-
-void __init isa_init(void)
-{
-	struct pci_dev *pdev;
-	unsigned short vendor, device;
-	int index = 0;
-
-	vendor = PCI_VENDOR_ID_AL;
-	device = PCI_DEVICE_ID_AL_M1533;
-
-	pdev = NULL;
-	while ((pdev = pci_get_device(vendor, device, pdev)) != NULL) {
-		struct sparc_isa_bridge *isa_br;
-		struct device_node *dp;
-
-		dp = pci_device_to_OF_node(pdev);
-
-		isa_br = kzalloc(sizeof(*isa_br), GFP_KERNEL);
-		if (!isa_br) {
-			printk(KERN_DEBUG "isa: cannot allocate sparc_isa_bridge");
-			pci_dev_put(pdev);
-			return;
-		}
-
-		isa_br->ofdev.node = dp;
-		isa_br->ofdev.dev.parent = &pdev->dev;
-		isa_br->ofdev.dev.bus = &isa_bus_type;
-		sprintf(isa_br->ofdev.dev.bus_id, "isa%d", index);
-
-		/* Register with core */
-		if (of_device_register(&isa_br->ofdev) != 0) {
-			printk(KERN_DEBUG "isa: device registration error for %s!\n",
-			       dp->path_component_name);
-			kfree(isa_br);
-			pci_dev_put(pdev);
-			return;
-		}
-
-		/* Link it in. */
-		isa_br->next = isa_chain;
-		isa_chain = isa_br;
-
-		isa_br->self = pdev;
-		isa_br->index = index++;
-		isa_br->prom_node = dp;
-
-		printk("isa%d:", isa_br->index);
-
-		isa_fill_devices(isa_br);
-
-		printk("\n");
-	}
-}
diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c
index 9e58e8cba1c3..d569f60c24b8 100644
--- a/arch/sparc64/kernel/of_device.c
+++ b/arch/sparc64/kernel/of_device.c
@@ -412,12 +412,6 @@ static int __init build_one_resource(struct device_node *parent,
 
 static int __init use_1to1_mapping(struct device_node *pp)
 {
-	/* If this is on the PMU bus, don't try to translate it even
-	 * if a ranges property exists.
-	 */
-	if (!strcmp(pp->name, "pmu"))
-		return 1;
-
 	/* If we have a ranges property in the parent, use it.  */
 	if (of_find_property(pp, "ranges", NULL) != NULL)
 		return 0;
diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c
index 49f912766519..dbf2fc2f4d87 100644
--- a/arch/sparc64/kernel/pci.c
+++ b/arch/sparc64/kernel/pci.c
@@ -23,7 +23,6 @@
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/ebus.h>
-#include <asm/isa.h>
 #include <asm/prom.h>
 #include <asm/apb.h>
 
@@ -885,7 +884,6 @@ static int __init pcibios_init(void)
 
 	pci_scan_each_controller_bus();
 
-	isa_init();
 	ebus_init();
 	power_init();
 
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index acf8c5250aa9..056013749157 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -1,5 +1,4 @@
-/*  $Id: process.c,v 1.131 2002/02/09 19:49:30 davem Exp $
- *  arch/sparc64/kernel/process.c
+/*  arch/sparc64/kernel/process.c
  *
  *  Copyright (C) 1995, 1996 David S. Miller (davem@caip.rutgers.edu)
  *  Copyright (C) 1996       Eddie C. Dost   (ecd@skynet.be)
@@ -368,9 +367,6 @@ void flush_thread(void)
 	
 	if (get_thread_current_ds() != ASI_AIUS)
 		set_fs(USER_DS);
-
-	/* Init new signal delivery disposition. */
-	clear_thread_flag(TIF_NEWSIGNALS);
 }
 
 /* It's a bit more tricky when 64-bit tasks are involved... */
@@ -595,6 +591,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
 	if (clone_flags & CLONE_SETTLS)
 		t->kregs->u_regs[UREG_G7] = regs->u_regs[UREG_I3];
 
+	/* We do not want to accidently trigger system call restart
+	 * handling in the new thread.  Therefore, clear out the trap
+	 * type, which will make pt_regs_regs_is_syscall() return false.
+	 */
+	pt_regs_clear_trap_type(t->kregs);
+
 	return 0;
 }
 
diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
index 77a3e8592cbc..f2d88d8f7a42 100644
--- a/arch/sparc64/kernel/signal.c
+++ b/arch/sparc64/kernel/signal.c
@@ -8,7 +8,7 @@
  *  Copyright (C) 1997,1998 Jakub Jelinek   (jj@sunsite.mff.cuni.cz)
  */
 
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
 #include <linux/compat.h>	/* for compat_old_sigset_t */
 #endif
 #include <linux/sched.h>
@@ -236,9 +236,6 @@ struct rt_signal_frame {
 	__siginfo_fpu_t		fpu_state;
 };
 
-/* Align macros */
-#define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame) + 7) & (~7)))
-
 static long _sigpause_common(old_sigset_t set)
 {
 	set &= _BLOCKABLE;
@@ -400,7 +397,7 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs,
 	synchronize_user_stack();
 	save_and_clear_fpu();
 	
-	sigframe_size = RT_ALIGNEDSZ;
+	sigframe_size = sizeof(struct rt_signal_frame);
 	if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
 		sigframe_size -= sizeof(__siginfo_fpu_t);
 
@@ -516,11 +513,10 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	struct k_sigaction ka;
 	sigset_t *oldset;
 	siginfo_t info;
-	int signr, tt;
+	int signr;
 	
-	tt = regs->magic & 0x1ff;
-	if (tt == 0x110 || tt == 0x111 || tt == 0x16d) {
-		regs->magic &= ~0x1ff;
+	if (pt_regs_is_syscall(regs)) {
+		pt_regs_clear_trap_type(regs);
 		cookie.restart_syscall = 1;
 	} else
 		cookie.restart_syscall = 0;
@@ -531,7 +527,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	else
 		oldset = &current->blocked;
 
-#ifdef CONFIG_SPARC32_COMPAT
+#ifdef CONFIG_COMPAT
 	if (test_thread_flag(TIF_32BIT)) {
 		extern void do_signal32(sigset_t *, struct pt_regs *,
 					struct signal_deliver_cookie *);
diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
index 43cdec64d9c9..91f8d0826db1 100644
--- a/arch/sparc64/kernel/signal32.c
+++ b/arch/sparc64/kernel/signal32.c
@@ -1,5 +1,4 @@
-/*  $Id: signal32.c,v 1.74 2002/02/09 19:49:30 davem Exp $
- *  arch/sparc64/kernel/signal32.c
+/*  arch/sparc64/kernel/signal32.c
  *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *  Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
@@ -31,30 +30,6 @@
 
 #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
-/* Signal frames: the original one (compatible with SunOS):
- *
- * Set up a signal frame... Make the stack look the way SunOS
- * expects it to look which is basically:
- *
- * ---------------------------------- <-- %sp at signal time
- * Struct sigcontext
- * Signal address
- * Ptr to sigcontext area above
- * Signal code
- * The signal number itself
- * One register window
- * ---------------------------------- <-- New %sp
- */
-struct signal_sframe32 {
-	struct reg_window32 sig_window;
-	int sig_num;
-	int sig_code;
-	/* struct sigcontext32 * */ u32 sig_scptr;
-	int sig_address;
-	struct sigcontext32 sig_context;
-	unsigned int extramask[_COMPAT_NSIG_WORDS - 1];
-};
-
 /* This magic should be in g_upper[0] for all upper parts
  * to be valid.
  */
@@ -65,12 +40,7 @@ typedef struct {
 	unsigned int asi;
 } siginfo_extra_v8plus_t;
 
-/* 
- * And the new one, intended to be used for Linux applications only
- * (we have enough in there to work with clone).
- * All the interesting bits are in the info field.
- */
-struct new_signal_frame32 {
+struct signal_frame32 {
 	struct sparc_stackf32	ss;
 	__siginfo32_t		info;
 	/* __siginfo_fpu32_t * */ u32 fpu_save;
@@ -149,8 +119,7 @@ struct rt_signal_frame32 {
 };
 
 /* Align macros */
-#define SF_ALIGNEDSZ  (((sizeof(struct signal_sframe32) + 7) & (~7)))
-#define NF_ALIGNEDSZ  (((sizeof(struct new_signal_frame32) + 7) & (~7)))
+#define SF_ALIGNEDSZ  (((sizeof(struct signal_frame32) + 7) & (~7)))
 #define RT_ALIGNEDSZ  (((sizeof(struct rt_signal_frame32) + 7) & (~7)))
 
 int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
@@ -241,17 +210,22 @@ static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu
 	return err;
 }
 
-void do_new_sigreturn32(struct pt_regs *regs)
+void do_sigreturn32(struct pt_regs *regs)
 {
-	struct new_signal_frame32 __user *sf;
+	struct signal_frame32 __user *sf;
 	unsigned int psr;
 	unsigned pc, npc, fpu_save;
 	sigset_t set;
 	unsigned seta[_COMPAT_NSIG_WORDS];
 	int err, i;
 	
+	/* Always make any pending restarted system calls return -EINTR */
+	current_thread_info()->restart_block.fn = do_no_restart_syscall;
+
+	synchronize_user_stack();
+
 	regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
-	sf = (struct new_signal_frame32 __user *) regs->u_regs[UREG_FP];
+	sf = (struct signal_frame32 __user *) regs->u_regs[UREG_FP];
 
 	/* 1. Make sure we are not getting garbage from the user */
 	if (!access_ok(VERIFY_READ, sf, sizeof(*sf)) ||
@@ -319,76 +293,6 @@ segv:
 	force_sig(SIGSEGV, current);
 }
 
-asmlinkage void do_sigreturn32(struct pt_regs *regs)
-{
-	struct sigcontext32 __user *scptr;
-	unsigned int pc, npc, psr;
-	sigset_t set;
-	unsigned int seta[_COMPAT_NSIG_WORDS];
-	int err;
-
-	/* Always make any pending restarted system calls return -EINTR */
-	current_thread_info()->restart_block.fn = do_no_restart_syscall;
-
-	synchronize_user_stack();
-	if (test_thread_flag(TIF_NEWSIGNALS)) {
-		do_new_sigreturn32(regs);
-		return;
-	}
-
-	scptr = (struct sigcontext32 __user *)
-		(regs->u_regs[UREG_I0] & 0x00000000ffffffffUL);
-	/* Check sanity of the user arg. */
-	if (!access_ok(VERIFY_READ, scptr, sizeof(struct sigcontext32)) ||
-	    (((unsigned long) scptr) & 3))
-		goto segv;
-
-	err = __get_user(pc, &scptr->sigc_pc);
-	err |= __get_user(npc, &scptr->sigc_npc);
-
-	if ((pc | npc) & 3)
-		goto segv; /* Nice try. */
-
-	err |= __get_user(seta[0], &scptr->sigc_mask);
-	/* Note that scptr + 1 points to extramask */
-	err |= copy_from_user(seta+1, scptr + 1,
-			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
-	if (err)
-	    	goto segv;
-	switch (_NSIG_WORDS) {
-		case 4: set.sig[3] = seta[6] + (((long)seta[7]) << 32);
-		case 3: set.sig[2] = seta[4] + (((long)seta[5]) << 32);
-		case 2: set.sig[1] = seta[2] + (((long)seta[3]) << 32);
-		case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32);
-	}
-	sigdelsetmask(&set, ~_BLOCKABLE);
-	spin_lock_irq(&current->sighand->siglock);
-	current->blocked = set;
-	recalc_sigpending();
-	spin_unlock_irq(&current->sighand->siglock);
-	
-	if (test_thread_flag(TIF_32BIT)) {
-		pc &= 0xffffffff;
-		npc &= 0xffffffff;
-	}
-	regs->tpc = pc;
-	regs->tnpc = npc;
-	err = __get_user(regs->u_regs[UREG_FP], &scptr->sigc_sp);
-	err |= __get_user(regs->u_regs[UREG_I0], &scptr->sigc_o0);
-	err |= __get_user(regs->u_regs[UREG_G1], &scptr->sigc_g1);
-
-	/* User can only change condition codes in %tstate. */
-	err |= __get_user(psr, &scptr->sigc_psr);
-	if (err)
-		goto segv;
-	regs->tstate &= ~(TSTATE_ICC|TSTATE_XCC);
-	regs->tstate |= psr_to_tstate_icc(psr);
-	return;
-
-segv:
-	force_sig(SIGSEGV, current);
-}
-
 asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 {
 	struct rt_signal_frame32 __user *sf;
@@ -504,145 +408,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
 	return (void __user *)(sp - framesize);
 }
 
-static void
-setup_frame32(struct sigaction *sa, struct pt_regs *regs, int signr, sigset_t *oldset, siginfo_t *info)
-{
-	struct signal_sframe32 __user *sframep;
-	struct sigcontext32 __user *sc;
-	unsigned int seta[_COMPAT_NSIG_WORDS];
-	int err = 0;
-	void __user *sig_address;
-	int sig_code;
-	unsigned long pc = regs->tpc;
-	unsigned long npc = regs->tnpc;
-	unsigned int psr;
-
-	if (test_thread_flag(TIF_32BIT)) {
-		pc &= 0xffffffff;
-		npc &= 0xffffffff;
-	}
-
-	synchronize_user_stack();
-	save_and_clear_fpu();
-
-	sframep = (struct signal_sframe32 __user *)
-		get_sigframe(sa, regs, SF_ALIGNEDSZ);
-	if (invalid_frame_pointer(sframep, sizeof(*sframep))){
-		/* Don't change signal code and address, so that
-		 * post mortem debuggers can have a look.
-		 */
-		do_exit(SIGILL);
-	}
-
-	sc = &sframep->sig_context;
-
-	/* We've already made sure frame pointer isn't in kernel space... */
-	err = __put_user((sas_ss_flags(regs->u_regs[UREG_FP]) == SS_ONSTACK),
-			 &sc->sigc_onstack);
-	
-	switch (_NSIG_WORDS) {
-	case 4: seta[7] = (oldset->sig[3] >> 32);
-	        seta[6] = oldset->sig[3];
-	case 3: seta[5] = (oldset->sig[2] >> 32);
-	        seta[4] = oldset->sig[2];
-	case 2: seta[3] = (oldset->sig[1] >> 32);
-	        seta[2] = oldset->sig[1];
-	case 1: seta[1] = (oldset->sig[0] >> 32);
-	        seta[0] = oldset->sig[0];
-	}
-	err |= __put_user(seta[0], &sc->sigc_mask);
-	err |= __copy_to_user(sframep->extramask, seta + 1,
-			      (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int));
-	err |= __put_user(regs->u_regs[UREG_FP], &sc->sigc_sp);
-	err |= __put_user(pc, &sc->sigc_pc);
-	err |= __put_user(npc, &sc->sigc_npc);
-	psr = tstate_to_psr(regs->tstate);
-	if (current_thread_info()->fpsaved[0] & FPRS_FEF)
-		psr |= PSR_EF;
-	err |= __put_user(psr, &sc->sigc_psr);
-	err |= __put_user(regs->u_regs[UREG_G1], &sc->sigc_g1);
-	err |= __put_user(regs->u_regs[UREG_I0], &sc->sigc_o0);
-	err |= __put_user(get_thread_wsaved(), &sc->sigc_oswins);
-
-	err |= copy_in_user((u32 __user *)sframep,
-			    (u32 __user *)(regs->u_regs[UREG_FP]),
-			    sizeof(struct reg_window32));
-		       
-	set_thread_wsaved(0); /* So process is allowed to execute. */
-	err |= __put_user(signr, &sframep->sig_num);
-	sig_address = NULL;
-	sig_code = 0;
-	if (SI_FROMKERNEL (info) && (info->si_code & __SI_MASK) == __SI_FAULT) {
-		sig_address = info->si_addr;
-		switch (signr) {
-		case SIGSEGV:
-			switch (info->si_code) {
-			case SEGV_MAPERR: sig_code = SUBSIG_NOMAPPING; break;
-			default: sig_code = SUBSIG_PROTECTION; break;
-			}
-			break;
-		case SIGILL:
-			switch (info->si_code) {
-			case ILL_ILLOPC: sig_code = SUBSIG_ILLINST; break;
-			case ILL_PRVOPC: sig_code = SUBSIG_PRIVINST; break;
-			case ILL_ILLTRP: sig_code = SUBSIG_BADTRAP(info->si_trapno); break;
-			default: sig_code = SUBSIG_STACK; break;
-			}
-			break;
-		case SIGFPE:
-			switch (info->si_code) {
-			case FPE_INTDIV: sig_code = SUBSIG_IDIVZERO; break;
-			case FPE_INTOVF: sig_code = SUBSIG_FPINTOVFL; break;
-			case FPE_FLTDIV: sig_code = SUBSIG_FPDIVZERO; break;
-			case FPE_FLTOVF: sig_code = SUBSIG_FPOVFLOW; break;
-			case FPE_FLTUND: sig_code = SUBSIG_FPUNFLOW; break;
-			case FPE_FLTRES: sig_code = SUBSIG_FPINEXACT; break;
-			case FPE_FLTINV: sig_code = SUBSIG_FPOPERROR; break;
-			default: sig_code = SUBSIG_FPERROR; break;
-			}
-			break;
-		case SIGBUS:
-			switch (info->si_code) {
-			case BUS_ADRALN: sig_code = SUBSIG_ALIGNMENT; break;
-			case BUS_ADRERR: sig_code = SUBSIG_MISCERROR; break;
-			default: sig_code = SUBSIG_BUSTIMEOUT; break;
-			}
-			break;
-		case SIGEMT:
-			switch (info->si_code) {
-			case EMT_TAGOVF: sig_code = SUBSIG_TAG; break;
-			}
-			break;
-		case SIGSYS:
-			if (info->si_code == (__SI_FAULT|0x100)) {
-				/* See sys_sunos32.c */
-				sig_code = info->si_trapno;
-				break;
-			}
-		default:
-			sig_address = NULL;
-		}
-	}
-	err |= __put_user(ptr_to_compat(sig_address), &sframep->sig_address);
-	err |= __put_user(sig_code, &sframep->sig_code);
-	err |= __put_user(ptr_to_compat(sc), &sframep->sig_scptr);
-	if (err)
-		goto sigsegv;
-
-	regs->u_regs[UREG_FP] = (unsigned long) sframep;
-	regs->tpc = (unsigned long) sa->sa_handler;
-	regs->tnpc = (regs->tpc + 4);
-	if (test_thread_flag(TIF_32BIT)) {
-		regs->tpc &= 0xffffffff;
-		regs->tnpc &= 0xffffffff;
-	}
-	return;
-
-sigsegv:
-	force_sigsegv(signr, current);
-}
-
-
 static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
 {
 	unsigned long *fpregs = current_thread_info()->fpregs;
@@ -663,10 +428,10 @@ static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
 	return err;
 }
 
-static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
-			      int signo, sigset_t *oldset)
+static void setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
+			  int signo, sigset_t *oldset)
 {
-	struct new_signal_frame32 __user *sf;
+	struct signal_frame32 __user *sf;
 	int sigframe_size;
 	u32 psr;
 	int i, err;
@@ -676,11 +441,11 @@ static void new_setup_frame32(struct k_sigaction *ka, struct pt_regs *regs,
 	synchronize_user_stack();
 	save_and_clear_fpu();
 	
-	sigframe_size = NF_ALIGNEDSZ;
+	sigframe_size = SF_ALIGNEDSZ;
 	if (!(current_thread_info()->fpsaved[0] & FPRS_FEF))
 		sigframe_size -= sizeof(__siginfo_fpu_t);
 
-	sf = (struct new_signal_frame32 __user *)
+	sf = (struct signal_frame32 __user *)
 		get_sigframe(&ka->sa, regs, sigframe_size);
 	
 	if (invalid_frame_pointer(sf, sigframe_size))
@@ -944,10 +709,9 @@ static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka,
 {
 	if (ka->sa.sa_flags & SA_SIGINFO)
 		setup_rt_frame32(ka, regs, signr, oldset, info);
-	else if (test_thread_flag(TIF_NEWSIGNALS))
-		new_setup_frame32(ka, regs, signr, oldset);
 	else
-		setup_frame32(&ka->sa, regs, signr, oldset, info);
+		setup_frame32(ka, regs, signr, oldset);
+
 	spin_lock_irq(&current->sighand->siglock);
 	sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NOMASK))
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 524b88920947..409dd71f2738 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -866,14 +866,21 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
 	void *info = call_data->info;
 
 	clear_softint(1 << irq);
+
+	irq_enter();
+
+	if (!call_data->wait) {
+		/* let initiator proceed after getting data */
+		atomic_inc(&call_data->finished);
+	}
+
+	func(info);
+
+	irq_exit();
+
 	if (call_data->wait) {
 		/* let initiator proceed only after completion */
-		func(info);
 		atomic_inc(&call_data->finished);
-	} else {
-		/* let initiator proceed after getting data */
-		atomic_inc(&call_data->finished);
-		func(info);
 	}
 }
 
@@ -1032,7 +1039,9 @@ void smp_receive_signal(int cpu)
 
 void smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
+	irq_enter();
 	clear_softint(1 << irq);
+	irq_exit();
 }
 
 void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
@@ -1040,6 +1049,8 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
 	struct mm_struct *mm;
 	unsigned long flags;
 
+	irq_enter();
+
 	clear_softint(1 << irq);
 
 	/* See if we need to allocate a new TLB context because
@@ -1059,6 +1070,8 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
 	load_secondary_context(mm);
 	__flush_tlb_mm(CTX_HWBITS(mm->context),
 		       SECONDARY_CONTEXT);
+
+	irq_exit();
 }
 
 void smp_new_mmu_context_version(void)
@@ -1217,6 +1230,8 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
 
+	irq_enter();
+
 	preempt_disable();
 
 	__asm__ __volatile__("flushw");
@@ -1229,6 +1244,8 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
 	prom_world(0);
 
 	preempt_enable();
+
+	irq_exit();
 }
 
 /* /proc/profile writes can call this, don't __init it please. */
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 66336590e830..8ac0b99f2c55 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -49,7 +49,6 @@
 #endif
 #ifdef CONFIG_PCI
 #include <asm/ebus.h>
-#include <asm/isa.h>
 #endif
 #include <asm/ns87303.h>
 #include <asm/timer.h>
@@ -187,7 +186,6 @@ EXPORT_SYMBOL(insw);
 EXPORT_SYMBOL(insl);
 #ifdef CONFIG_PCI
 EXPORT_SYMBOL(ebus_chain);
-EXPORT_SYMBOL(isa_chain);
 EXPORT_SYMBOL(pci_alloc_consistent);
 EXPORT_SYMBOL(pci_free_consistent);
 EXPORT_SYMBOL(pci_map_single);
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index 73ed01ba40dc..8d4761f15fa9 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -454,8 +454,8 @@ asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second,
 			err = sys_semget(first, (int)second, (int)third);
 			goto out;
 		case SEMCTL: {
-			err = sys_semctl(first, third,
-					 (int)second | IPC_64,
+			err = sys_semctl(first, second,
+					 (int)third | IPC_64,
 					 (union semun) ptr);
 			goto out;
 		}
diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
index c1a61e98899a..161ce4710fe7 100644
--- a/arch/sparc64/kernel/sys_sparc32.c
+++ b/arch/sparc64/kernel/sys_sparc32.c
@@ -554,10 +554,8 @@ asmlinkage long compat_sys_sigaction(int sig, struct old_sigaction32 __user *act
         struct k_sigaction new_ka, old_ka;
         int ret;
 
-	if (sig < 0) {
-		set_thread_flag(TIF_NEWSIGNALS);
-		sig = -sig;
-	}
+	WARN_ON_ONCE(sig >= 0);
+	sig = -sig;
 
         if (act) {
 		compat_old_sigset_t mask;
@@ -601,11 +599,6 @@ asmlinkage long compat_sys_rt_sigaction(int sig,
         if (sigsetsize != sizeof(compat_sigset_t))
                 return -EINVAL;
 
-	/* All tasks which use RT signals (effectively) use
-	 * new style signals.
-	 */
-	set_thread_flag(TIF_NEWSIGNALS);
-
         if (act) {
 		u32 u_handler, u_restorer;
 
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 177d8aaeec42..8c2b50e8abc6 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1699,9 +1699,21 @@ void __init paging_init(void)
 	 * functions like clear_dcache_dirty_cpu use the cpu mask
 	 * in 13-bit signed-immediate instruction fields.
 	 */
-	BUILD_BUG_ON(FLAGS_RESERVED != 32);
+
+	/*
+	 * Page flags must not reach into upper 32 bits that are used
+	 * for the cpu number
+	 */
+	BUILD_BUG_ON(NR_PAGEFLAGS > 32);
+
+	/*
+	 * The bit fields placed in the high range must not reach below
+	 * the 32 bit boundary. Otherwise we cannot place the cpu field
+	 * at the 32 bit boundary.
+	 */
 	BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
-		     ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED);
+		ilog2(roundup_pow_of_two(NR_CPUS)) > 32);
+
 	BUILD_BUG_ON(NR_CPUS > 4096);
 
 	kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
diff --git a/arch/um/Kconfig.x86_64 b/arch/um/Kconfig.x86_64
index 3fbe69e359ed..5696e7b374b3 100644
--- a/arch/um/Kconfig.x86_64
+++ b/arch/um/Kconfig.x86_64
@@ -1,3 +1,10 @@
+
+menu "Host processor type and features"
+
+source "arch/x86/Kconfig.cpu"
+
+endmenu
+
 config UML_X86
 	bool
 	default y
diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index db3082b4da46..6e51424745ab 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -125,7 +125,7 @@ static int open_one_chan(struct chan *chan)
 	return 0;
 }
 
-int open_chan(struct list_head *chans)
+static int open_chan(struct list_head *chans)
 {
 	struct list_head *ele;
 	struct chan *chan;
@@ -583,19 +583,6 @@ int parse_chan_pair(char *str, struct line *line, int device,
 	return 0;
 }
 
-int chan_out_fd(struct list_head *chans)
-{
-	struct list_head *ele;
-	struct chan *chan;
-
-	list_for_each(ele, chans) {
-		chan = list_entry(ele, struct chan, list);
-		if (chan->primary && chan->output)
-			return chan->fd;
-	}
-	return -1;
-}
-
 void chan_interrupt(struct list_head *chans, struct delayed_work *task,
 		    struct tty_struct *tty, int irq)
 {
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index 2c898c4d6b6a..10b86e1cc659 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -304,7 +304,7 @@ int line_ioctl(struct tty_struct *tty, struct file * file,
 				break;
 		if (i == ARRAY_SIZE(tty_ioctls)) {
 			printk(KERN_ERR "%s: %s: unknown ioctl: 0x%x\n",
-			       __FUNCTION__, tty->name, cmd);
+			       __func__, tty->name, cmd);
 		}
 		ret = -ENOIOCTLCMD;
 		break;
diff --git a/arch/um/drivers/mcast_kern.c b/arch/um/drivers/mcast_kern.c
index 822092f149be..8c4378a76d63 100644
--- a/arch/um/drivers/mcast_kern.c
+++ b/arch/um/drivers/mcast_kern.c
@@ -58,7 +58,7 @@ static const struct net_kern_info mcast_kern_info = {
 	.write			= mcast_write,
 };
 
-int mcast_setup(char *str, char **mac_out, void *data)
+static int mcast_setup(char *str, char **mac_out, void *data)
 {
 	struct mcast_init *init = data;
 	char *port_str = NULL, *ttl_str = NULL, *remain;
diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
index 13af2f03ed84..f8cf4c8bedef 100644
--- a/arch/um/drivers/mconsole_user.c
+++ b/arch/um/drivers/mconsole_user.c
@@ -39,7 +39,7 @@ static struct mconsole_command commands[] = {
 /* Initialized in mconsole_init, which is an initcall */
 char mconsole_socket_name[256];
 
-int mconsole_reply_v0(struct mc_request *req, char *reply)
+static int mconsole_reply_v0(struct mc_request *req, char *reply)
 {
 	struct iovec iov;
 	struct msghdr msg;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 1d43bdfc20c4..5b4ca8d93682 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -116,7 +116,7 @@ static void uml_dev_close(struct work_struct *work)
 	dev_close(lp->dev);
 }
 
-irqreturn_t uml_net_interrupt(int irq, void *dev_id)
+static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
 	struct uml_net_private *lp = dev->priv;
@@ -296,7 +296,7 @@ static struct ethtool_ops uml_net_ethtool_ops = {
 	.get_link	= ethtool_op_get_link,
 };
 
-void uml_net_user_timer_expire(unsigned long _conn)
+static void uml_net_user_timer_expire(unsigned long _conn)
 {
 #ifdef undef
 	struct connection *conn = (struct connection *)_conn;
@@ -786,7 +786,7 @@ static int uml_inetaddr_event(struct notifier_block *this, unsigned long event,
 }
 
 /* uml_net_init shouldn't be called twice on two CPUs at the same time */
-struct notifier_block uml_inetaddr_notifier = {
+static struct notifier_block uml_inetaddr_notifier = {
 	.notifier_call		= uml_inetaddr_event,
 };
 
diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
index addd75902656..d269ca387f10 100644
--- a/arch/um/drivers/port_user.c
+++ b/arch/um/drivers/port_user.c
@@ -153,7 +153,7 @@ struct port_pre_exec_data {
 	int pipe_fd;
 };
 
-void port_pre_exec(void *arg)
+static void port_pre_exec(void *arg)
 {
 	struct port_pre_exec_data *data = arg;
 
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index 6b4a0f9e38de..d19faec7046e 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -13,7 +13,7 @@ struct slip_init {
 	char *gate_addr;
 };
 
-void slip_init(struct net_device *dev, void *data)
+static void slip_init(struct net_device *dev, void *data)
 {
 	struct uml_net_private *private;
 	struct slip_data *spri;
@@ -57,7 +57,7 @@ static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
 			       (struct slip_data *) &lp->user);
 }
 
-const struct net_kern_info slip_kern_info = {
+static const struct net_kern_info slip_kern_info = {
 	.init			= slip_init,
 	.protocol		= slip_protocol,
 	.read			= slip_read,
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index cec0c33cdd39..49266f6108c4 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -34,7 +34,7 @@
 
 static struct tty_driver *console_driver;
 
-void stdio_announce(char *dev_name, int dev)
+static void stdio_announce(char *dev_name, int dev)
 {
 	printk(KERN_INFO "Virtual console %d assigned device '%s'\n", dev,
 	       dev_name);
@@ -158,7 +158,7 @@ static struct console stdiocons = {
 	.index		= -1,
 };
 
-int stdio_init(void)
+static int stdio_init(void)
 {
 	char *new_title;
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index be3a2797dac4..5e45e39a8a8d 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -72,18 +72,6 @@ struct io_thread_req {
 	int error;
 };
 
-extern int open_ubd_file(char *file, struct openflags *openflags, int shared,
-			 char **backing_file_out, int *bitmap_offset_out,
-			 unsigned long *bitmap_len_out, int *data_offset_out,
-			 int *create_cow_out);
-extern int create_cow_file(char *cow_file, char *backing_file,
-			   struct openflags flags, int sectorsize,
-			   int alignment, int *bitmap_offset_out,
-			   unsigned long *bitmap_len_out,
-			   int *data_offset_out);
-extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
-extern void do_io(struct io_thread_req *req);
-
 static inline int ubd_test_bit(__u64 bit, unsigned char *data)
 {
 	__u64 n;
@@ -200,7 +188,7 @@ struct ubd {
 }
 
 /* Protected by ubd_lock */
-struct ubd ubd_devs[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
+static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
 
 /* Only changed by fake_ide_setup which is a setup */
 static int fake_ide = 0;
@@ -463,7 +451,7 @@ __uml_help(udb_setup,
 static void do_ubd_request(struct request_queue * q);
 
 /* Only changed by ubd_init, which is an initcall. */
-int thread_fd = -1;
+static int thread_fd = -1;
 
 static void ubd_end_request(struct request *req, int bytes, int error)
 {
@@ -531,7 +519,7 @@ static irqreturn_t ubd_intr(int irq, void *dev)
 /* Only changed by ubd_init, which is an initcall. */
 static int io_pid = -1;
 
-void kill_io_thread(void)
+static void kill_io_thread(void)
 {
 	if(io_pid != -1)
 		os_kill_process(io_pid, 1);
@@ -547,6 +535,192 @@ static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
 	return os_file_size(file, size_out);
 }
 
+static int read_cow_bitmap(int fd, void *buf, int offset, int len)
+{
+	int err;
+
+	err = os_seek_file(fd, offset);
+	if (err < 0)
+		return err;
+
+	err = os_read_file(fd, buf, len);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
+{
+	unsigned long modtime;
+	unsigned long long actual;
+	int err;
+
+	err = os_file_modtime(file, &modtime);
+	if (err < 0) {
+		printk(KERN_ERR "Failed to get modification time of backing "
+		       "file \"%s\", err = %d\n", file, -err);
+		return err;
+	}
+
+	err = os_file_size(file, &actual);
+	if (err < 0) {
+		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
+		       "err = %d\n", file, -err);
+		return err;
+	}
+
+	if (actual != size) {
+		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
+		 * the typecast.*/
+		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
+		       "vs backing file\n", (unsigned long long) size, actual);
+		return -EINVAL;
+	}
+	if (modtime != mtime) {
+		printk(KERN_ERR "mtime mismatch (%ld vs %ld) of COW header vs "
+		       "backing file\n", mtime, modtime);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
+{
+	struct uml_stat buf1, buf2;
+	int err;
+
+	if (from_cmdline == NULL)
+		return 0;
+	if (!strcmp(from_cmdline, from_cow))
+		return 0;
+
+	err = os_stat_file(from_cmdline, &buf1);
+	if (err < 0) {
+		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
+		       -err);
+		return 0;
+	}
+	err = os_stat_file(from_cow, &buf2);
+	if (err < 0) {
+		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
+		       -err);
+		return 1;
+	}
+	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
+		return 0;
+
+	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
+	       "\"%s\" specified in COW header of \"%s\"\n",
+	       from_cmdline, from_cow, cow);
+	return 1;
+}
+
+static int open_ubd_file(char *file, struct openflags *openflags, int shared,
+		  char **backing_file_out, int *bitmap_offset_out,
+		  unsigned long *bitmap_len_out, int *data_offset_out,
+		  int *create_cow_out)
+{
+	time_t mtime;
+	unsigned long long size;
+	__u32 version, align;
+	char *backing_file;
+	int fd, err, sectorsize, asked_switch, mode = 0644;
+
+	fd = os_open_file(file, *openflags, mode);
+	if (fd < 0) {
+		if ((fd == -ENOENT) && (create_cow_out != NULL))
+			*create_cow_out = 1;
+		if (!openflags->w ||
+		    ((fd != -EROFS) && (fd != -EACCES)))
+			return fd;
+		openflags->w = 0;
+		fd = os_open_file(file, *openflags, mode);
+		if (fd < 0)
+			return fd;
+	}
+
+	if (shared)
+		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
+	else {
+		err = os_lock_file(fd, openflags->w);
+		if (err < 0) {
+			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
+			       file, -err);
+			goto out_close;
+		}
+	}
+
+	/* Successful return case! */
+	if (backing_file_out == NULL)
+		return fd;
+
+	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
+			      &size, &sectorsize, &align, bitmap_offset_out);
+	if (err && (*backing_file_out != NULL)) {
+		printk(KERN_ERR "Failed to read COW header from COW file "
+		       "\"%s\", errno = %d\n", file, -err);
+		goto out_close;
+	}
+	if (err)
+		return fd;
+
+	asked_switch = path_requires_switch(*backing_file_out, backing_file,
+					    file);
+
+	/* Allow switching only if no mismatch. */
+	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
+						   mtime)) {
+		printk(KERN_ERR "Switching backing file to '%s'\n",
+		       *backing_file_out);
+		err = write_cow_header(file, fd, *backing_file_out,
+				       sectorsize, align, &size);
+		if (err) {
+			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
+			goto out_close;
+		}
+	} else {
+		*backing_file_out = backing_file;
+		err = backing_file_mismatch(*backing_file_out, size, mtime);
+		if (err)
+			goto out_close;
+	}
+
+	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
+		  bitmap_len_out, data_offset_out);
+
+	return fd;
+ out_close:
+	os_close_file(fd);
+	return err;
+}
+
+static int create_cow_file(char *cow_file, char *backing_file,
+		    struct openflags flags,
+		    int sectorsize, int alignment, int *bitmap_offset_out,
+		    unsigned long *bitmap_len_out, int *data_offset_out)
+{
+	int err, fd;
+
+	flags.c = 1;
+	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
+	if (fd < 0) {
+		err = fd;
+		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
+		       cow_file, -err);
+		goto out;
+	}
+
+	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
+			    bitmap_offset_out, bitmap_len_out,
+			    data_offset_out);
+	if (!err)
+		return fd;
+	os_close_file(fd);
+ out:
+	return err;
+}
+
 static void ubd_close_dev(struct ubd *ubd_dev)
 {
 	os_close_file(ubd_dev->fd);
@@ -1166,185 +1340,6 @@ static int ubd_ioctl(struct inode * inode, struct file * file,
 	return -EINVAL;
 }
 
-static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
-{
-	struct uml_stat buf1, buf2;
-	int err;
-
-	if(from_cmdline == NULL)
-		return 0;
-	if(!strcmp(from_cmdline, from_cow))
-		return 0;
-
-	err = os_stat_file(from_cmdline, &buf1);
-	if(err < 0){
-		printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
-		return 0;
-	}
-	err = os_stat_file(from_cow, &buf2);
-	if(err < 0){
-		printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
-		return 1;
-	}
-	if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
-		return 0;
-
-	printk("Backing file mismatch - \"%s\" requested,\n"
-	       "\"%s\" specified in COW header of \"%s\"\n",
-	       from_cmdline, from_cow, cow);
-	return 1;
-}
-
-static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
-{
-	unsigned long modtime;
-	unsigned long long actual;
-	int err;
-
-	err = os_file_modtime(file, &modtime);
-	if(err < 0){
-		printk("Failed to get modification time of backing file "
-		       "\"%s\", err = %d\n", file, -err);
-		return err;
-	}
-
-	err = os_file_size(file, &actual);
-	if(err < 0){
-		printk("Failed to get size of backing file \"%s\", "
-		       "err = %d\n", file, -err);
-		return err;
-	}
-
-	if(actual != size){
-		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
-		 * the typecast.*/
-		printk("Size mismatch (%llu vs %llu) of COW header vs backing "
-		       "file\n", (unsigned long long) size, actual);
-		return -EINVAL;
-	}
-	if(modtime != mtime){
-		printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
-		       "file\n", mtime, modtime);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int read_cow_bitmap(int fd, void *buf, int offset, int len)
-{
-	int err;
-
-	err = os_seek_file(fd, offset);
-	if(err < 0)
-		return err;
-
-	err = os_read_file(fd, buf, len);
-	if(err < 0)
-		return err;
-
-	return 0;
-}
-
-int open_ubd_file(char *file, struct openflags *openflags, int shared,
-		  char **backing_file_out, int *bitmap_offset_out,
-		  unsigned long *bitmap_len_out, int *data_offset_out,
-		  int *create_cow_out)
-{
-	time_t mtime;
-	unsigned long long size;
-	__u32 version, align;
-	char *backing_file;
-	int fd, err, sectorsize, asked_switch, mode = 0644;
-
-	fd = os_open_file(file, *openflags, mode);
-	if (fd < 0) {
-		if ((fd == -ENOENT) && (create_cow_out != NULL))
-			*create_cow_out = 1;
-		if (!openflags->w ||
-		    ((fd != -EROFS) && (fd != -EACCES)))
-			return fd;
-		openflags->w = 0;
-		fd = os_open_file(file, *openflags, mode);
-		if (fd < 0)
-			return fd;
-	}
-
-	if(shared)
-		printk("Not locking \"%s\" on the host\n", file);
-	else {
-		err = os_lock_file(fd, openflags->w);
-		if(err < 0){
-			printk("Failed to lock '%s', err = %d\n", file, -err);
-			goto out_close;
-		}
-	}
-
-	/* Successful return case! */
-	if(backing_file_out == NULL)
-		return fd;
-
-	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
-			      &size, &sectorsize, &align, bitmap_offset_out);
-	if(err && (*backing_file_out != NULL)){
-		printk("Failed to read COW header from COW file \"%s\", "
-		       "errno = %d\n", file, -err);
-		goto out_close;
-	}
-	if(err)
-		return fd;
-
-	asked_switch = path_requires_switch(*backing_file_out, backing_file, file);
-
-	/* Allow switching only if no mismatch. */
-	if (asked_switch && !backing_file_mismatch(*backing_file_out, size, mtime)) {
-		printk("Switching backing file to '%s'\n", *backing_file_out);
-		err = write_cow_header(file, fd, *backing_file_out,
-				       sectorsize, align, &size);
-		if (err) {
-			printk("Switch failed, errno = %d\n", -err);
-			goto out_close;
-		}
-	} else {
-		*backing_file_out = backing_file;
-		err = backing_file_mismatch(*backing_file_out, size, mtime);
-		if (err)
-			goto out_close;
-	}
-
-	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
-		  bitmap_len_out, data_offset_out);
-
-	return fd;
- out_close:
-	os_close_file(fd);
-	return err;
-}
-
-int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
-		    int sectorsize, int alignment, int *bitmap_offset_out,
-		    unsigned long *bitmap_len_out, int *data_offset_out)
-{
-	int err, fd;
-
-	flags.c = 1;
-	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
-	if(fd < 0){
-		err = fd;
-		printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
-		       -err);
-		goto out;
-	}
-
-	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
-			    bitmap_offset_out, bitmap_len_out,
-			    data_offset_out);
-	if(!err)
-		return fd;
-	os_close_file(fd);
- out:
-	return err;
-}
-
 static int update_bitmap(struct io_thread_req *req)
 {
 	int n;
@@ -1369,7 +1364,7 @@ static int update_bitmap(struct io_thread_req *req)
 	return 0;
 }
 
-void do_io(struct io_thread_req *req)
+static void do_io(struct io_thread_req *req)
 {
 	char *buf;
 	unsigned long len;
diff --git a/arch/um/include/chan_kern.h b/arch/um/include/chan_kern.h
index 624b5100a3cd..1e651457e049 100644
--- a/arch/um/include/chan_kern.h
+++ b/arch/um/include/chan_kern.h
@@ -31,7 +31,6 @@ extern void chan_interrupt(struct list_head *chans, struct delayed_work *task,
 			   struct tty_struct *tty, int irq);
 extern int parse_chan_pair(char *str, struct line *line, int device,
 			   const struct chan_opts *opts, char **error_out);
-extern int open_chan(struct list_head *chans);
 extern int write_chan(struct list_head *chans, const char *buf, int len,
 			     int write_irq);
 extern int console_write_chan(struct list_head *chans, const char *buf, 
@@ -45,7 +44,6 @@ extern void close_chan(struct list_head *chans, int delay_free_irq);
 extern int chan_window_size(struct list_head *chans, 
 			     unsigned short *rows_out, 
 			     unsigned short *cols_out);
-extern int chan_out_fd(struct list_head *chans);
 extern int chan_config_string(struct list_head *chans, char *str, int size,
 			      char **error_out);
 
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index a6c1dd1cf5a1..56deed623446 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -115,7 +115,7 @@ static int have_root __initdata = 0;
 /* Set in uml_mem_setup and modified in linux_main */
 long long physmem_size = 32 * 1024 * 1024;
 
-static char *usage_string =
+static const char *usage_string =
 "User Mode Linux v%s\n"
 "	available at http://user-mode-linux.sourceforge.net/\n\n";
 
@@ -202,7 +202,7 @@ static void __init uml_checksetup(char *line, int *add)
 
 	p = &__uml_setup_start;
 	while (p < &__uml_setup_end) {
-		int n;
+		size_t n;
 
 		n = strlen(p->str);
 		if (!strncmp(line, p->str, n) && p->setup_func(line + n, add))
@@ -258,7 +258,8 @@ int __init linux_main(int argc, char **argv)
 {
 	unsigned long avail, diff;
 	unsigned long virtmem_size, max_physmem;
-	unsigned int i, add;
+	unsigned int i;
+	int add;
 	char * mode;
 
 	for (i = 1; i < argc; i++) {
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index f4bd349d4412..f25c29a12d00 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -14,6 +14,7 @@
 #include "os.h"
 #include "um_malloc.h"
 #include "user.h"
+#include <linux/limits.h>
 
 struct helper_data {
 	void (*pre_exec)(void*);
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index b616e15638fb..997d01944f91 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -25,15 +25,15 @@
 #include "registers.h"
 #include "skas_ptrace.h"
 
-static int ptrace_child(void)
+static void ptrace_child(void)
 {
 	int ret;
 	/* Calling os_getpid because some libcs cached getpid incorrectly */
 	int pid = os_getpid(), ppid = getppid();
 	int sc_result;
 
-	change_sig(SIGWINCH, 0);
-	if (ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
+	if (change_sig(SIGWINCH, 0) < 0 ||
+	    ptrace(PTRACE_TRACEME, 0, 0, 0) < 0) {
 		perror("ptrace");
 		kill(pid, SIGKILL);
 	}
@@ -75,9 +75,8 @@ static void fatal(char *fmt, ...)
 	va_list list;
 
 	va_start(list, fmt);
-	vprintf(fmt, list);
+	vfprintf(stderr, fmt, list);
 	va_end(list);
-	fflush(stdout);
 
 	exit(1);
 }
@@ -87,9 +86,8 @@ static void non_fatal(char *fmt, ...)
 	va_list list;
 
 	va_start(list, fmt);
-	vprintf(fmt, list);
+	vfprintf(stderr, fmt, list);
 	va_end(list);
-	fflush(stdout);
 }
 
 static int start_ptraced_child(void)
@@ -495,7 +493,7 @@ int __init parse_iomem(char *str, int *add)
 	driver = str;
 	file = strchr(str,',');
 	if (file == NULL) {
-		printf("parse_iomem : failed to parse iomem\n");
+		fprintf(stderr, "parse_iomem : failed to parse iomem\n");
 		goto out;
 	}
 	*file = '\0';
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c
index 48d211b3d9a1..ccb49b0aff59 100644
--- a/arch/um/os-Linux/sys-i386/task_size.c
+++ b/arch/um/os-Linux/sys-i386/task_size.c
@@ -88,7 +88,10 @@ unsigned long os_get_task_size(void)
 	sa.sa_handler = segfault;
 	sigemptyset(&sa.sa_mask);
 	sa.sa_flags = SA_NODEFER;
-	sigaction(SIGSEGV, &sa, &old);
+	if (sigaction(SIGSEGV, &sa, &old)) {
+		perror("os_get_task_size");
+		exit(1);
+	}
 
 	if (!page_ok(bottom)) {
 		fprintf(stderr, "Address 0x%x no good?\n",
@@ -110,11 +113,12 @@ unsigned long os_get_task_size(void)
 
 out:
 	/* Restore the old SIGSEGV handling */
-	sigaction(SIGSEGV, &old, NULL);
-
+	if (sigaction(SIGSEGV, &old, NULL)) {
+		perror("os_get_task_size");
+		exit(1);
+	}
 	top <<= UM_KERN_PAGE_SHIFT;
 	printf("0x%x\n", top);
-	fflush(stdout);
 
 	return top;
 }
diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
index 964dc1a04c37..598b5c1903af 100644
--- a/arch/um/sys-i386/Makefile
+++ b/arch/um/sys-i386/Makefile
@@ -6,7 +6,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \
 	ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \
 	sys_call_table.o tls.o
 
-subarch-obj-y = lib/bitops_32.o lib/semaphore_32.o lib/string_32.o
+subarch-obj-y = lib/semaphore_32.o lib/string_32.o
 subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o
 
diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile
index 3c22de532088..c8b4cce9cfe1 100644
--- a/arch/um/sys-x86_64/Makefile
+++ b/arch/um/sys-x86_64/Makefile
@@ -10,7 +10,7 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \
 
 obj-$(CONFIG_MODULES) += um_module.o
 
-subarch-obj-y = lib/bitops_64.o lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
+subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o
 subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o
 
 ldt-y = ../sys-i386/ldt.o
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4d350b5cbc71..a12dbb2b93f3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -86,9 +86,6 @@ config GENERIC_GPIO
 config ARCH_MAY_HAVE_PC_FDC
 	def_bool y
 
-config DMI
-	def_bool y
-
 config RWSEM_GENERIC_SPINLOCK
 	def_bool !X86_XADD
 
@@ -114,6 +111,9 @@ config GENERIC_TIME_VSYSCALL
 config ARCH_HAS_CPU_RELAX
 	def_bool y
 
+config ARCH_HAS_CACHE_LINE_SIZE
+	def_bool y
+
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool X86_64 || (X86_SMP && !X86_VOYAGER)
 
@@ -142,6 +142,9 @@ config AUDIT_ARCH
 config ARCH_SUPPORTS_AOUT
 	def_bool y
 
+config ARCH_SUPPORTS_OPTIMIZED_INLINING
+	def_bool y
+
 # Use the generic interrupt handling code in kernel/irq/:
 config GENERIC_HARDIRQS
 	bool
@@ -370,6 +373,25 @@ config VMI
 	  at the moment), by linking the kernel to a GPL-ed ROM module
 	  provided by the hypervisor.
 
+config KVM_CLOCK
+	bool "KVM paravirtualized clock"
+	select PARAVIRT
+	depends on !(X86_VISWS || X86_VOYAGER)
+	help
+	  Turning on this option will allow you to run a paravirtualized clock
+	  when running over the KVM hypervisor. Instead of relying on a PIT
+	  (or probably other) emulation by the underlying device model, the host
+	  provides the guest with timing infrastructure such as time of day, and
+	  system time
+
+config KVM_GUEST
+	bool "KVM Guest support"
+	select PARAVIRT
+	depends on !(X86_VISWS || X86_VOYAGER)
+	help
+	 This option enables various optimizations for running under the KVM
+	 hypervisor.
+
 source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT
@@ -460,6 +482,15 @@ config HPET_EMULATE_RTC
 
 # Mark as embedded because too many people got it wrong.
 # The code disables itself when not needed.
+config DMI
+	default y
+	bool "Enable DMI scanning" if EMBEDDED
+	help
+	  Enabled scanning of DMI to identify machine quirks. Say Y
+	  here unless you have verified that your setup is not
+	  affected by entries in the DMI blacklist. Required by PNP
+	  BIOS code.
+
 config GART_IOMMU
 	bool "GART IOMMU support" if EMBEDDED
 	default y
@@ -1049,9 +1080,9 @@ config MTRR
 	  See <file:Documentation/mtrr.txt> for more information.
 
 config X86_PAT
-	def_bool y
+	bool
 	prompt "x86 PAT support"
-	depends on MTRR && NONPROMISC_DEVMEM
+	depends on MTRR
 	help
 	  Use PAT attributes to setup page level cache control.
 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 57072f2716f9..7ef18b01f0bc 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -21,8 +21,8 @@ config M386
 
 	  Here are the settings recommended for greatest speed:
 	  - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
-	  486DLC/DLC2, UMC 486SX-S and NexGen Nx586.  Only "386" kernels
-	  will run on a 386 class machine.
+	  486DLC/DLC2, and UMC 486SX-S.  Only "386" kernels will run on a 386
+	  class machine.
 	  - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
 	  SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
 	  - "586" for generic Pentium CPUs lacking the TSC
@@ -278,6 +278,11 @@ config GENERIC_CPU
 
 endchoice
 
+config X86_CPU
+	def_bool y
+	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_FIND_NEXT_BIT
+
 config X86_GENERIC
 	bool "Generic x86 support"
 	depends on X86_32
@@ -398,7 +403,7 @@ config X86_TSC
 # generates cmov.
 config X86_CMOV
 	def_bool y
-	depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
+	depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64)
 
 config X86_MINIMUM_CPU_FAMILY
 	int
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 610aaecc19f8..5b1979a45a1e 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -5,6 +5,17 @@ config TRACE_IRQFLAGS_SUPPORT
 
 source "lib/Kconfig.debug"
 
+config NONPROMISC_DEVMEM
+	bool "Disable promiscuous /dev/mem"
+	help
+	  The /dev/mem file by default only allows userspace access to PCI
+	  space and the BIOS code and data regions. This is sufficient for
+	  dosemu and X and all common users of /dev/mem. With this config
+	  option, you allow userspace access to all of memory, including
+	  kernel and userspace memory. Accidental access to this is
+	  obviously disasterous, but specific access can be used by people
+	  debugging the kernel.
+
 config EARLY_PRINTK
 	bool "Early printk" if EMBEDDED
 	default y
@@ -246,3 +257,16 @@ config CPA_DEBUG
 	  Do change_page_attr() self-tests every 30 seconds.
 
 endmenu
+
+config OPTIMIZE_INLINING
+	bool "Allow gcc to uninline functions marked 'inline'"
+	default y
+	help
+	  This option determines if the kernel forces gcc to inline the functions
+	  developers have marked 'inline'. Doing so takes away freedom from gcc to
+	  do what it thinks is best, which is desirable for the gcc 3.x series of
+	  compilers. The gcc 4.x series have a rewritten inlining algorithm and
+	  disabling this option will generate a smaller kernel there. Hopefully
+	  this algorithm is so good that allowing gcc4 to make the decision can
+	  become the default in the future, until then this option is there to
+	  test gcc for this.
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index b1bdc4c6f9f2..172cf8a98bdd 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -1,7 +1,8 @@
 bootsect
 bzImage
+cpustr.h
+mkcpustr
+offsets.h
 setup
 setup.bin
 setup.elf
-cpustr.h
-mkcpustr
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 6d2df8d61c54..af86e431acfa 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -120,7 +120,7 @@ _start:
 	# Part 2 of the header, from the old setup.S
 
 		.ascii	"HdrS"		# header signature
-		.word	0x0208		# header version number (>= 0x0105)
+		.word	0x0209		# header version number (>= 0x0105)
 					# or else old loadlin-1.5 will fail)
 		.globl realmode_swtch
 realmode_swtch:	.word	0, 0		# default_switch, SETUPSEG
@@ -227,6 +227,10 @@ hardware_subarch_data:	.quad 0
 payload_offset:		.long input_data
 payload_length:		.long input_data_end-input_data
 
+setup_data:		.quad 0			# 64-bit physical pointer to
+						# single linked list of
+						# struct setup_data
+
 # End of setup header #####################################################
 
 	.section ".inittext", "ax"
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 3df340b54e57..ad7ddaaff588 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1421,6 +1421,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
+CONFIG_OPTIMIZE_INLINING=y
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index eef98cb00c62..2d6f5b2809d2 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1346,6 +1346,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_VM is not set
 # CONFIG_DEBUG_LIST is not set
 # CONFIG_FRAME_POINTER is not set
+CONFIG_OPTIMIZE_INLINING=y
 # CONFIG_RCU_TORTURE_TEST is not set
 # CONFIG_LKDTM is not set
 # CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 05e155d3fb6c..bbed3a26ce55 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -499,11 +499,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
 	regs->cs = __USER32_CS;
 	regs->ss = __USER32_DS;
 
-	set_fs(USER_DS);
-	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
-
 #if DEBUG_SIG
 	printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
 	       current->comm, current->pid, frame, regs->ip, frame->pretcode);
@@ -599,11 +594,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->cs = __USER32_CS;
 	regs->ss = __USER32_DS;
 
-	set_fs(USER_DS);
-	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
-
 #if DEBUG_SIG
 	printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n",
 	       current->comm, current->pid, frame, regs->ip, frame->pretcode);
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ae7158bce4d6..b5e329da166c 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -430,7 +430,7 @@ ia32_sys_call_table:
 	.quad sys_setuid16
 	.quad sys_getuid16
 	.quad compat_sys_stime	/* stime */		/* 25 */
-	.quad sys32_ptrace	/* ptrace */
+	.quad compat_sys_ptrace	/* ptrace */
 	.quad sys_alarm
 	.quad sys_fstat	/* (old)fstat */
 	.quad sys_pause
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 90e092d0af0c..fa19c3819540 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 
 obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
+obj-$(CONFIG_KVM_GUEST)		+= kvm.o
+obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 
 ifdef CONFIG_INPUT_PCSPKR
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 057ccf1d5ad4..977ed5cdeaa3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -697,10 +697,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
 #define HPET_RESOURCE_NAME_SIZE 9
 	hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
 
-	if (!hpet_res)
-		return 0;
-
-	memset(hpet_res, 0, sizeof(*hpet_res));
 	hpet_res->name = (void *)&hpet_res[1];
 	hpet_res->flags = IORESOURCE_MEM;
 	snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore
new file mode 100644
index 000000000000..58f1f48a58f8
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/.gitignore
@@ -0,0 +1,3 @@
+wakeup.bin
+wakeup.elf
+wakeup.lds
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index df4099dc1c68..65c7857a90dd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -511,31 +511,30 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
 	unsigned long flags;
 	char *vaddr;
 	int nr_pages = 2;
+	struct page *pages[2];
+	int i;
 
-	BUG_ON(len > sizeof(long));
-	BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
-		- ((long)addr & ~(sizeof(long) - 1)));
-	if (kernel_text_address((unsigned long)addr)) {
-		struct page *pages[2] = { virt_to_page(addr),
-			virt_to_page(addr + PAGE_SIZE) };
-		if (!pages[1])
-			nr_pages = 1;
-		vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
-		BUG_ON(!vaddr);
-		local_irq_save(flags);
-		memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
-		local_irq_restore(flags);
-		vunmap(vaddr);
+	if (!core_kernel_text((unsigned long)addr)) {
+		pages[0] = vmalloc_to_page(addr);
+		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 	} else {
-		/*
-		 * modules are in vmalloc'ed memory, always writable.
-		 */
-		local_irq_save(flags);
-		memcpy(addr, opcode, len);
-		local_irq_restore(flags);
+		pages[0] = virt_to_page(addr);
+		WARN_ON(!PageReserved(pages[0]));
+		pages[1] = virt_to_page(addr + PAGE_SIZE);
 	}
+	BUG_ON(!pages[0]);
+	if (!pages[1])
+		nr_pages = 1;
+	vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	BUG_ON(!vaddr);
+	local_irq_save(flags);
+	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
+	local_irq_restore(flags);
+	vunmap(vaddr);
 	sync_core();
 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
 	   that causes hangs on some VIA CPUs. */
+	for (i = 0; i < len; i++)
+		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
 	return addr;
 }
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 8317401170b8..4b99b1bdeb6c 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -451,7 +451,8 @@ void __init setup_boot_APIC_clock(void)
 	}
 
 	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32);
+	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+				       lapic_clockevent.shift);
 	lapic_clockevent.max_delta_ns =
 		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
 	lapic_clockevent.min_delta_ns =
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index bf83157337e4..5910020c3f24 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -360,7 +360,8 @@ static void __init calibrate_APIC_clock(void)
 		result / 1000 / 1000, result / 1000 % 1000);
 
 	/* Calculate the scaled math multiplication factor */
-	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32);
+	lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC,
+				       lapic_clockevent.shift);
 	lapic_clockevent.max_delta_ns =
 		clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
 	lapic_clockevent.min_delta_ns =
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index f0030a0999c7..bf9290e29013 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -904,6 +904,7 @@ recalc:
 			original_pm_idle();
 		else
 			default_idle();
+		local_irq_disable();
 		jiffies_since_last_check = jiffies - last_jiffies;
 		if (jiffies_since_last_check > idle_period)
 			goto recalc;
@@ -911,6 +912,8 @@ recalc:
 
 	if (apm_idle_done)
 		apm_do_busy();
+
+	local_irq_enable();
 }
 
 /**
@@ -1189,19 +1192,6 @@ static int suspend(int vetoable)
 	int err;
 	struct apm_user	*as;
 
-	if (pm_send_all(PM_SUSPEND, (void *)3)) {
-		/* Vetoed */
-		if (vetoable) {
-			if (apm_info.connection_version > 0x100)
-				set_system_power_state(APM_STATE_REJECT);
-			err = -EBUSY;
-			ignore_sys_suspend = 0;
-			printk(KERN_WARNING "apm: suspend was vetoed.\n");
-			goto out;
-		}
-		printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
-	}
-
 	device_suspend(PMSG_SUSPEND);
 	local_irq_disable();
 	device_power_down(PMSG_SUSPEND);
@@ -1224,9 +1214,7 @@ static int suspend(int vetoable)
 	device_power_up();
 	local_irq_enable();
 	device_resume();
-	pm_send_all(PM_RESUME, (void *)0);
 	queue_event(APM_NORMAL_RESUME, NULL);
- out:
 	spin_lock(&user_list_lock);
 	for (as = user_list; as != NULL; as = as->next) {
 		as->suspend_wait = 0;
@@ -1337,7 +1325,6 @@ static void check_events(void)
 			if ((event != APM_NORMAL_RESUME)
 			    || (ignore_normal_resume == 0)) {
 				device_resume();
-				pm_send_all(PM_RESUME, (void *)0);
 				queue_event(event, NULL);
 			}
 			ignore_normal_resume = 0;
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee7c45235e54..a0c6f8190887 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_X86_32)	+= cyrix.o
 obj-$(CONFIG_X86_32)	+= centaur.o
 obj-$(CONFIG_X86_32)	+= transmeta.o
 obj-$(CONFIG_X86_32)	+= intel.o
-obj-$(CONFIG_X86_32)	+= nexgen.o
 obj-$(CONFIG_X86_32)	+= umc.o
 
 obj-$(CONFIG_X86_MCE)	+= mcheck/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 0173065dc3b7..245866828294 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -343,10 +343,4 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
 	.c_size_cache	= amd_size_cache,
 };
 
-int __init amd_init_cpu(void)
-{
-	cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
-	return 0;
-}
-
 cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index e2d870de837c..8db8f73503b3 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -339,6 +339,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
 	struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu);
 	unsigned int freq;
+	unsigned int cached_freq;
 
 	dprintk("get_cur_freq_on_cpu (%d)\n", cpu);
 
@@ -347,7 +348,16 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 		return 0;
 	}
 
+	cached_freq = data->freq_table[data->acpi_data->state].frequency;
 	freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data);
+	if (freq != cached_freq) {
+		/*
+		 * The dreaded BIOS frequency change behind our back.
+		 * Force set the frequency on next target call.
+		 */
+		data->resume = 1;
+	}
+
 	dprintk("cur freq = %u\n", freq);
 
 	return freq;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 9a699ed03598..e07e8c068ae0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -49,7 +49,7 @@ static int banks;
 static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
 static unsigned long notify_user;
 static int rip_msr;
-static int mce_bootlog = 1;
+static int mce_bootlog = -1;
 static atomic_t mce_events;
 
 static char trigger[128];
@@ -471,13 +471,15 @@ static void mce_init(void *dummy)
 static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 {
 	/* This should be disabled by the BIOS, but isn't always */
-	if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
-		/* disable GART TBL walk error reporting, which trips off
-		   incorrectly with the IOMMU & 3ware & Cerberus. */
-		clear_bit(10, &bank[4]);
-		/* Lots of broken BIOS around that don't clear them
-		   by default and leave crap in there. Don't log. */
-		mce_bootlog = 0;
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		if(c->x86 == 15)
+			/* disable GART TBL walk error reporting, which trips off
+			   incorrectly with the IOMMU & 3ware & Cerberus. */
+			clear_bit(10, &bank[4]);
+		if(c->x86 <= 17 && mce_bootlog < 0)
+			/* Lots of broken BIOS around that don't clear them
+			   by default and leave crap in there. Don't log. */
+			mce_bootlog = 0;
 	}
 
 }
diff --git a/arch/x86/kernel/cpu/nexgen.c b/arch/x86/kernel/cpu/nexgen.c
deleted file mode 100644
index 5d5e1c134123..000000000000
--- a/arch/x86/kernel/cpu/nexgen.c
+++ /dev/null
@@ -1,59 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-/*
- *	Detect a NexGen CPU running without BIOS hypercode new enough
- *	to have CPUID. (Thanks to Herbert Oppmann)
- */
-
-static int __cpuinit deep_magic_nexgen_probe(void)
-{
-	int ret;
-
-	__asm__ __volatile__ (
-		"	movw	$0x5555, %%ax\n"
-		"	xorw	%%dx,%%dx\n"
-		"	movw	$2, %%cx\n"
-		"	divw	%%cx\n"
-		"	movl	$0, %%eax\n"
-		"	jnz	1f\n"
-		"	movl	$1, %%eax\n"
-		"1:\n"
-		: "=a" (ret) : : "cx", "dx");
-	return  ret;
-}
-
-static void __cpuinit init_nexgen(struct cpuinfo_x86 *c)
-{
-	c->x86_cache_size = 256; /* A few had 1 MB... */
-}
-
-static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c)
-{
-	/* Detect NexGen with old hypercode */
-	if (deep_magic_nexgen_probe())
-		strcpy(c->x86_vendor_id, "NexGenDriven");
-}
-
-static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
-	.c_vendor	= "Nexgen",
-	.c_ident	= { "NexGenDriven" },
-	.c_models = {
-			{ .vendor = X86_VENDOR_NEXGEN,
-			  .family = 5,
-			  .model_names = { [1] = "Nx586" }
-			},
-	},
-	.c_init		= init_nexgen,
-	.c_identify	= nexgen_identify,
-};
-
-int __init nexgen_init_cpu(void)
-{
-	cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
-	return 0;
-}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index b943e10ad814..f9ae93adffe5 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -614,16 +614,6 @@ static struct wd_ops intel_arch_wd_ops __read_mostly = {
 	.evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
 };
 
-static struct wd_ops coreduo_wd_ops = {
-	.reserve = single_msr_reserve,
-	.unreserve = single_msr_unreserve,
-	.setup = setup_intel_arch_watchdog,
-	.rearm = p6_rearm,
-	.stop = single_msr_stop_watchdog,
-	.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
-	.evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
-};
-
 static void probe_nmi_watchdog(void)
 {
 	switch (boot_cpu_data.x86_vendor) {
@@ -637,8 +627,8 @@ static void probe_nmi_watchdog(void)
 		/* Work around Core Duo (Yonah) errata AE49 where perfctr1
 		   doesn't have a working enable bit. */
 		if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) {
-			wd_ops = &coreduo_wd_ops;
-			break;
+			intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
+			intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
 		}
 		if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
 			wd_ops = &intel_arch_wd_ops;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 2251d0ae9570..268553817909 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -25,6 +25,7 @@
 #include <asm/hpet.h>
 #include <linux/kdebug.h>
 #include <asm/smp.h>
+#include <asm/reboot.h>
 
 #include <mach_ipi.h>
 
@@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void)
 }
 #endif
 
-void machine_crash_shutdown(struct pt_regs *regs)
+void native_machine_crash_shutdown(struct pt_regs *regs)
 {
 	/* This function is only called after the system
 	 * has panicked or is otherwise in a critical state.
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index cbd42e51cb08..645ee5e32a27 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -84,14 +84,41 @@ void __init reserve_early(unsigned long start, unsigned long end, char *name)
 		strncpy(r->name, name, sizeof(r->name) - 1);
 }
 
-void __init early_res_to_bootmem(void)
+void __init free_early(unsigned long start, unsigned long end)
+{
+	struct early_res *r;
+	int i, j;
+
+	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+		r = &early_res[i];
+		if (start == r->start && end == r->end)
+			break;
+	}
+	if (i >= MAX_EARLY_RES || !early_res[i].end)
+		panic("free_early on not reserved area: %lx-%lx!", start, end);
+
+	for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
+		;
+
+	memcpy(&early_res[i], &early_res[i + 1],
+	       (j - 1 - i) * sizeof(struct early_res));
+
+	early_res[j - 1].end = 0;
+}
+
+void __init early_res_to_bootmem(unsigned long start, unsigned long end)
 {
 	int i;
+	unsigned long final_start, final_end;
 	for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
 		struct early_res *r = &early_res[i];
-		printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
-			r->start, r->end - 1, r->name);
-		reserve_bootmem_generic(r->start, r->end - r->start);
+		final_start = max(start, r->start);
+		final_end = min(end, r->end);
+		if (final_start >= final_end)
+			continue;
+		printk(KERN_INFO "  early res: %d [%lx-%lx] %s\n", i,
+			final_start, final_end - 1, r->name);
+		reserve_bootmem_generic(final_start, final_end - final_start);
 	}
 }
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0f8934fc303..2a609dc3271c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
 irq_return:
 	INTERRUPT_RETURN
 .section .fixup,"ax"
-iret_exc:
+ENTRY(iret_exc)
 	pushl $0			# no error code
 	pushl $do_iret_error
 	jmp error_code
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
 ENDPROC(kernel_thread_helper)
 
 #ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+   entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+	RING0_INT_FRAME
+	addl $5*4, %esp		/* remove xen-provided frame */
+	jmp sysenter_past_esp
+
 ENTRY(xen_hypervisor_callback)
 	CFI_STARTPROC
 	pushl $0
@@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback)
 	cmpl $xen_iret_end_crit,%eax
 	jae  1f
 
-	call xen_iret_crit_fixup
+	jmp  xen_iret_crit_fixup
 
+ENTRY(xen_do_upcall)
 1:	mov %esp, %eax
 	call xen_evtchn_do_upcall
 	jmp  ret_from_intr
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 9546ef408b92..021624c83583 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
 	else
 #endif
 
-	if (cpus_weight(cpu_possible_map) <= 8)
+	if (num_possible_cpus() <= 8)
 		genapic = &apic_flat;
 	else
 		genapic = &apic_physflat;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 993c76773256..e25c57b8aa84 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <linux/percpu.h>
 #include <linux/start_kernel.h>
+#include <linux/io.h>
 
 #include <asm/processor.h>
 #include <asm/proto.h>
@@ -22,6 +23,7 @@
 #include <asm/sections.h>
 #include <asm/kdebug.h>
 #include <asm/e820.h>
+#include <asm/bios_ebda.h>
 
 static void __init zap_identity_mappings(void)
 {
@@ -49,7 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
 	}
 }
 
-#define BIOS_EBDA_SEGMENT 0x40E
 #define BIOS_LOWMEM_KILOBYTES 0x413
 
 /*
@@ -80,8 +81,7 @@ static void __init reserve_ebda_region(void)
 	lowmem <<= 10;
 
 	/* start of EBDA area */
-	ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
-	ebda_addr <<= 4;
+	ebda_addr = get_bios_ebda();
 
 	/* Fixup: bios puts an EBDA in the top 64K segment */
 	/* of conventional memory, but does not adjust lowmem. */
@@ -101,6 +101,24 @@ static void __init reserve_ebda_region(void)
 	reserve_early(lowmem, 0x100000, "BIOS reserved");
 }
 
+static void __init reserve_setup_data(void)
+{
+	struct setup_data *data;
+	unsigned long pa_data;
+	char buf[32];
+
+	if (boot_params.hdr.version < 0x0209)
+		return;
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = early_ioremap(pa_data, sizeof(*data));
+		sprintf(buf, "setup data %x", data->type);
+		reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+		pa_data = data->next;
+		early_iounmap(data, sizeof(*data));
+	}
+}
+
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	int i;
@@ -157,6 +175,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
 #endif
 
 	reserve_ebda_region();
+	reserve_setup_data();
 
 	/*
 	 * At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 36652ea1a265..9007f9ea64ee 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -218,7 +218,7 @@ static void hpet_legacy_clockevent_register(void)
 	hpet_freq = 1000000000000000ULL;
 	do_div(hpet_freq, hpet_period);
 	hpet_clockevent.mult = div_sc((unsigned long) hpet_freq,
-				      NSEC_PER_SEC, 32);
+				      NSEC_PER_SEC, hpet_clockevent.shift);
 	/* Calculate the min / max delta */
 	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
 							   &hpet_clockevent);
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 8540abe86ade..c1b5e3ece1f2 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -115,7 +115,8 @@ void __init setup_pit_timer(void)
 	 * IO_APIC has been initialized.
 	 */
 	pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
-	pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32);
+	pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
+				     pit_clockevent.shift);
 	pit_clockevent.max_delta_ns =
 		clockevent_delta2ns(0x7FFF, &pit_clockevent);
 	pit_clockevent.min_delta_ns =
@@ -224,7 +225,8 @@ static int __init init_pit_clocksource(void)
 	    pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC)
 		return 0;
 
-	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20);
+	clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
+						   clocksource_pit.shift);
 	return clocksource_register(&clocksource_pit);
 }
 arch_initcall(init_pit_clocksource);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 2e2f42074e18..a40d54fc1fdd 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -2068,7 +2068,7 @@ static void __init setup_nmi(void)
  * cycles as some i82489DX-based boards have glue logic that keeps the
  * 8259A interrupt line asserted until INTA.  --macro
  */
-static inline void unlock_ExtINT_logic(void)
+static inline void __init unlock_ExtINT_logic(void)
 {
 	int apic, pin, i;
 	struct IO_APIC_route_entry entry0, entry1;
@@ -2444,6 +2444,7 @@ void destroy_irq(unsigned int irq)
 	dynamic_irq_cleanup(irq);
 
 	spin_lock_irqsave(&vector_lock, flags);
+	clear_bit(irq_vector[irq], used_vectors);
 	irq_vector[irq] = 0;
 	spin_unlock_irqrestore(&vector_lock, flags);
 }
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 9ba11d07920f..ef1a8dfcc529 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1599,7 +1599,7 @@ static void __init setup_nmi(void)
  * cycles as some i82489DX-based boards have glue logic that keeps the
  * 8259A interrupt line asserted until INTA.  --macro
  */
-static inline void unlock_ExtINT_logic(void)
+static inline void __init unlock_ExtINT_logic(void)
 {
 	int apic, pin, i;
 	struct IO_APIC_route_entry entry0, entry1;
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 6ea67b76a214..00bda7bcda63 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs)
 			: "=a" (arg1), "=d" (arg2), "=b" (bx)
 			:  "0" (irq),   "1" (desc),  "2" (isp),
 			   "D" (desc->handle_irq)
-			: "memory", "cc"
+			: "memory", "cc", "ecx"
 		);
 	} else
 #endif
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index 73354302fda7..c03205991718 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -6,23 +6,171 @@
  *
  * This file is released under the GPLv2.
  */
-
 #include <linux/debugfs.h>
+#include <linux/uaccess.h>
 #include <linux/stat.h>
 #include <linux/init.h>
+#include <linux/io.h>
+#include <linux/mm.h>
 
 #include <asm/setup.h>
 
 #ifdef CONFIG_DEBUG_BOOT_PARAMS
+struct setup_data_node {
+	u64 paddr;
+	u32 type;
+	u32 len;
+};
+
+static ssize_t
+setup_data_read(struct file *file, char __user *user_buf, size_t count,
+		loff_t *ppos)
+{
+	struct setup_data_node *node = file->private_data;
+	unsigned long remain;
+	loff_t pos = *ppos;
+	struct page *pg;
+	void *p;
+	u64 pa;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= node->len)
+		return 0;
+
+	if (count > node->len - pos)
+		count = node->len - pos;
+	pa = node->paddr + sizeof(struct setup_data) + pos;
+	pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT);
+	if (PageHighMem(pg)) {
+		p = ioremap_cache(pa, count);
+		if (!p)
+			return -ENXIO;
+	} else {
+		p = __va(pa);
+	}
+
+	remain = copy_to_user(user_buf, p, count);
+
+	if (PageHighMem(pg))
+		iounmap(p);
+
+	if (remain)
+		return -EFAULT;
+
+	*ppos = pos + count;
+
+	return count;
+}
+
+static int setup_data_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static const struct file_operations fops_setup_data = {
+	.read =		setup_data_read,
+	.open =		setup_data_open,
+};
+
+static int __init
+create_setup_data_node(struct dentry *parent, int no,
+		       struct setup_data_node *node)
+{
+	struct dentry *d, *type, *data;
+	char buf[16];
+	int error;
+
+	sprintf(buf, "%d", no);
+	d = debugfs_create_dir(buf, parent);
+	if (!d) {
+		error = -ENOMEM;
+		goto err_return;
+	}
+	type = debugfs_create_x32("type", S_IRUGO, d, &node->type);
+	if (!type) {
+		error = -ENOMEM;
+		goto err_dir;
+	}
+	data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data);
+	if (!data) {
+		error = -ENOMEM;
+		goto err_type;
+	}
+	return 0;
+
+err_type:
+	debugfs_remove(type);
+err_dir:
+	debugfs_remove(d);
+err_return:
+	return error;
+}
+
+static int __init create_setup_data_nodes(struct dentry *parent)
+{
+	struct setup_data_node *node;
+	struct setup_data *data;
+	int error, no = 0;
+	struct dentry *d;
+	struct page *pg;
+	u64 pa_data;
+
+	d = debugfs_create_dir("setup_data", parent);
+	if (!d) {
+		error = -ENOMEM;
+		goto err_return;
+	}
+
+	pa_data = boot_params.hdr.setup_data;
+
+	while (pa_data) {
+		node = kmalloc(sizeof(*node), GFP_KERNEL);
+		if (!node) {
+			error = -ENOMEM;
+			goto err_dir;
+		}
+		pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT);
+		if (PageHighMem(pg)) {
+			data = ioremap_cache(pa_data, sizeof(*data));
+			if (!data) {
+				error = -ENXIO;
+				goto err_dir;
+			}
+		} else {
+			data = __va(pa_data);
+		}
+
+		node->paddr = pa_data;
+		node->type = data->type;
+		node->len = data->len;
+		error = create_setup_data_node(d, no, node);
+		pa_data = data->next;
+
+		if (PageHighMem(pg))
+			iounmap(data);
+		if (error)
+			goto err_dir;
+		no++;
+	}
+	return 0;
+
+err_dir:
+	debugfs_remove(d);
+err_return:
+	return error;
+}
+
 static struct debugfs_blob_wrapper boot_params_blob = {
-	.data = &boot_params,
-	.size = sizeof(boot_params),
+	.data		= &boot_params,
+	.size		= sizeof(boot_params),
 };
 
 static int __init boot_params_kdebugfs_init(void)
 {
-	int error;
 	struct dentry *dbp, *version, *data;
+	int error;
 
 	dbp = debugfs_create_dir("boot_params", NULL);
 	if (!dbp) {
@@ -41,7 +189,13 @@ static int __init boot_params_kdebugfs_init(void)
 		error = -ENOMEM;
 		goto err_version;
 	}
+	error = create_setup_data_nodes(dbp);
+	if (error)
+		goto err_data;
 	return 0;
+
+err_data:
+	debugfs_remove(data);
 err_version:
 	debugfs_remove(version);
 err_dir:
@@ -61,5 +215,4 @@ static int __init arch_kdebugfs_init(void)
 
 	return error;
 }
-
 arch_initcall(arch_kdebugfs_init);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
new file mode 100644
index 000000000000..8b7a3cf37d2b
--- /dev/null
+++ b/arch/x86/kernel/kvm.c
@@ -0,0 +1,248 @@
+/*
+ * KVM paravirt_ops implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ * Copyright IBM Corporation, 2007
+ *   Authors: Anthony Liguori <aliguori@us.ibm.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/hardirq.h>
+
+#define MMU_QUEUE_SIZE 1024
+
+struct kvm_para_state {
+	u8 mmu_queue[MMU_QUEUE_SIZE];
+	int mmu_queue_len;
+	enum paravirt_lazy_mode mode;
+};
+
+static DEFINE_PER_CPU(struct kvm_para_state, para_state);
+
+static struct kvm_para_state *kvm_para_state(void)
+{
+	return &per_cpu(para_state, raw_smp_processor_id());
+}
+
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+
+static void kvm_mmu_op(void *buffer, unsigned len)
+{
+	int r;
+	unsigned long a1, a2;
+
+	do {
+		a1 = __pa(buffer);
+		a2 = 0;   /* on i386 __pa() always returns <4G */
+		r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
+		buffer += r;
+		len -= r;
+	} while (len);
+}
+
+static void mmu_queue_flush(struct kvm_para_state *state)
+{
+	if (state->mmu_queue_len) {
+		kvm_mmu_op(state->mmu_queue, state->mmu_queue_len);
+		state->mmu_queue_len = 0;
+	}
+}
+
+static void kvm_deferred_mmu_op(void *buffer, int len)
+{
+	struct kvm_para_state *state = kvm_para_state();
+
+	if (state->mode != PARAVIRT_LAZY_MMU) {
+		kvm_mmu_op(buffer, len);
+		return;
+	}
+	if (state->mmu_queue_len + len > sizeof state->mmu_queue)
+		mmu_queue_flush(state);
+	memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len);
+	state->mmu_queue_len += len;
+}
+
+static void kvm_mmu_write(void *dest, u64 val)
+{
+	__u64 pte_phys;
+	struct kvm_mmu_op_write_pte wpte;
+
+#ifdef CONFIG_HIGHPTE
+	struct page *page;
+	unsigned long dst = (unsigned long) dest;
+
+	page = kmap_atomic_to_page(dest);
+	pte_phys = page_to_pfn(page);
+	pte_phys <<= PAGE_SHIFT;
+	pte_phys += (dst & ~(PAGE_MASK));
+#else
+	pte_phys = (unsigned long)__pa(dest);
+#endif
+	wpte.header.op = KVM_MMU_OP_WRITE_PTE;
+	wpte.pte_val = val;
+	wpte.pte_phys = pte_phys;
+
+	kvm_deferred_mmu_op(&wpte, sizeof wpte);
+}
+
+/*
+ * We only need to hook operations that are MMU writes.  We hook these so that
+ * we can use lazy MMU mode to batch these operations.  We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+			   pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	kvm_mmu_write(pmdp, pmd_val(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+			  unsigned long addr, pte_t *ptep)
+{
+	kvm_mmu_write(ptep, 0);
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+	kvm_mmu_write(pmdp, 0);
+}
+#endif
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+	kvm_mmu_write(pudp, pud_val(pud));
+}
+
+#if PAGETABLE_LEVELS == 4
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	kvm_mmu_write(pgdp, pgd_val(pgd));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+	struct kvm_mmu_op_flush_tlb ftlb = {
+		.header.op = KVM_MMU_OP_FLUSH_TLB,
+	};
+
+	kvm_deferred_mmu_op(&ftlb, sizeof ftlb);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+	struct kvm_mmu_op_release_pt rpt = {
+		.header.op = KVM_MMU_OP_RELEASE_PT,
+		.pt_phys = (u64)pfn << PAGE_SHIFT,
+	};
+
+	kvm_mmu_op(&rpt, sizeof rpt);
+}
+
+static void kvm_enter_lazy_mmu(void)
+{
+	struct kvm_para_state *state = kvm_para_state();
+
+	paravirt_enter_lazy_mmu();
+	state->mode = paravirt_get_lazy_mode();
+}
+
+static void kvm_leave_lazy_mmu(void)
+{
+	struct kvm_para_state *state = kvm_para_state();
+
+	mmu_queue_flush(state);
+	paravirt_leave_lazy(paravirt_get_lazy_mode());
+	state->mode = paravirt_get_lazy_mode();
+}
+
+static void paravirt_ops_setup(void)
+{
+	pv_info.name = "KVM";
+	pv_info.paravirt_enabled = 1;
+
+	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
+		pv_cpu_ops.io_delay = kvm_io_delay;
+
+	if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
+		pv_mmu_ops.set_pte = kvm_set_pte;
+		pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+		pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+		pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+		pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+		pv_mmu_ops.pte_clear = kvm_pte_clear;
+		pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+		pv_mmu_ops.set_pud = kvm_set_pud;
+#if PAGETABLE_LEVELS == 4
+		pv_mmu_ops.set_pgd = kvm_set_pgd;
+#endif
+#endif
+		pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+		pv_mmu_ops.release_pte = kvm_release_pt;
+		pv_mmu_ops.release_pmd = kvm_release_pt;
+		pv_mmu_ops.release_pud = kvm_release_pt;
+
+		pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
+		pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
+	}
+}
+
+void __init kvm_guest_init(void)
+{
+	if (!kvm_para_available())
+		return;
+
+	paravirt_ops_setup();
+}
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
new file mode 100644
index 000000000000..ddee04043aeb
--- /dev/null
+++ b/arch/x86/kernel/kvmclock.c
@@ -0,0 +1,187 @@
+/*  KVM paravirtual clock driver. A clocksource implementation
+    Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include <linux/clocksource.h>
+#include <linux/kvm_para.h>
+#include <asm/arch_hooks.h>
+#include <asm/msr.h>
+#include <asm/apic.h>
+#include <linux/percpu.h>
+#include <asm/reboot.h>
+
+#define KVM_SCALE 22
+
+static int kvmclock = 1;
+
+static int parse_no_kvmclock(char *arg)
+{
+	kvmclock = 0;
+	return 0;
+}
+early_param("no-kvmclock", parse_no_kvmclock);
+
+/* The hypervisor will put information about time periodically here */
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock);
+#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field
+
+static inline u64 kvm_get_delta(u64 last_tsc)
+{
+	int cpu = smp_processor_id();
+	u64 delta = native_read_tsc() - last_tsc;
+	return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
+}
+
+static struct kvm_wall_clock wall_clock;
+static cycle_t kvm_clock_read(void);
+/*
+ * The wallclock is the time of day when we booted. Since then, some time may
+ * have elapsed since the hypervisor wrote the data. So we try to account for
+ * that with system time
+ */
+unsigned long kvm_get_wallclock(void)
+{
+	u32 wc_sec, wc_nsec;
+	u64 delta;
+	struct timespec ts;
+	int version, nsec;
+	int low, high;
+
+	low = (int)__pa(&wall_clock);
+	high = ((u64)__pa(&wall_clock) >> 32);
+
+	delta = kvm_clock_read();
+
+	native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
+	do {
+		version = wall_clock.wc_version;
+		rmb();
+		wc_sec = wall_clock.wc_sec;
+		wc_nsec = wall_clock.wc_nsec;
+		rmb();
+	} while ((wall_clock.wc_version != version) || (version & 1));
+
+	delta = kvm_clock_read() - delta;
+	delta += wc_nsec;
+	nsec = do_div(delta, NSEC_PER_SEC);
+	set_normalized_timespec(&ts, wc_sec + delta, nsec);
+	/*
+	 * Of all mechanisms of time adjustment I've tested, this one
+	 * was the champion!
+	 */
+	return ts.tv_sec + 1;
+}
+
+int kvm_set_wallclock(unsigned long now)
+{
+	return 0;
+}
+
+/*
+ * This is our read_clock function. The host puts an tsc timestamp each time
+ * it updates a new time. Without the tsc adjustment, we can have a situation
+ * in which a vcpu starts to run earlier (smaller system_time), but probes
+ * time later (compared to another vcpu), leading to backwards time
+ */
+static cycle_t kvm_clock_read(void)
+{
+	u64 last_tsc, now;
+	int cpu;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	last_tsc = get_clock(cpu, tsc_timestamp);
+	now = get_clock(cpu, system_time);
+
+	now += kvm_get_delta(last_tsc);
+	preempt_enable();
+
+	return now;
+}
+static struct clocksource kvm_clock = {
+	.name = "kvm-clock",
+	.read = kvm_clock_read,
+	.rating = 400,
+	.mask = CLOCKSOURCE_MASK(64),
+	.mult = 1 << KVM_SCALE,
+	.shift = KVM_SCALE,
+	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static int kvm_register_clock(void)
+{
+	int cpu = smp_processor_id();
+	int low, high;
+	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
+	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+
+	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+}
+
+static void kvm_setup_secondary_clock(void)
+{
+	/*
+	 * Now that the first cpu already had this clocksource initialized,
+	 * we shouldn't fail.
+	 */
+	WARN_ON(kvm_register_clock());
+	/* ok, done with our trickery, call native */
+	setup_secondary_APIC_clock();
+}
+
+/*
+ * After the clock is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shutdown, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will be kept being written. So before any
+ * kind of shutdown from our side, we unregister the clock by writting anything
+ * that does not have the 'enable' bit set in the msr
+ */
+#ifdef CONFIG_KEXEC
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+	native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+	native_machine_crash_shutdown(regs);
+}
+#endif
+
+static void kvm_shutdown(void)
+{
+	native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0);
+	native_machine_shutdown();
+}
+
+void __init kvmclock_init(void)
+{
+	if (!kvm_para_available())
+		return;
+
+	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
+		if (kvm_register_clock())
+			return;
+		pv_time_ops.get_wallclock = kvm_get_wallclock;
+		pv_time_ops.set_wallclock = kvm_set_wallclock;
+		pv_time_ops.sched_clock = kvm_clock_read;
+		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+		machine_ops.shutdown  = kvm_shutdown;
+#ifdef CONFIG_KEXEC
+		machine_ops.crash_shutdown  = kvm_crash_shutdown;
+#endif
+		clocksource_register(&kvm_clock);
+	}
+}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index b402c0f3f192..3cad17fe026b 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -63,7 +63,7 @@ static int __init mfgpt_fix(char *s)
 
 	/* The following udocumented bit resets the MFGPT timers */
 	val = 0xFF; dummy = 0;
-	wrmsr(0x5140002B, val, dummy);
+	wrmsr(MSR_MFGPT_SETUP, val, dummy);
 	return 1;
 }
 __setup("mfgptfix", mfgpt_fix);
@@ -127,17 +127,17 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
 		 * 6; that is, resets for 7 and 8 will be ignored.  Is this
 		 * a problem?   -dilinger
 		 */
-		msr = MFGPT_NR_MSR;
+		msr = MSR_MFGPT_NR;
 		mask = 1 << (timer + 24);
 		break;
 
 	case MFGPT_EVENT_NMI:
-		msr = MFGPT_NR_MSR;
+		msr = MSR_MFGPT_NR;
 		mask = 1 << (timer + shift);
 		break;
 
 	case MFGPT_EVENT_IRQ:
-		msr = MFGPT_IRQ_MSR;
+		msr = MSR_MFGPT_IRQ;
 		mask = 1 << (timer + shift);
 		break;
 
@@ -364,7 +364,8 @@ int __init mfgpt_timer_setup(void)
 	geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val);
 
 	/* Set up the clock event */
-	mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32);
+	mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC,
+				       mfgpt_clockevent.shift);
 	mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF,
 			&mfgpt_clockevent);
 	mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE,
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 70744e344fa1..3e2c54dc8b29 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -686,13 +686,11 @@ void __init get_smp_config(void)
 static int __init smp_scan_config(unsigned long base, unsigned long length,
 				  unsigned reserve)
 {
-	extern void __bad_mpf_size(void);
 	unsigned int *bp = phys_to_virt(base);
 	struct intel_mp_floating *mpf;
 
 	Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
-	if (sizeof(*mpf) != 16)
-		__bad_mpf_size();
+	BUILD_BUG_ON(sizeof(*mpf) != 16);
 
 	while (length > 0) {
 		mpf = (struct intel_mp_floating *)bp;
@@ -801,7 +799,6 @@ void __init find_smp_config(void)
 #ifdef	CONFIG_X86_IO_APIC
 
 #define MP_ISA_BUS		0
-#define MP_MAX_IOAPIC_PIN	127
 
 extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
 
@@ -820,7 +817,7 @@ static int mp_find_ioapic(int gsi)
 	return -1;
 }
 
-static u8 uniq_ioapic_id(u8 id)
+static u8 __init uniq_ioapic_id(u8 id)
 {
 #ifdef CONFIG_X86_32
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
@@ -909,14 +906,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;	/* APIC ID */
 	intsrc.mpc_dstirq = pin;	/* INTIN# */
 
-	Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n",
-		intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
-		(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
-		intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq);
-
-	mp_irqs[mp_irq_entries] = intsrc;
-	if (++mp_irq_entries == MAX_IRQ_SOURCES)
-		panic("Max # of irq sources exceeded!\n");
+	MP_intsrc_info(&intsrc);
 }
 
 int es7000_plat;
@@ -985,23 +975,14 @@ void __init mp_config_acpi_legacy_irqs(void)
 		intsrc.mpc_srcbusirq = i;	/* Identity mapped */
 		intsrc.mpc_dstirq = i;
 
-		Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, "
-			"%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3,
-			(intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus,
-			intsrc.mpc_srcbusirq, intsrc.mpc_dstapic,
-			intsrc.mpc_dstirq);
-
-		mp_irqs[mp_irq_entries] = intsrc;
-		if (++mp_irq_entries == MAX_IRQ_SOURCES)
-			panic("Max # of irq sources exceeded!\n");
+		MP_intsrc_info(&intsrc);
 	}
 }
 
 int mp_register_gsi(u32 gsi, int triggering, int polarity)
 {
-	int ioapic = -1;
-	int ioapic_pin = 0;
-	int idx, bit = 0;
+	int ioapic;
+	int ioapic_pin;
 #ifdef CONFIG_X86_32
 #define MAX_GSI_NUM	4096
 #define IRQ_COMPRESSION_START	64
@@ -1041,15 +1022,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 	 * with redundant pin->gsi mappings (but unique PCI devices);
 	 * we only program the IOAPIC on the first.
 	 */
-	bit = ioapic_pin % 32;
-	idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32);
-	if (idx > 3) {
+	if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
 		printk(KERN_ERR "Invalid reference to IOAPIC pin "
 		       "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
 		       ioapic_pin);
 		return gsi;
 	}
-	if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
+	if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
 		Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
 			mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
 #ifdef CONFIG_X86_32
@@ -1059,7 +1038,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
 #endif
 	}
 
-	mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit);
+	set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
 #ifdef CONFIG_X86_32
 	/*
 	 * For GSI >= 64, use IRQ compression
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3733412d1357..74f0c5ea2a03 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.flush_tlb_single = native_flush_tlb_single,
 	.flush_tlb_others = native_flush_tlb_others,
 
-	.alloc_pt = paravirt_nop,
-	.alloc_pd = paravirt_nop,
-	.alloc_pd_clone = paravirt_nop,
-	.release_pt = paravirt_nop,
-	.release_pd = paravirt_nop,
+	.alloc_pte = paravirt_nop,
+	.alloc_pmd = paravirt_nop,
+	.alloc_pmd_clone = paravirt_nop,
+	.alloc_pud = paravirt_nop,
+	.release_pte = paravirt_nop,
+	.release_pmd = paravirt_nop,
+	.release_pud = paravirt_nop,
 
 	.set_pte = native_set_pte,
 	.set_pte_at = native_set_pte_at,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 2edee22e9c30..e28ec497e142 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -43,6 +43,7 @@
 #include <asm/system.h>
 #include <asm/dma.h>
 #include <asm/rio.h>
+#include <asm/bios_ebda.h>
 
 #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
 int use_calgary __read_mostly = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3004d716539d..67e9b4a1e89d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -4,6 +4,8 @@
 #include <linux/smp.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/pm.h>
 
 struct kmem_cache *task_xstate_cachep;
 
@@ -42,3 +44,118 @@ void arch_task_cache_init(void)
 				  __alignof__(union thread_xstate),
 				  SLAB_PANIC, NULL);
 }
+
+static void do_nothing(void *unused)
+{
+}
+
+/*
+ * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
+ * pm_idle and update to new pm_idle value. Required while changing pm_idle
+ * handler on SMP systems.
+ *
+ * Caller must have changed pm_idle to the new value before the call. Old
+ * pm_idle value will not be used by any CPU after the return of this function.
+ */
+void cpu_idle_wait(void)
+{
+	smp_mb();
+	/* kick all the CPUs so that they exit out of pm_idle */
+	smp_call_function(do_nothing, NULL, 0, 1);
+}
+EXPORT_SYMBOL_GPL(cpu_idle_wait);
+
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+	if (!need_resched()) {
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (!need_resched())
+			__mwait(ax, cx);
+	}
+}
+
+/* Default MONITOR/MWAIT with no hints, used for default C1 state */
+static void mwait_idle(void)
+{
+	if (!need_resched()) {
+		__monitor((void *)&current_thread_info()->flags, 0, 0);
+		smp_mb();
+		if (!need_resched())
+			__sti_mwait(0, 0);
+		else
+			local_irq_enable();
+	} else
+		local_irq_enable();
+}
+
+
+static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
+{
+	if (force_mwait)
+		return 1;
+	/* Any C1 states supported? */
+	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
+}
+
+/*
+ * On SMP it's slightly faster (but much more power-consuming!)
+ * to poll the ->work.need_resched flag instead of waiting for the
+ * cross-CPU IPI to arrive. Use this option with caution.
+ */
+static void poll_idle(void)
+{
+	local_irq_enable();
+	cpu_relax();
+}
+
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
+{
+	static int selected;
+
+	if (selected)
+		return;
+#ifdef CONFIG_X86_SMP
+	if (pm_idle == poll_idle && smp_num_siblings > 1) {
+		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+			" performance may degrade.\n");
+	}
+#endif
+	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
+		/*
+		 * Skip, if setup has overridden idle.
+		 * One CPU supports mwait => All CPUs supports mwait
+		 */
+		if (!pm_idle) {
+			printk(KERN_INFO "using mwait in idle threads.\n");
+			pm_idle = mwait_idle;
+		}
+	}
+	selected = 1;
+}
+
+static int __init idle_setup(char *str)
+{
+	if (!strcmp(str, "poll")) {
+		printk("using polling idle threads.\n");
+		pm_idle = poll_idle;
+	} else if (!strcmp(str, "mwait"))
+		force_mwait = 1;
+	else
+		return -1;
+
+	boot_option_idle_override = 1;
+	return 0;
+}
+early_param("idle", idle_setup);
+
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 77de848bd1fb..f8476dfbb60d 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -111,12 +111,10 @@ void default_idle(void)
 		 */
 		smp_mb();
 
-		local_irq_disable();
-		if (!need_resched()) {
+		if (!need_resched())
 			safe_halt();	/* enables interrupts racelessly */
-			local_irq_disable();
-		}
-		local_irq_enable();
+		else
+			local_irq_enable();
 		current_thread_info()->status |= TS_POLLING;
 	} else {
 		local_irq_enable();
@@ -128,17 +126,6 @@ void default_idle(void)
 EXPORT_SYMBOL(default_idle);
 #endif
 
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->work.need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
-	local_irq_enable();
-	cpu_relax();
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 #include <asm/nmi.h>
 /* We don't actually take CPU down, just spin without interrupts. */
@@ -196,6 +183,7 @@ void cpu_idle(void)
 			if (cpu_is_offline(cpu))
 				play_dead();
 
+			local_irq_disable();
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
 			idle();
 		}
@@ -206,104 +194,6 @@ void cpu_idle(void)
 	}
 }
 
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
-	smp_mb();
-	/* kick all the CPUs so that they exit out of pm_idle */
-	smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-	if (!need_resched()) {
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__sti_mwait(ax, cx);
-		else
-			local_irq_enable();
-	} else
-		local_irq_enable();
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
-	local_irq_enable();
-	mwait_idle_with_hints(0, 0);
-}
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
-	if (force_mwait)
-		return 1;
-	/* Any C1 states supported? */
-	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
-	static int selected;
-
-	if (selected)
-		return;
-#ifdef CONFIG_X86_SMP
-	if (pm_idle == poll_idle && smp_num_siblings > 1) {
-		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
-			" performance may degrade.\n");
-	}
-#endif
-	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-		/*
-		 * Skip, if setup has overridden idle.
-		 * One CPU supports mwait => All CPUs supports mwait
-		 */
-		if (!pm_idle) {
-			printk(KERN_INFO "using mwait in idle threads.\n");
-			pm_idle = mwait_idle;
-		}
-	}
-	selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
-	if (!strcmp(str, "poll")) {
-		printk("using polling idle threads.\n");
-		pm_idle = poll_idle;
-	} else if (!strcmp(str, "mwait"))
-		force_mwait = 1;
-	else
-		return -1;
-
-	boot_option_idle_override = 1;
-	return 0;
-}
-early_param("idle", idle_setup);
-
 void __show_registers(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 131c2ee7ac56..e2319f39988b 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -106,26 +106,13 @@ void default_idle(void)
 	 * test NEED_RESCHED:
 	 */
 	smp_mb();
-	local_irq_disable();
-	if (!need_resched()) {
+	if (!need_resched())
 		safe_halt();	/* enables interrupts racelessly */
-		local_irq_disable();
-	}
-	local_irq_enable();
+	else
+		local_irq_enable();
 	current_thread_info()->status |= TS_POLLING;
 }
 
-/*
- * On SMP it's slightly faster (but much more power-consuming!)
- * to poll the ->need_resched flag instead of waiting for the
- * cross-CPU IPI to arrive. Use this option with caution.
- */
-static void poll_idle(void)
-{
-	local_irq_enable();
-	cpu_relax();
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 DECLARE_PER_CPU(int, cpu_state);
 
@@ -192,110 +179,6 @@ void cpu_idle(void)
 	}
 }
 
-static void do_nothing(void *unused)
-{
-}
-
-/*
- * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
- * pm_idle and update to new pm_idle value. Required while changing pm_idle
- * handler on SMP systems.
- *
- * Caller must have changed pm_idle to the new value before the call. Old
- * pm_idle value will not be used by any CPU after the return of this function.
- */
-void cpu_idle_wait(void)
-{
-	smp_mb();
-	/* kick all the CPUs so that they exit out of pm_idle */
-	smp_call_function(do_nothing, NULL, 0, 1);
-}
-EXPORT_SYMBOL_GPL(cpu_idle_wait);
-
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
-	if (!need_resched()) {
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__mwait(ax, cx);
-	}
-}
-
-/* Default MONITOR/MWAIT with no hints, used for default C1 state */
-static void mwait_idle(void)
-{
-	if (!need_resched()) {
-		__monitor((void *)&current_thread_info()->flags, 0, 0);
-		smp_mb();
-		if (!need_resched())
-			__sti_mwait(0, 0);
-		else
-			local_irq_enable();
-	} else {
-		local_irq_enable();
-	}
-}
-
-
-static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
-{
-	if (force_mwait)
-		return 1;
-	/* Any C1 states supported? */
-	return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0;
-}
-
-void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
-{
-	static int selected;
-
-	if (selected)
-		return;
-#ifdef CONFIG_X86_SMP
-	if (pm_idle == poll_idle && smp_num_siblings > 1) {
-		printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
-			" performance may degrade.\n");
-	}
-#endif
-	if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
-		/*
-		 * Skip, if setup has overridden idle.
-		 * One CPU supports mwait => All CPUs supports mwait
-		 */
-		if (!pm_idle) {
-			printk(KERN_INFO "using mwait in idle threads.\n");
-			pm_idle = mwait_idle;
-		}
-	}
-	selected = 1;
-}
-
-static int __init idle_setup(char *str)
-{
-	if (!strcmp(str, "poll")) {
-		printk("using polling idle threads.\n");
-		pm_idle = poll_idle;
-	} else if (!strcmp(str, "mwait"))
-		force_mwait = 1;
-	else
-		return -1;
-
-	boot_option_idle_override = 1;
-	return 0;
-}
-early_param("idle", idle_setup);
-
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs * regs)
 {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 559c1b027417..fb03ef380f0e 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1207,97 +1207,16 @@ static int genregs32_set(struct task_struct *target,
 	return ret;
 }
 
-static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data)
+long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
+			compat_ulong_t caddr, compat_ulong_t cdata)
 {
-	siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t));
-	compat_siginfo_t __user *si32 = compat_ptr(data);
-	siginfo_t ssi;
-	int ret;
-
-	if (request == PTRACE_SETSIGINFO) {
-		memset(&ssi, 0, sizeof(siginfo_t));
-		ret = copy_siginfo_from_user32(&ssi, si32);
-		if (ret)
-			return ret;
-		if (copy_to_user(si, &ssi, sizeof(siginfo_t)))
-			return -EFAULT;
-	}
-	ret = sys_ptrace(request, pid, addr, (unsigned long)si);
-	if (ret)
-		return ret;
-	if (request == PTRACE_GETSIGINFO) {
-		if (copy_from_user(&ssi, si, sizeof(siginfo_t)))
-			return -EFAULT;
-		ret = copy_siginfo_to_user32(si32, &ssi);
-	}
-	return ret;
-}
-
-asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
-{
-	struct task_struct *child;
-	struct pt_regs *childregs;
+	unsigned long addr = caddr;
+	unsigned long data = cdata;
 	void __user *datap = compat_ptr(data);
 	int ret;
 	__u32 val;
 
 	switch (request) {
-	case PTRACE_TRACEME:
-	case PTRACE_ATTACH:
-	case PTRACE_KILL:
-	case PTRACE_CONT:
-	case PTRACE_SINGLESTEP:
-	case PTRACE_SINGLEBLOCK:
-	case PTRACE_DETACH:
-	case PTRACE_SYSCALL:
-	case PTRACE_OLDSETOPTIONS:
-	case PTRACE_SETOPTIONS:
-	case PTRACE_SET_THREAD_AREA:
-	case PTRACE_GET_THREAD_AREA:
-#ifdef X86_BTS
-	case PTRACE_BTS_CONFIG:
-	case PTRACE_BTS_STATUS:
-	case PTRACE_BTS_SIZE:
-	case PTRACE_BTS_GET:
-	case PTRACE_BTS_CLEAR:
-	case PTRACE_BTS_DRAIN:
-#endif
-		return sys_ptrace(request, pid, addr, data);
-
-	default:
-		return -EINVAL;
-
-	case PTRACE_PEEKTEXT:
-	case PTRACE_PEEKDATA:
-	case PTRACE_POKEDATA:
-	case PTRACE_POKETEXT:
-	case PTRACE_POKEUSR:
-	case PTRACE_PEEKUSR:
-	case PTRACE_GETREGS:
-	case PTRACE_SETREGS:
-	case PTRACE_SETFPREGS:
-	case PTRACE_GETFPREGS:
-	case PTRACE_SETFPXREGS:
-	case PTRACE_GETFPXREGS:
-	case PTRACE_GETEVENTMSG:
-		break;
-
-	case PTRACE_SETSIGINFO:
-	case PTRACE_GETSIGINFO:
-		return ptrace32_siginfo(request, pid, addr, data);
-	}
-
-	child = ptrace_get_task_struct(pid);
-	if (IS_ERR(child))
-		return PTR_ERR(child);
-
-	ret = ptrace_check_attach(child, request == PTRACE_KILL);
-	if (ret < 0)
-		goto out;
-
-	childregs = task_pt_regs(child);
-
-	switch (request) {
 	case PTRACE_PEEKUSR:
 		ret = getreg32(child, addr, &val);
 		if (ret == 0)
@@ -1343,12 +1262,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data)
 					     sizeof(struct user32_fxsr_struct),
 					     datap);
 
+	case PTRACE_GET_THREAD_AREA:
+	case PTRACE_SET_THREAD_AREA:
+		return arch_ptrace(child, request, addr, data);
+
 	default:
 		return compat_ptrace_request(child, request, addr, data);
 	}
 
- out:
-	put_task_struct(child);
 	return ret;
 }
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 19c9386ac118..a4a838306b2c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -8,6 +8,7 @@
 #include <asm/apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
+#include <asm/pgtable.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
 
@@ -15,7 +16,6 @@
 # include <linux/dmi.h>
 # include <linux/ctype.h>
 # include <linux/mc146818rtc.h>
-# include <asm/pgtable.h>
 #else
 # include <asm/iommu.h>
 #endif
@@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length)
 	/* Remap the kernel at virtual address zero, as well as offset zero
 	   from the kernel segment.  This assumes the kernel segment starts at
 	   virtual address PAGE_OFFSET. */
-	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+	memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 		sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
 
 	/*
@@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void)
 	}
 }
 
-static void native_machine_shutdown(void)
+void native_machine_shutdown(void)
 {
 	/* Stop the cpus and apics */
 #ifdef CONFIG_SMP
@@ -470,7 +470,10 @@ struct machine_ops machine_ops = {
 	.shutdown = native_machine_shutdown,
 	.emergency_restart = native_machine_emergency_restart,
 	.restart = native_machine_restart,
-	.halt = native_machine_halt
+	.halt = native_machine_halt,
+#ifdef CONFIG_KEXEC
+	.crash_shutdown = native_machine_crash_shutdown,
+#endif
 };
 
 void machine_power_off(void)
@@ -498,3 +501,9 @@ void machine_halt(void)
 	machine_ops.halt();
 }
 
+#ifdef CONFIG_KEXEC
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+	machine_ops.crash_shutdown(regs);
+}
+#endif
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 455d3c80960b..2283422af794 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -47,6 +47,7 @@
 #include <linux/pfn.h>
 #include <linux/pci.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <video/edid.h>
 
@@ -389,7 +390,6 @@ unsigned long __init find_max_low_pfn(void)
 	return max_low_pfn;
 }
 
-#define BIOS_EBDA_SEGMENT 0x40E
 #define BIOS_LOWMEM_KILOBYTES 0x413
 
 /*
@@ -420,8 +420,7 @@ static void __init reserve_ebda_region(void)
 	lowmem <<= 10;
 
 	/* start of EBDA area */
-	ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT);
-	ebda_addr <<= 4;
+	ebda_addr = get_bios_ebda();
 
 	/* Fixup: bios puts an EBDA in the top 64K segment */
 	/* of conventional memory, but does not adjust lowmem. */
@@ -822,6 +821,10 @@ void __init setup_arch(char **cmdline_p)
 
 	max_low_pfn = setup_memory();
 
+#ifdef CONFIG_KVM_CLOCK
+	kvmclock_init();
+#endif
+
 #ifdef CONFIG_VMI
 	/*
 	 * Must be after max_low_pfn is determined, and before kernel
@@ -829,6 +832,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	vmi_init();
 #endif
+	kvm_guest_init();
 
 	/*
 	 * NOTE: before this point _nobody_ is allowed to allocate
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c2ec3dcb6b99..a94fb959a87a 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -42,6 +42,7 @@
 #include <linux/ctype.h>
 #include <linux/uaccess.h>
 #include <linux/init_ohci1394_dma.h>
+#include <linux/kvm_para.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -116,7 +117,7 @@ extern int root_mountflags;
 
 char __initdata command_line[COMMAND_LINE_SIZE];
 
-struct resource standard_io_resources[] = {
+static struct resource standard_io_resources[] = {
 	{ .name = "dma1", .start = 0x00, .end = 0x1f,
 		.flags = IORESOURCE_BUSY | IORESOURCE_IO },
 	{ .name = "pic1", .start = 0x20, .end = 0x21,
@@ -190,6 +191,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
 	bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
 	e820_register_active_regions(0, start_pfn, end_pfn);
 	free_bootmem_with_active_regions(0, end_pfn);
+	early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
 	reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
 #endif
@@ -264,6 +266,28 @@ void __attribute__((weak)) __init memory_setup(void)
        machine_specific_memory_setup();
 }
 
+static void __init parse_setup_data(void)
+{
+	struct setup_data *data;
+	unsigned long pa_data;
+
+	if (boot_params.hdr.version < 0x0209)
+		return;
+	pa_data = boot_params.hdr.setup_data;
+	while (pa_data) {
+		data = early_ioremap(pa_data, PAGE_SIZE);
+		switch (data->type) {
+		default:
+			break;
+		}
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
+		free_early(pa_data, pa_data+sizeof(*data)+data->len);
+#endif
+		pa_data = data->next;
+		early_iounmap(data, PAGE_SIZE);
+	}
+}
+
 /*
  * setup_arch - architecture-specific boot-time initializations
  *
@@ -316,6 +340,8 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
+	parse_setup_data();
+
 	parse_early_param();
 
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
@@ -359,6 +385,10 @@ void __init setup_arch(char **cmdline_p)
 
 	io_delay_init();
 
+#ifdef CONFIG_KVM_CLOCK
+	kvmclock_init();
+#endif
+
 #ifdef CONFIG_SMP
 	/* setup to use the early static init tables during kernel startup */
 	x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
@@ -397,8 +427,6 @@ void __init setup_arch(char **cmdline_p)
 	contig_initmem_init(0, end_pfn);
 #endif
 
-	early_res_to_bootmem();
-
 	dma32_reserve_bootmem();
 
 #ifdef CONFIG_ACPI_SLEEP
@@ -465,6 +493,8 @@ void __init setup_arch(char **cmdline_p)
 	init_apic_mappings();
 	ioapic_init_mappings();
 
+	kvm_guest_init();
+
 	/*
 	 * We trust e820 completely. No explicit ROM probing in memory.
 	 */
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index f1b117930837..8e05e7f7bd40 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -413,16 +413,6 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
 	regs->ss = __USER_DS;
 	regs->cs = __USER_CS;
 
-	/*
-	 * Clear TF when entering the signal handler, but
-	 * notify any tracer that was single-stepping it.
-	 * The tracer may want to single-step inside the
-	 * handler too.
-	 */
-	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
-
 	return 0;
 
 give_sigsegv:
@@ -501,16 +491,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->ss = __USER_DS;
 	regs->cs = __USER_CS;
 
-	/*
-	 * Clear TF when entering the signal handler, but
-	 * notify any tracer that was single-stepping it.
-	 * The tracer may want to single-step inside the
-	 * handler too.
-	 */
-	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
-
 	return 0;
 
 give_sigsegv:
@@ -566,6 +546,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	if (ret)
 		return ret;
 
+	/*
+	 * Clear the direction flag as per the ABI for function entry.
+	 */
+	regs->flags &= ~X86_EFLAGS_DF;
+
+	/*
+	 * Clear TF when entering the signal handler, but
+	 * notify any tracer that was single-stepping it.
+	 * The tracer may want to single-step inside the
+	 * handler too.
+	 */
+	regs->flags &= ~X86_EFLAGS_TF;
+	if (test_thread_flag(TIF_SINGLESTEP))
+		ptrace_notify(SIGTRAP);
+
 	spin_lock_irq(&current->sighand->siglock);
 	sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 827179c5b32a..ccb2a4560c2d 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -285,14 +285,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	   even if the handler happens to be interrupting 32-bit code. */
 	regs->cs = __USER_CS;
 
-	/* This, by contrast, has nothing to do with segment registers -
-	   see include/asm-x86_64/uaccess.h for details. */
-	set_fs(USER_DS);
-
-	regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF);
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
-
 	return 0;
 
 give_sigsegv:
@@ -380,6 +372,28 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	ret = setup_rt_frame(sig, ka, info, oldset, regs);
 
 	if (ret == 0) {
+		/*
+		 * This has nothing to do with segment registers,
+		 * despite the name.  This magic affects uaccess.h
+		 * macros' behavior.  Reset it to the normal setting.
+		 */
+		set_fs(USER_DS);
+
+		/*
+		 * Clear the direction flag as per the ABI for function entry.
+		 */
+		regs->flags &= ~X86_EFLAGS_DF;
+
+		/*
+		 * Clear TF when entering the signal handler, but
+		 * notify any tracer that was single-stepping it.
+		 * The tracer may want to single-step inside the
+		 * handler too.
+		 */
+		regs->flags &= ~X86_EFLAGS_TF;
+		if (test_thread_flag(TIF_SINGLESTEP))
+			ptrace_notify(SIGTRAP);
+
 		spin_lock_irq(&current->sighand->siglock);
 		sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask);
 		if (!(ka->sa.sa_flags & SA_NODEFER))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ade371f9663a..04c662ba18f1 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 #ifdef CONFIG_X86_32
 	/* init low mem mapping */
-	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 	flush_tlb_all();
 #endif
 
@@ -1058,7 +1058,7 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	check_tsc_sync_source(cpu);
 	local_irq_restore(flags);
 
-	while (!cpu_isset(cpu, cpu_online_map)) {
+	while (!cpu_online(cpu)) {
 		cpu_relax();
 		touch_nmi_watchdog();
 	}
@@ -1168,7 +1168,7 @@ static void __init smp_cpu_index_default(void)
 	int i;
 	struct cpuinfo_x86 *c;
 
-	for_each_cpu_mask(i, cpu_possible_map) {
+	for_each_possible_cpu(i) {
 		c = &cpu_data(i);
 		/* mark all to hotplug */
 		c->cpu_index = NR_CPUS;
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 6878a9c2df5d..ae751094eba9 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <asm/io.h>
+#include <asm/bios_ebda.h>
 #include <asm/mach-summit/mach_mpparse.h>
 
 static struct rio_table_hdr *rio_table_hdr __initdata;
@@ -140,8 +141,8 @@ void __init setup_summit(void)
 	int			i, next_wpeg, next_bus = 0;
 
 	/* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
-	ptr = *(unsigned short *)phys_to_virt(0x40Eul);
-	ptr = (unsigned long)phys_to_virt(ptr << 4);
+	ptr = get_bios_ebda();
+	ptr = (unsigned long)phys_to_virt(ptr);
 
 	rio_table_hdr = NULL;
 	offset = 0x180;
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index df224a8774cb..a1f07d793202 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -195,9 +195,9 @@ static int __cpuinit init_smp_flush(void)
 {
 	int i;
 
-	for_each_cpu_mask(i, cpu_possible_map) {
+	for_each_possible_cpu(i)
 		spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
-	}
+
 	return 0;
 }
 core_initcall(init_smp_flush);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 64580679861e..d8ccc3c6552f 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -33,7 +33,7 @@
 
 /* We can free up trampoline after bootup if cpu hotplug is not supported. */
 #ifndef CONFIG_HOTPLUG_CPU
-.section ".init.data","aw",@progbits
+.section ".cpuinit.data","aw",@progbits
 #else
 .section .rodata,"a",@progbits
 #endif
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 471e694d6713..bde6f63e15d5 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -602,7 +602,7 @@ DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
 DO_ERROR(11, SIGBUS,  "segment not present", segment_not_present)
 DO_ERROR(12, SIGBUS,  "stack segment", stack_segment)
 DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
-DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
 
 void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
 {
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 12affe1f9bce..956f38927aa7 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page)
 	 * pdes need to be zeroed.
 	 */
 	if (type & VMI_PAGE_CLONE)
-		limit = USER_PTRS_PER_PGD;
+		limit = KERNEL_PGD_BOUNDARY;
 	for (i = 0; i < limit; i++)
 		BUG_ON(ptr[i]);
 }
@@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
 }
 #endif
 
-static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
 {
 	vmi_set_page_type(pfn, VMI_PAGE_L1);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
 }
 
-static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
 {
  	/*
 	 * This call comes in very early, before mem_map is setup.
@@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
 }
 
-static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
 {
  	vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
 	vmi_check_page_type(clonepfn, VMI_PAGE_L2);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
 }
 
-static void vmi_release_pt(u32 pfn)
+static void vmi_release_pte(u32 pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L1);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
 }
 
-static void vmi_release_pd(u32 pfn)
+static void vmi_release_pmd(u32 pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L2);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
@@ -871,15 +871,15 @@ static inline int __init activate_vmi(void)
 
 	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
 	if (vmi_ops.allocate_page) {
-		pv_mmu_ops.alloc_pt = vmi_allocate_pt;
-		pv_mmu_ops.alloc_pd = vmi_allocate_pd;
-		pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+		pv_mmu_ops.alloc_pte = vmi_allocate_pte;
+		pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
+		pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
 	}
 
 	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
 	if (vmi_ops.release_page) {
-		pv_mmu_ops.release_pt = vmi_release_pt;
-		pv_mmu_ops.release_pd = vmi_release_pd;
+		pv_mmu_ops.release_pte = vmi_release_pte;
+		pv_mmu_ops.release_pmd = vmi_release_pmd;
 	}
 
 	/* Set linear is needed in all cases */
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index b7ab3c335fae..fad3674b06a5 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -209,12 +209,6 @@ SECTIONS
 	EXIT_DATA
   }
 
-/* vdso blob that is mapped into user space */
-  vdso_start = . ;
-  .vdso  : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
-  . = ALIGN(PAGE_SIZE);
-  vdso_end = .;
-
 #ifdef CONFIG_BLK_DEV_INITRD
   . = ALIGN(PAGE_SIZE);
   __initramfs_start = .;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 41962e793c0f..8d45fabc5f3b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -19,7 +19,7 @@ if VIRTUALIZATION
 
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on HAVE_KVM && EXPERIMENTAL
+	depends on HAVE_KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	---help---
@@ -50,6 +50,17 @@ config KVM_AMD
 	  Provides support for KVM on AMD processors equipped with the AMD-V
 	  (SVM) extensions.
 
+config KVM_TRACE
+	bool "KVM trace support"
+	depends on KVM && MARKERS && SYSFS
+	select RELAY
+	select DEBUG_FS
+	default n
+	---help---
+	  This option allows reading a trace of kvm-related events through
+	  relayfs.  Note the ABI is not considered stable and will be
+	  modified in future updates.
+
 # OK, it's a little counter-intuitive to do this, but it puts it neatly under
 # the virtualization menu.
 source drivers/lguest/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index ffdd0b310784..c97d35c218db 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,10 +3,14 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o)
+ifeq ($(CONFIG_KVM_TRACE),y)
+common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
+endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
-kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o
+kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
+	i8254.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
new file mode 100644
index 000000000000..361e31611276
--- /dev/null
+++ b/arch/x86/kvm/i8254.c
@@ -0,0 +1,611 @@
+/*
+ * 8253/8254 interval timer emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2006 Intel Corporation
+ * Copyright (c) 2007 Keir Fraser, XenSource Inc
+ * Copyright (c) 2008 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Authors:
+ *   Sheng Yang <sheng.yang@intel.com>
+ *   Based on QEMU and Xen.
+ */
+
+#include <linux/kvm_host.h>
+
+#include "irq.h"
+#include "i8254.h"
+
+#ifndef CONFIG_X86_64
+#define mod_64(x, y) ((x) - (y) * div64_64(x, y))
+#else
+#define mod_64(x, y) ((x) % (y))
+#endif
+
+#define RW_STATE_LSB 1
+#define RW_STATE_MSB 2
+#define RW_STATE_WORD0 3
+#define RW_STATE_WORD1 4
+
+/* Compute with 96 bit intermediate result: (a*b)/c */
+static u64 muldiv64(u64 a, u32 b, u32 c)
+{
+	union {
+		u64 ll;
+		struct {
+			u32 low, high;
+		} l;
+	} u, res;
+	u64 rl, rh;
+
+	u.ll = a;
+	rl = (u64)u.l.low * (u64)b;
+	rh = (u64)u.l.high * (u64)b;
+	rh += (rl >> 32);
+	res.l.high = div64_64(rh, c);
+	res.l.low = div64_64(((mod_64(rh, c) << 32) + (rl & 0xffffffff)), c);
+	return res.ll;
+}
+
+static void pit_set_gate(struct kvm *kvm, int channel, u32 val)
+{
+	struct kvm_kpit_channel_state *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+
+	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	switch (c->mode) {
+	default:
+	case 0:
+	case 4:
+		/* XXX: just disable/enable counting */
+		break;
+	case 1:
+	case 2:
+	case 3:
+	case 5:
+		/* Restart counting on rising edge. */
+		if (c->gate < val)
+			c->count_load_time = ktime_get();
+		break;
+	}
+
+	c->gate = val;
+}
+
+int pit_get_gate(struct kvm *kvm, int channel)
+{
+	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	return kvm->arch.vpit->pit_state.channels[channel].gate;
+}
+
+static int pit_get_count(struct kvm *kvm, int channel)
+{
+	struct kvm_kpit_channel_state *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+	s64 d, t;
+	int counter;
+
+	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+	d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+	switch (c->mode) {
+	case 0:
+	case 1:
+	case 4:
+	case 5:
+		counter = (c->count - d) & 0xffff;
+		break;
+	case 3:
+		/* XXX: may be incorrect for odd counts */
+		counter = c->count - (mod_64((2 * d), c->count));
+		break;
+	default:
+		counter = c->count - mod_64(d, c->count);
+		break;
+	}
+	return counter;
+}
+
+static int pit_get_out(struct kvm *kvm, int channel)
+{
+	struct kvm_kpit_channel_state *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+	s64 d, t;
+	int out;
+
+	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
+	d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
+
+	switch (c->mode) {
+	default:
+	case 0:
+		out = (d >= c->count);
+		break;
+	case 1:
+		out = (d < c->count);
+		break;
+	case 2:
+		out = ((mod_64(d, c->count) == 0) && (d != 0));
+		break;
+	case 3:
+		out = (mod_64(d, c->count) < ((c->count + 1) >> 1));
+		break;
+	case 4:
+	case 5:
+		out = (d == c->count);
+		break;
+	}
+
+	return out;
+}
+
+static void pit_latch_count(struct kvm *kvm, int channel)
+{
+	struct kvm_kpit_channel_state *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+
+	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	if (!c->count_latched) {
+		c->latched_count = pit_get_count(kvm, channel);
+		c->count_latched = c->rw_mode;
+	}
+}
+
+static void pit_latch_status(struct kvm *kvm, int channel)
+{
+	struct kvm_kpit_channel_state *c =
+		&kvm->arch.vpit->pit_state.channels[channel];
+
+	WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
+
+	if (!c->status_latched) {
+		/* TODO: Return NULL COUNT (bit 6). */
+		c->status = ((pit_get_out(kvm, channel) << 7) |
+				(c->rw_mode << 4) |
+				(c->mode << 1) |
+				c->bcd);
+		c->status_latched = 1;
+	}
+}
+
+int __pit_timer_fn(struct kvm_kpit_state *ps)
+{
+	struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
+	struct kvm_kpit_timer *pt = &ps->pit_timer;
+
+	atomic_inc(&pt->pending);
+	smp_mb__after_atomic_inc();
+	/* FIXME: handle case where the guest is in guest mode */
+	if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
+		vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+		wake_up_interruptible(&vcpu0->wq);
+	}
+
+	pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
+	pt->scheduled = ktime_to_ns(pt->timer.expires);
+
+	return (pt->period == 0 ? 0 : 1);
+}
+
+int pit_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+
+	if (pit && vcpu->vcpu_id == 0)
+		return atomic_read(&pit->pit_state.pit_timer.pending);
+
+	return 0;
+}
+
+static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
+{
+	struct kvm_kpit_state *ps;
+	int restart_timer = 0;
+
+	ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
+
+	restart_timer = __pit_timer_fn(ps);
+
+	if (restart_timer)
+		return HRTIMER_RESTART;
+	else
+		return HRTIMER_NORESTART;
+}
+
+static void destroy_pit_timer(struct kvm_kpit_timer *pt)
+{
+	pr_debug("pit: execute del timer!\n");
+	hrtimer_cancel(&pt->timer);
+}
+
+static void create_pit_timer(struct kvm_kpit_timer *pt, u32 val, int is_period)
+{
+	s64 interval;
+
+	interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
+
+	pr_debug("pit: create pit timer, interval is %llu nsec\n", interval);
+
+	/* TODO The new value only affected after the retriggered */
+	hrtimer_cancel(&pt->timer);
+	pt->period = (is_period == 0) ? 0 : interval;
+	pt->timer.function = pit_timer_fn;
+	atomic_set(&pt->pending, 0);
+
+	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+		      HRTIMER_MODE_ABS);
+}
+
+static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
+
+	WARN_ON(!mutex_is_locked(&ps->lock));
+
+	pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
+
+	/*
+	 * Though spec said the state of 8254 is undefined after power-up,
+	 * seems some tricky OS like Windows XP depends on IRQ0 interrupt
+	 * when booting up.
+	 * So here setting initialize rate for it, and not a specific number
+	 */
+	if (val == 0)
+		val = 0x10000;
+
+	ps->channels[channel].count_load_time = ktime_get();
+	ps->channels[channel].count = val;
+
+	if (channel != 0)
+		return;
+
+	/* Two types of timer
+	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
+	switch (ps->channels[0].mode) {
+	case 1:
+		create_pit_timer(&ps->pit_timer, val, 0);
+		break;
+	case 2:
+		create_pit_timer(&ps->pit_timer, val, 1);
+		break;
+	default:
+		destroy_pit_timer(&ps->pit_timer);
+	}
+}
+
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val)
+{
+	mutex_lock(&kvm->arch.vpit->pit_state.lock);
+	pit_load_count(kvm, channel, val);
+	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+}
+
+static void pit_ioport_write(struct kvm_io_device *this,
+			     gpa_t addr, int len, const void *data)
+{
+	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_kpit_state *pit_state = &pit->pit_state;
+	struct kvm *kvm = pit->kvm;
+	int channel, access;
+	struct kvm_kpit_channel_state *s;
+	u32 val = *(u32 *) data;
+
+	val  &= 0xff;
+	addr &= KVM_PIT_CHANNEL_MASK;
+
+	mutex_lock(&pit_state->lock);
+
+	if (val != 0)
+		pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n",
+			  (unsigned int)addr, len, val);
+
+	if (addr == 3) {
+		channel = val >> 6;
+		if (channel == 3) {
+			/* Read-Back Command. */
+			for (channel = 0; channel < 3; channel++) {
+				s = &pit_state->channels[channel];
+				if (val & (2 << channel)) {
+					if (!(val & 0x20))
+						pit_latch_count(kvm, channel);
+					if (!(val & 0x10))
+						pit_latch_status(kvm, channel);
+				}
+			}
+		} else {
+			/* Select Counter <channel>. */
+			s = &pit_state->channels[channel];
+			access = (val >> 4) & KVM_PIT_CHANNEL_MASK;
+			if (access == 0) {
+				pit_latch_count(kvm, channel);
+			} else {
+				s->rw_mode = access;
+				s->read_state = access;
+				s->write_state = access;
+				s->mode = (val >> 1) & 7;
+				if (s->mode > 5)
+					s->mode -= 4;
+				s->bcd = val & 1;
+			}
+		}
+	} else {
+		/* Write Count. */
+		s = &pit_state->channels[addr];
+		switch (s->write_state) {
+		default:
+		case RW_STATE_LSB:
+			pit_load_count(kvm, addr, val);
+			break;
+		case RW_STATE_MSB:
+			pit_load_count(kvm, addr, val << 8);
+			break;
+		case RW_STATE_WORD0:
+			s->write_latch = val;
+			s->write_state = RW_STATE_WORD1;
+			break;
+		case RW_STATE_WORD1:
+			pit_load_count(kvm, addr, s->write_latch | (val << 8));
+			s->write_state = RW_STATE_WORD0;
+			break;
+		}
+	}
+
+	mutex_unlock(&pit_state->lock);
+}
+
+static void pit_ioport_read(struct kvm_io_device *this,
+			    gpa_t addr, int len, void *data)
+{
+	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_kpit_state *pit_state = &pit->pit_state;
+	struct kvm *kvm = pit->kvm;
+	int ret, count;
+	struct kvm_kpit_channel_state *s;
+
+	addr &= KVM_PIT_CHANNEL_MASK;
+	s = &pit_state->channels[addr];
+
+	mutex_lock(&pit_state->lock);
+
+	if (s->status_latched) {
+		s->status_latched = 0;
+		ret = s->status;
+	} else if (s->count_latched) {
+		switch (s->count_latched) {
+		default:
+		case RW_STATE_LSB:
+			ret = s->latched_count & 0xff;
+			s->count_latched = 0;
+			break;
+		case RW_STATE_MSB:
+			ret = s->latched_count >> 8;
+			s->count_latched = 0;
+			break;
+		case RW_STATE_WORD0:
+			ret = s->latched_count & 0xff;
+			s->count_latched = RW_STATE_MSB;
+			break;
+		}
+	} else {
+		switch (s->read_state) {
+		default:
+		case RW_STATE_LSB:
+			count = pit_get_count(kvm, addr);
+			ret = count & 0xff;
+			break;
+		case RW_STATE_MSB:
+			count = pit_get_count(kvm, addr);
+			ret = (count >> 8) & 0xff;
+			break;
+		case RW_STATE_WORD0:
+			count = pit_get_count(kvm, addr);
+			ret = count & 0xff;
+			s->read_state = RW_STATE_WORD1;
+			break;
+		case RW_STATE_WORD1:
+			count = pit_get_count(kvm, addr);
+			ret = (count >> 8) & 0xff;
+			s->read_state = RW_STATE_WORD0;
+			break;
+		}
+	}
+
+	if (len > sizeof(ret))
+		len = sizeof(ret);
+	memcpy(data, (char *)&ret, len);
+
+	mutex_unlock(&pit_state->lock);
+}
+
+static int pit_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+	return ((addr >= KVM_PIT_BASE_ADDRESS) &&
+		(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
+}
+
+static void speaker_ioport_write(struct kvm_io_device *this,
+				 gpa_t addr, int len, const void *data)
+{
+	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_kpit_state *pit_state = &pit->pit_state;
+	struct kvm *kvm = pit->kvm;
+	u32 val = *(u32 *) data;
+
+	mutex_lock(&pit_state->lock);
+	pit_state->speaker_data_on = (val >> 1) & 1;
+	pit_set_gate(kvm, 2, val & 1);
+	mutex_unlock(&pit_state->lock);
+}
+
+static void speaker_ioport_read(struct kvm_io_device *this,
+				gpa_t addr, int len, void *data)
+{
+	struct kvm_pit *pit = (struct kvm_pit *)this->private;
+	struct kvm_kpit_state *pit_state = &pit->pit_state;
+	struct kvm *kvm = pit->kvm;
+	unsigned int refresh_clock;
+	int ret;
+
+	/* Refresh clock toggles at about 15us. We approximate as 2^14ns. */
+	refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1;
+
+	mutex_lock(&pit_state->lock);
+	ret = ((pit_state->speaker_data_on << 1) | pit_get_gate(kvm, 2) |
+		(pit_get_out(kvm, 2) << 5) | (refresh_clock << 4));
+	if (len > sizeof(ret))
+		len = sizeof(ret);
+	memcpy(data, (char *)&ret, len);
+	mutex_unlock(&pit_state->lock);
+}
+
+static int speaker_in_range(struct kvm_io_device *this, gpa_t addr)
+{
+	return (addr == KVM_SPEAKER_BASE_ADDRESS);
+}
+
+void kvm_pit_reset(struct kvm_pit *pit)
+{
+	int i;
+	struct kvm_kpit_channel_state *c;
+
+	mutex_lock(&pit->pit_state.lock);
+	for (i = 0; i < 3; i++) {
+		c = &pit->pit_state.channels[i];
+		c->mode = 0xff;
+		c->gate = (i != 2);
+		pit_load_count(pit->kvm, i, 0);
+	}
+	mutex_unlock(&pit->pit_state.lock);
+
+	atomic_set(&pit->pit_state.pit_timer.pending, 0);
+	pit->pit_state.inject_pending = 1;
+}
+
+struct kvm_pit *kvm_create_pit(struct kvm *kvm)
+{
+	struct kvm_pit *pit;
+	struct kvm_kpit_state *pit_state;
+
+	pit = kzalloc(sizeof(struct kvm_pit), GFP_KERNEL);
+	if (!pit)
+		return NULL;
+
+	mutex_init(&pit->pit_state.lock);
+	mutex_lock(&pit->pit_state.lock);
+
+	/* Initialize PIO device */
+	pit->dev.read = pit_ioport_read;
+	pit->dev.write = pit_ioport_write;
+	pit->dev.in_range = pit_in_range;
+	pit->dev.private = pit;
+	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
+
+	pit->speaker_dev.read = speaker_ioport_read;
+	pit->speaker_dev.write = speaker_ioport_write;
+	pit->speaker_dev.in_range = speaker_in_range;
+	pit->speaker_dev.private = pit;
+	kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev);
+
+	kvm->arch.vpit = pit;
+	pit->kvm = kvm;
+
+	pit_state = &pit->pit_state;
+	pit_state->pit = pit;
+	hrtimer_init(&pit_state->pit_timer.timer,
+		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	mutex_unlock(&pit->pit_state.lock);
+
+	kvm_pit_reset(pit);
+
+	return pit;
+}
+
+void kvm_free_pit(struct kvm *kvm)
+{
+	struct hrtimer *timer;
+
+	if (kvm->arch.vpit) {
+		mutex_lock(&kvm->arch.vpit->pit_state.lock);
+		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+		hrtimer_cancel(timer);
+		mutex_unlock(&kvm->arch.vpit->pit_state.lock);
+		kfree(kvm->arch.vpit);
+	}
+}
+
+void __inject_pit_timer_intr(struct kvm *kvm)
+{
+	mutex_lock(&kvm->lock);
+	kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1);
+	kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 0);
+	kvm_pic_set_irq(pic_irqchip(kvm), 0, 1);
+	kvm_pic_set_irq(pic_irqchip(kvm), 0, 0);
+	mutex_unlock(&kvm->lock);
+}
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pit *pit = vcpu->kvm->arch.vpit;
+	struct kvm *kvm = vcpu->kvm;
+	struct kvm_kpit_state *ps;
+
+	if (vcpu && pit) {
+		ps = &pit->pit_state;
+
+		/* Try to inject pending interrupts when:
+		 * 1. Pending exists
+		 * 2. Last interrupt was accepted or waited for too long time*/
+		if (atomic_read(&ps->pit_timer.pending) &&
+		    (ps->inject_pending ||
+		    (jiffies - ps->last_injected_time
+				>= KVM_MAX_PIT_INTR_INTERVAL))) {
+			ps->inject_pending = 0;
+			__inject_pit_timer_intr(kvm);
+			ps->last_injected_time = jiffies;
+		}
+	}
+}
+
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
+{
+	struct kvm_arch *arch = &vcpu->kvm->arch;
+	struct kvm_kpit_state *ps;
+
+	if (vcpu && arch->vpit) {
+		ps = &arch->vpit->pit_state;
+		if (atomic_read(&ps->pit_timer.pending) &&
+		(((arch->vpic->pics[0].imr & 1) == 0 &&
+		  arch->vpic->pics[0].irq_base == vec) ||
+		  (arch->vioapic->redirtbl[0].fields.vector == vec &&
+		  arch->vioapic->redirtbl[0].fields.mask != 1))) {
+			ps->inject_pending = 1;
+			atomic_dec(&ps->pit_timer.pending);
+			ps->channels[0].count_load_time = ktime_get();
+		}
+	}
+}
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
new file mode 100644
index 000000000000..db25c2a6c8c4
--- /dev/null
+++ b/arch/x86/kvm/i8254.h
@@ -0,0 +1,63 @@
+#ifndef __I8254_H
+#define __I8254_H
+
+#include "iodev.h"
+
+struct kvm_kpit_timer {
+	struct hrtimer timer;
+	int irq;
+	s64 period; /* unit: ns */
+	s64 scheduled;
+	ktime_t last_update;
+	atomic_t pending;
+};
+
+struct kvm_kpit_channel_state {
+	u32 count; /* can be 65536 */
+	u16 latched_count;
+	u8 count_latched;
+	u8 status_latched;
+	u8 status;
+	u8 read_state;
+	u8 write_state;
+	u8 write_latch;
+	u8 rw_mode;
+	u8 mode;
+	u8 bcd; /* not supported */
+	u8 gate; /* timer start */
+	ktime_t count_load_time;
+};
+
+struct kvm_kpit_state {
+	struct kvm_kpit_channel_state channels[3];
+	struct kvm_kpit_timer pit_timer;
+	u32    speaker_data_on;
+	struct mutex lock;
+	struct kvm_pit *pit;
+	bool inject_pending; /* if inject pending interrupts */
+	unsigned long last_injected_time;
+};
+
+struct kvm_pit {
+	unsigned long base_addresss;
+	struct kvm_io_device dev;
+	struct kvm_io_device speaker_dev;
+	struct kvm *kvm;
+	struct kvm_kpit_state pit_state;
+};
+
+#define KVM_PIT_BASE_ADDRESS	    0x40
+#define KVM_SPEAKER_BASE_ADDRESS    0x61
+#define KVM_PIT_MEM_LENGTH	    4
+#define KVM_PIT_FREQ		    1193181
+#define KVM_MAX_PIT_INTR_INTERVAL   HZ / 100
+#define KVM_PIT_CHANNEL_MASK	    0x3
+
+void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_pit_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
+void kvm_pit_load_count(struct kvm *kvm, int channel, u32 val);
+struct kvm_pit *kvm_create_pit(struct kvm *kvm);
+void kvm_free_pit(struct kvm *kvm);
+void kvm_pit_reset(struct kvm_pit *pit);
+
+#endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index e5714759e97f..ce1f583459b1 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -23,6 +23,22 @@
 #include <linux/kvm_host.h>
 
 #include "irq.h"
+#include "i8254.h"
+
+/*
+ * check if there are pending timer events
+ * to be processed.
+ */
+int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	ret = pit_has_pending_timer(vcpu);
+	ret |= apic_has_pending_timer(vcpu);
+
+	return ret;
+}
+EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
  * check if there is pending interrupt without
@@ -66,6 +82,7 @@ EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	kvm_inject_apic_timer_irqs(vcpu);
+	kvm_inject_pit_timer_irqs(vcpu);
 	/* TODO: PIT, RTC etc. */
 }
 EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
@@ -73,6 +90,7 @@ EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
 {
 	kvm_apic_timer_intr_post(vcpu, vec);
+	kvm_pit_timer_intr_post(vcpu, vec);
 	/* TODO: PIT, RTC etc. */
 }
 EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index fa5ed5d59b5d..1802134b836f 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -85,4 +85,7 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 
+int pit_has_pending_timer(struct kvm_vcpu *vcpu);
+int apic_has_pending_timer(struct kvm_vcpu *vcpu);
+
 #endif
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h
index ecdfe97e4635..65ef0fc2c036 100644
--- a/arch/x86/kvm/kvm_svm.h
+++ b/arch/x86/kvm/kvm_svm.h
@@ -39,6 +39,8 @@ struct vcpu_svm {
 	unsigned long host_db_regs[NUM_DB_REGS];
 	unsigned long host_dr6;
 	unsigned long host_dr7;
+
+	u32 *msrpm;
 };
 
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 68a6b1511934..57ac4e4c556a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -338,10 +338,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		} else
 			apic_clear_vector(vector, apic->regs + APIC_TMR);
 
-		if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
+		if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
 			kvm_vcpu_kick(vcpu);
-		else if (vcpu->arch.mp_state == VCPU_MP_STATE_HALTED) {
-			vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+		else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
+			vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 			if (waitqueue_active(&vcpu->wq))
 				wake_up_interruptible(&vcpu->wq);
 		}
@@ -362,11 +362,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 
 	case APIC_DM_INIT:
 		if (level) {
-			if (vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE)
+			if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
 				printk(KERN_DEBUG
 				       "INIT on a runnable vcpu %d\n",
 				       vcpu->vcpu_id);
-			vcpu->arch.mp_state = VCPU_MP_STATE_INIT_RECEIVED;
+			vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
 			kvm_vcpu_kick(vcpu);
 		} else {
 			printk(KERN_DEBUG
@@ -379,9 +379,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 	case APIC_DM_STARTUP:
 		printk(KERN_DEBUG "SIPI to vcpu %d vector 0x%02x\n",
 		       vcpu->vcpu_id, vector);
-		if (vcpu->arch.mp_state == VCPU_MP_STATE_INIT_RECEIVED) {
+		if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
 			vcpu->arch.sipi_vector = vector;
-			vcpu->arch.mp_state = VCPU_MP_STATE_SIPI_RECEIVED;
+			vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
 			if (waitqueue_active(&vcpu->wq))
 				wake_up_interruptible(&vcpu->wq);
 		}
@@ -658,7 +658,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
 			   PRIx64 ", "
 			   "timer initial count 0x%x, period %lldns, "
-			   "expire @ 0x%016" PRIx64 ".\n", __FUNCTION__,
+			   "expire @ 0x%016" PRIx64 ".\n", __func__,
 			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
 			   apic_get_reg(apic, APIC_TMICT),
 			   apic->timer.period,
@@ -691,7 +691,7 @@ static void apic_mmio_write(struct kvm_io_device *this,
 	/* too common printing */
 	if (offset != APIC_EOI)
 		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
-			   "0x%x\n", __FUNCTION__, offset, len, val);
+			   "0x%x\n", __func__, offset, len, val);
 
 	offset &= 0xff0;
 
@@ -822,6 +822,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
 		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
+EXPORT_SYMBOL_GPL(kvm_lapic_set_tpr);
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
@@ -869,7 +870,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic;
 	int i;
 
-	apic_debug("%s\n", __FUNCTION__);
+	apic_debug("%s\n", __func__);
 
 	ASSERT(vcpu);
 	apic = vcpu->arch.apic;
@@ -907,7 +908,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	apic_update_ppr(apic);
 
 	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
-		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
+		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
 		   vcpu, kvm_apic_id(apic),
 		   vcpu->arch.apic_base, apic->base_address);
 }
@@ -940,7 +941,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 
 	atomic_inc(&apic->timer.pending);
 	if (waitqueue_active(q)) {
-		apic->vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+		apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 		wake_up_interruptible(q);
 	}
 	if (apic_lvtt_period(apic)) {
@@ -952,6 +953,16 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 	return result;
 }
 
+int apic_has_pending_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *lapic = vcpu->arch.apic;
+
+	if (lapic)
+		return atomic_read(&lapic->timer.pending);
+
+	return 0;
+}
+
 static int __inject_apic_timer_irq(struct kvm_lapic *apic)
 {
 	int vector;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e55af12e11b7..2ad6f5481671 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -27,11 +27,22 @@
 #include <linux/highmem.h>
 #include <linux/module.h>
 #include <linux/swap.h>
+#include <linux/hugetlb.h>
+#include <linux/compiler.h>
 
 #include <asm/page.h>
 #include <asm/cmpxchg.h>
 #include <asm/io.h>
 
+/*
+ * When setting this variable to true it enables Two-Dimensional-Paging
+ * where the hardware walks 2 page tables:
+ * 1. the guest-virtual to guest-physical
+ * 2. while doing 1. it walks guest-physical to host-physical
+ * If the hardware supports that we don't need to do shadow paging.
+ */
+bool tdp_enabled = false;
+
 #undef MMU_DEBUG
 
 #undef AUDIT
@@ -101,8 +112,6 @@ static int dbg = 1;
 #define PT_FIRST_AVAIL_BITS_SHIFT 9
 #define PT64_SECOND_AVAIL_BITS_SHIFT 52
 
-#define PT_SHADOW_IO_MARK (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
-
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define PT64_LEVEL_BITS 9
@@ -159,6 +168,13 @@ static int dbg = 1;
 #define ACC_USER_MASK    PT_USER_MASK
 #define ACC_ALL          (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
 
+struct kvm_pv_mmu_op_buffer {
+	void *ptr;
+	unsigned len;
+	unsigned processed;
+	char buf[512] __aligned(sizeof(long));
+};
+
 struct kvm_rmap_desc {
 	u64 *shadow_ptes[RMAP_EXT];
 	struct kvm_rmap_desc *more;
@@ -200,11 +216,15 @@ static int is_present_pte(unsigned long pte)
 
 static int is_shadow_present_pte(u64 pte)
 {
-	pte &= ~PT_SHADOW_IO_MARK;
 	return pte != shadow_trap_nonpresent_pte
 		&& pte != shadow_notrap_nonpresent_pte;
 }
 
+static int is_large_pte(u64 pte)
+{
+	return pte & PT_PAGE_SIZE_MASK;
+}
+
 static int is_writeble_pte(unsigned long pte)
 {
 	return pte & PT_WRITABLE_MASK;
@@ -215,14 +235,14 @@ static int is_dirty_pte(unsigned long pte)
 	return pte & PT_DIRTY_MASK;
 }
 
-static int is_io_pte(unsigned long pte)
+static int is_rmap_pte(u64 pte)
 {
-	return pte & PT_SHADOW_IO_MARK;
+	return is_shadow_present_pte(pte);
 }
 
-static int is_rmap_pte(u64 pte)
+static pfn_t spte_to_pfn(u64 pte)
 {
-	return is_shadow_present_pte(pte);
+	return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 }
 
 static gfn_t pse36_gfn_delta(u32 gpte)
@@ -349,16 +369,100 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 }
 
 /*
+ * Return the pointer to the largepage write count for a given
+ * gfn, handling slots that are not large page aligned.
+ */
+static int *slot_largepage_idx(gfn_t gfn, struct kvm_memory_slot *slot)
+{
+	unsigned long idx;
+
+	idx = (gfn / KVM_PAGES_PER_HPAGE) -
+	      (slot->base_gfn / KVM_PAGES_PER_HPAGE);
+	return &slot->lpage_info[idx].write_count;
+}
+
+static void account_shadowed(struct kvm *kvm, gfn_t gfn)
+{
+	int *write_count;
+
+	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	*write_count += 1;
+	WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
+}
+
+static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
+{
+	int *write_count;
+
+	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+	*write_count -= 1;
+	WARN_ON(*write_count < 0);
+}
+
+static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn)
+{
+	struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+	int *largepage_idx;
+
+	if (slot) {
+		largepage_idx = slot_largepage_idx(gfn, slot);
+		return *largepage_idx;
+	}
+
+	return 1;
+}
+
+static int host_largepage_backed(struct kvm *kvm, gfn_t gfn)
+{
+	struct vm_area_struct *vma;
+	unsigned long addr;
+
+	addr = gfn_to_hva(kvm, gfn);
+	if (kvm_is_error_hva(addr))
+		return 0;
+
+	vma = find_vma(current->mm, addr);
+	if (vma && is_vm_hugetlb_page(vma))
+		return 1;
+
+	return 0;
+}
+
+static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
+{
+	struct kvm_memory_slot *slot;
+
+	if (has_wrprotected_page(vcpu->kvm, large_gfn))
+		return 0;
+
+	if (!host_largepage_backed(vcpu->kvm, large_gfn))
+		return 0;
+
+	slot = gfn_to_memslot(vcpu->kvm, large_gfn);
+	if (slot && slot->dirty_bitmap)
+		return 0;
+
+	return 1;
+}
+
+/*
  * Take gfn and return the reverse mapping to it.
  * Note: gfn must be unaliased before this function get called
  */
 
-static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
 {
 	struct kvm_memory_slot *slot;
+	unsigned long idx;
 
 	slot = gfn_to_memslot(kvm, gfn);
-	return &slot->rmap[gfn - slot->base_gfn];
+	if (!lpage)
+		return &slot->rmap[gfn - slot->base_gfn];
+
+	idx = (gfn / KVM_PAGES_PER_HPAGE) -
+	      (slot->base_gfn / KVM_PAGES_PER_HPAGE);
+
+	return &slot->lpage_info[idx].rmap_pde;
 }
 
 /*
@@ -370,7 +474,7 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
  * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
  * containing more mappings.
  */
-static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
+static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
 {
 	struct kvm_mmu_page *sp;
 	struct kvm_rmap_desc *desc;
@@ -382,7 +486,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 	gfn = unalias_gfn(vcpu->kvm, gfn);
 	sp = page_header(__pa(spte));
 	sp->gfns[spte - sp->spt] = gfn;
-	rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+	rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
 	if (!*rmapp) {
 		rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
 		*rmapp = (unsigned long)spte;
@@ -435,20 +539,21 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
 	struct kvm_rmap_desc *desc;
 	struct kvm_rmap_desc *prev_desc;
 	struct kvm_mmu_page *sp;
-	struct page *page;
+	pfn_t pfn;
 	unsigned long *rmapp;
 	int i;
 
 	if (!is_rmap_pte(*spte))
 		return;
 	sp = page_header(__pa(spte));
-	page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
-	mark_page_accessed(page);
+	pfn = spte_to_pfn(*spte);
+	if (*spte & PT_ACCESSED_MASK)
+		kvm_set_pfn_accessed(pfn);
 	if (is_writeble_pte(*spte))
-		kvm_release_page_dirty(page);
+		kvm_release_pfn_dirty(pfn);
 	else
-		kvm_release_page_clean(page);
-	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt]);
+		kvm_release_pfn_clean(pfn);
+	rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], is_large_pte(*spte));
 	if (!*rmapp) {
 		printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
 		BUG();
@@ -514,7 +619,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 	int write_protected = 0;
 
 	gfn = unalias_gfn(kvm, gfn);
-	rmapp = gfn_to_rmap(kvm, gfn);
+	rmapp = gfn_to_rmap(kvm, gfn, 0);
 
 	spte = rmap_next(kvm, rmapp, NULL);
 	while (spte) {
@@ -527,8 +632,35 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 		}
 		spte = rmap_next(kvm, rmapp, spte);
 	}
+	if (write_protected) {
+		pfn_t pfn;
+
+		spte = rmap_next(kvm, rmapp, NULL);
+		pfn = spte_to_pfn(*spte);
+		kvm_set_pfn_dirty(pfn);
+	}
+
+	/* check for huge page mappings */
+	rmapp = gfn_to_rmap(kvm, gfn, 1);
+	spte = rmap_next(kvm, rmapp, NULL);
+	while (spte) {
+		BUG_ON(!spte);
+		BUG_ON(!(*spte & PT_PRESENT_MASK));
+		BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
+		pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
+		if (is_writeble_pte(*spte)) {
+			rmap_remove(kvm, spte);
+			--kvm->stat.lpages;
+			set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+			write_protected = 1;
+		}
+		spte = rmap_next(kvm, rmapp, spte);
+	}
+
 	if (write_protected)
 		kvm_flush_remote_tlbs(kvm);
+
+	account_shadowed(kvm, gfn);
 }
 
 #ifdef MMU_DEBUG
@@ -538,8 +670,8 @@ static int is_empty_shadow_page(u64 *spt)
 	u64 *end;
 
 	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
-		if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) {
-			printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
+		if (*pos != shadow_trap_nonpresent_pte) {
+			printk(KERN_ERR "%s: %p %llx\n", __func__,
 			       pos, *pos);
 			return 0;
 		}
@@ -559,7 +691,7 @@ static void kvm_mmu_free_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static unsigned kvm_page_table_hashfn(gfn_t gfn)
 {
-	return gfn;
+	return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1);
 }
 
 static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
@@ -662,13 +794,14 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn)
 	struct kvm_mmu_page *sp;
 	struct hlist_node *node;
 
-	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry(sp, node, bucket, hash_link)
-		if (sp->gfn == gfn && !sp->role.metaphysical) {
+		if (sp->gfn == gfn && !sp->role.metaphysical
+		    && !sp->role.invalid) {
 			pgprintk("%s: found role %x\n",
-				 __FUNCTION__, sp->role.word);
+				 __func__, sp->role.word);
 			return sp;
 		}
 	return NULL;
@@ -699,27 +832,27 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 		quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
 		role.quadrant = quadrant;
 	}
-	pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
+	pgprintk("%s: looking gfn %lx role %x\n", __func__,
 		 gfn, role.word);
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry(sp, node, bucket, hash_link)
 		if (sp->gfn == gfn && sp->role.word == role.word) {
 			mmu_page_add_parent_pte(vcpu, sp, parent_pte);
-			pgprintk("%s: found\n", __FUNCTION__);
+			pgprintk("%s: found\n", __func__);
 			return sp;
 		}
 	++vcpu->kvm->stat.mmu_cache_miss;
 	sp = kvm_mmu_alloc_page(vcpu, parent_pte);
 	if (!sp)
 		return sp;
-	pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
+	pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word);
 	sp->gfn = gfn;
 	sp->role = role;
 	hlist_add_head(&sp->hash_link, bucket);
-	vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	if (!metaphysical)
 		rmap_write_protect(vcpu->kvm, gfn);
+	vcpu->arch.mmu.prefetch_page(vcpu, sp);
 	return sp;
 }
 
@@ -745,11 +878,17 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 	for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
 		ent = pt[i];
 
+		if (is_shadow_present_pte(ent)) {
+			if (!is_large_pte(ent)) {
+				ent &= PT64_BASE_ADDR_MASK;
+				mmu_page_remove_parent_pte(page_header(ent),
+							   &pt[i]);
+			} else {
+				--kvm->stat.lpages;
+				rmap_remove(kvm, &pt[i]);
+			}
+		}
 		pt[i] = shadow_trap_nonpresent_pte;
-		if (!is_shadow_present_pte(ent))
-			continue;
-		ent &= PT64_BASE_ADDR_MASK;
-		mmu_page_remove_parent_pte(page_header(ent), &pt[i]);
 	}
 	kvm_flush_remote_tlbs(kvm);
 }
@@ -789,10 +928,15 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	}
 	kvm_mmu_page_unlink_children(kvm, sp);
 	if (!sp->root_count) {
+		if (!sp->role.metaphysical)
+			unaccount_shadowed(kvm, sp->gfn);
 		hlist_del(&sp->hash_link);
 		kvm_mmu_free_page(kvm, sp);
-	} else
+	} else {
 		list_move(&sp->link, &kvm->arch.active_mmu_pages);
+		sp->role.invalid = 1;
+		kvm_reload_remote_mmus(kvm);
+	}
 	kvm_mmu_reset_last_pte_updated(kvm);
 }
 
@@ -838,13 +982,13 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 	struct hlist_node *node, *n;
 	int r;
 
-	pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
+	pgprintk("%s: looking for gfn %lx\n", __func__, gfn);
 	r = 0;
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
 		if (sp->gfn == gfn && !sp->role.metaphysical) {
-			pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
+			pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
 				 sp->role.word);
 			kvm_mmu_zap_page(kvm, sp);
 			r = 1;
@@ -857,7 +1001,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 	struct kvm_mmu_page *sp;
 
 	while ((sp = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
-		pgprintk("%s: zap %lx %x\n", __FUNCTION__, gfn, sp->role.word);
+		pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word);
 		kvm_mmu_zap_page(kvm, sp);
 	}
 }
@@ -889,26 +1033,39 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 			 unsigned pt_access, unsigned pte_access,
 			 int user_fault, int write_fault, int dirty,
-			 int *ptwrite, gfn_t gfn, struct page *page)
+			 int *ptwrite, int largepage, gfn_t gfn,
+			 pfn_t pfn, bool speculative)
 {
 	u64 spte;
 	int was_rmapped = 0;
 	int was_writeble = is_writeble_pte(*shadow_pte);
-	hfn_t host_pfn = (*shadow_pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 	pgprintk("%s: spte %llx access %x write_fault %d"
 		 " user_fault %d gfn %lx\n",
-		 __FUNCTION__, *shadow_pte, pt_access,
+		 __func__, *shadow_pte, pt_access,
 		 write_fault, user_fault, gfn);
 
 	if (is_rmap_pte(*shadow_pte)) {
-		if (host_pfn != page_to_pfn(page)) {
+		/*
+		 * If we overwrite a PTE page pointer with a 2MB PMD, unlink
+		 * the parent of the now unreachable PTE.
+		 */
+		if (largepage && !is_large_pte(*shadow_pte)) {
+			struct kvm_mmu_page *child;
+			u64 pte = *shadow_pte;
+
+			child = page_header(pte & PT64_BASE_ADDR_MASK);
+			mmu_page_remove_parent_pte(child, shadow_pte);
+		} else if (pfn != spte_to_pfn(*shadow_pte)) {
 			pgprintk("hfn old %lx new %lx\n",
-				 host_pfn, page_to_pfn(page));
+				 spte_to_pfn(*shadow_pte), pfn);
 			rmap_remove(vcpu->kvm, shadow_pte);
+		} else {
+			if (largepage)
+				was_rmapped = is_large_pte(*shadow_pte);
+			else
+				was_rmapped = 1;
 		}
-		else
-			was_rmapped = 1;
 	}
 
 	/*
@@ -917,6 +1074,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	 * demand paging).
 	 */
 	spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
+	if (!speculative)
+		pte_access |= PT_ACCESSED_MASK;
 	if (!dirty)
 		pte_access &= ~ACC_WRITE_MASK;
 	if (!(pte_access & ACC_EXEC_MASK))
@@ -925,15 +1084,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 	spte |= PT_PRESENT_MASK;
 	if (pte_access & ACC_USER_MASK)
 		spte |= PT_USER_MASK;
+	if (largepage)
+		spte |= PT_PAGE_SIZE_MASK;
 
-	if (is_error_page(page)) {
-		set_shadow_pte(shadow_pte,
-			       shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
-		kvm_release_page_clean(page);
-		return;
-	}
-
-	spte |= page_to_phys(page);
+	spte |= (u64)pfn << PAGE_SHIFT;
 
 	if ((pte_access & ACC_WRITE_MASK)
 	    || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
@@ -946,9 +1100,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
 		}
 
 		shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
-		if (shadow) {
+		if (shadow ||
+		   (largepage && has_wrprotected_page(vcpu->kvm, gfn))) {
 			pgprintk("%s: found shadow page for %lx, marking ro\n",
-				 __FUNCTION__, gfn);
+				 __func__, gfn);
 			pte_access &= ~ACC_WRITE_MASK;
 			if (is_writeble_pte(spte)) {
 				spte &= ~PT_WRITABLE_MASK;
@@ -964,18 +1119,25 @@ unshadowed:
 	if (pte_access & ACC_WRITE_MASK)
 		mark_page_dirty(vcpu->kvm, gfn);
 
-	pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
+	pgprintk("%s: setting spte %llx\n", __func__, spte);
+	pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n",
+		 (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB",
+		 (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte);
 	set_shadow_pte(shadow_pte, spte);
+	if (!was_rmapped && (spte & PT_PAGE_SIZE_MASK)
+	    && (spte & PT_PRESENT_MASK))
+		++vcpu->kvm->stat.lpages;
+
 	page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
 	if (!was_rmapped) {
-		rmap_add(vcpu, shadow_pte, gfn);
+		rmap_add(vcpu, shadow_pte, gfn, largepage);
 		if (!is_rmap_pte(*shadow_pte))
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 	} else {
 		if (was_writeble)
-			kvm_release_page_dirty(page);
+			kvm_release_pfn_dirty(pfn);
 		else
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 	}
 	if (!ptwrite || !*ptwrite)
 		vcpu->arch.last_pte_updated = shadow_pte;
@@ -985,10 +1147,10 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
 {
 }
 
-static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
-			   gfn_t gfn, struct page *page)
+static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+			   int largepage, gfn_t gfn, pfn_t pfn,
+			   int level)
 {
-	int level = PT32E_ROOT_LEVEL;
 	hpa_t table_addr = vcpu->arch.mmu.root_hpa;
 	int pt_write = 0;
 
@@ -1001,8 +1163,14 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
 
 		if (level == 1) {
 			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
-				     0, write, 1, &pt_write, gfn, page);
-			return pt_write || is_io_pte(table[index]);
+				     0, write, 1, &pt_write, 0, gfn, pfn, false);
+			return pt_write;
+		}
+
+		if (largepage && level == 2) {
+			mmu_set_spte(vcpu, &table[index], ACC_ALL, ACC_ALL,
+				     0, write, 1, &pt_write, 1, gfn, pfn, false);
+			return pt_write;
 		}
 
 		if (table[index] == shadow_trap_nonpresent_pte) {
@@ -1016,7 +1184,7 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
 						     1, ACC_ALL, &table[index]);
 			if (!new_table) {
 				pgprintk("nonpaging_map: ENOMEM\n");
-				kvm_release_page_clean(page);
+				kvm_release_pfn_clean(pfn);
 				return -ENOMEM;
 			}
 
@@ -1030,21 +1198,30 @@ static int __nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write,
 static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
 {
 	int r;
-
-	struct page *page;
-
-	down_read(&vcpu->kvm->slots_lock);
+	int largepage = 0;
+	pfn_t pfn;
 
 	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, gfn);
+	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
+		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+		largepage = 1;
+	}
+
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
+	/* mmio */
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
+		return 1;
+	}
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
-	r = __nonpaging_map(vcpu, v, write, gfn, page);
+	r = __direct_map(vcpu, v, write, largepage, gfn, pfn,
+			 PT32E_ROOT_LEVEL);
 	spin_unlock(&vcpu->kvm->mmu_lock);
 
-	up_read(&vcpu->kvm->slots_lock);
 
 	return r;
 }
@@ -1073,6 +1250,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 
 		sp = page_header(root);
 		--sp->root_count;
+		if (!sp->root_count && sp->role.invalid)
+			kvm_mmu_zap_page(vcpu->kvm, sp);
 		vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 		spin_unlock(&vcpu->kvm->mmu_lock);
 		return;
@@ -1085,6 +1264,8 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
 			root &= PT64_BASE_ADDR_MASK;
 			sp = page_header(root);
 			--sp->root_count;
+			if (!sp->root_count && sp->role.invalid)
+				kvm_mmu_zap_page(vcpu->kvm, sp);
 		}
 		vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
 	}
@@ -1097,6 +1278,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 	int i;
 	gfn_t root_gfn;
 	struct kvm_mmu_page *sp;
+	int metaphysical = 0;
 
 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
 
@@ -1105,14 +1287,20 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		hpa_t root = vcpu->arch.mmu.root_hpa;
 
 		ASSERT(!VALID_PAGE(root));
+		if (tdp_enabled)
+			metaphysical = 1;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
-				      PT64_ROOT_LEVEL, 0, ACC_ALL, NULL);
+				      PT64_ROOT_LEVEL, metaphysical,
+				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
 		vcpu->arch.mmu.root_hpa = root;
 		return;
 	}
 #endif
+	metaphysical = !is_paging(vcpu);
+	if (tdp_enabled)
+		metaphysical = 1;
 	for (i = 0; i < 4; ++i) {
 		hpa_t root = vcpu->arch.mmu.pae_root[i];
 
@@ -1126,7 +1314,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
 		} else if (vcpu->arch.mmu.root_level == 0)
 			root_gfn = 0;
 		sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
-				      PT32_ROOT_LEVEL, !is_paging(vcpu),
+				      PT32_ROOT_LEVEL, metaphysical,
 				      ACC_ALL, NULL);
 		root = __pa(sp->spt);
 		++sp->root_count;
@@ -1146,7 +1334,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 	gfn_t gfn;
 	int r;
 
-	pgprintk("%s: gva %lx error %x\n", __FUNCTION__, gva, error_code);
+	pgprintk("%s: gva %lx error %x\n", __func__, gva, error_code);
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		return r;
@@ -1160,6 +1348,41 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
 			     error_code & PFERR_WRITE_MASK, gfn);
 }
 
+static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
+				u32 error_code)
+{
+	pfn_t pfn;
+	int r;
+	int largepage = 0;
+	gfn_t gfn = gpa >> PAGE_SHIFT;
+
+	ASSERT(vcpu);
+	ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa));
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	down_read(&current->mm->mmap_sem);
+	if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
+		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+		largepage = 1;
+	}
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
+	up_read(&current->mm->mmap_sem);
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
+		return 1;
+	}
+	spin_lock(&vcpu->kvm->mmu_lock);
+	kvm_mmu_free_some_pages(vcpu);
+	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+
+	return r;
+}
+
 static void nonpaging_free(struct kvm_vcpu *vcpu)
 {
 	mmu_free_roots(vcpu);
@@ -1188,7 +1411,7 @@ void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
 
 static void paging_new_cr3(struct kvm_vcpu *vcpu)
 {
-	pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->arch.cr3);
+	pgprintk("%s: cr3 %lx\n", __func__, vcpu->arch.cr3);
 	mmu_free_roots(vcpu);
 }
 
@@ -1253,7 +1476,35 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu)
 	return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
 }
 
-static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+{
+	struct kvm_mmu *context = &vcpu->arch.mmu;
+
+	context->new_cr3 = nonpaging_new_cr3;
+	context->page_fault = tdp_page_fault;
+	context->free = nonpaging_free;
+	context->prefetch_page = nonpaging_prefetch_page;
+	context->shadow_root_level = TDP_ROOT_LEVEL;
+	context->root_hpa = INVALID_PAGE;
+
+	if (!is_paging(vcpu)) {
+		context->gva_to_gpa = nonpaging_gva_to_gpa;
+		context->root_level = 0;
+	} else if (is_long_mode(vcpu)) {
+		context->gva_to_gpa = paging64_gva_to_gpa;
+		context->root_level = PT64_ROOT_LEVEL;
+	} else if (is_pae(vcpu)) {
+		context->gva_to_gpa = paging64_gva_to_gpa;
+		context->root_level = PT32E_ROOT_LEVEL;
+	} else {
+		context->gva_to_gpa = paging32_gva_to_gpa;
+		context->root_level = PT32_ROOT_LEVEL;
+	}
+
+	return 0;
+}
+
+static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
 	ASSERT(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
@@ -1268,6 +1519,16 @@ static int init_kvm_mmu(struct kvm_vcpu *vcpu)
 		return paging32_init_context(vcpu);
 }
 
+static int init_kvm_mmu(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.update_pte.pfn = bad_pfn;
+
+	if (tdp_enabled)
+		return init_kvm_tdp_mmu(vcpu);
+	else
+		return init_kvm_softmmu(vcpu);
+}
+
 static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
 {
 	ASSERT(vcpu);
@@ -1316,7 +1577,8 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 
 	pte = *spte;
 	if (is_shadow_present_pte(pte)) {
-		if (sp->role.level == PT_PAGE_TABLE_LEVEL)
+		if (sp->role.level == PT_PAGE_TABLE_LEVEL ||
+		    is_large_pte(pte))
 			rmap_remove(vcpu->kvm, spte);
 		else {
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
@@ -1324,24 +1586,26 @@ static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu,
 		}
 	}
 	set_shadow_pte(spte, shadow_trap_nonpresent_pte);
+	if (is_large_pte(pte))
+		--vcpu->kvm->stat.lpages;
 }
 
 static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
 				  struct kvm_mmu_page *sp,
 				  u64 *spte,
-				  const void *new, int bytes,
-				  int offset_in_pte)
+				  const void *new)
 {
-	if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
+	if ((sp->role.level != PT_PAGE_TABLE_LEVEL)
+	    && !vcpu->arch.update_pte.largepage) {
 		++vcpu->kvm->stat.mmu_pde_zapped;
 		return;
 	}
 
 	++vcpu->kvm->stat.mmu_pte_updated;
 	if (sp->role.glevels == PT32_ROOT_LEVEL)
-		paging32_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+		paging32_update_pte(vcpu, sp, spte, new);
 	else
-		paging64_update_pte(vcpu, sp, spte, new, bytes, offset_in_pte);
+		paging64_update_pte(vcpu, sp, spte, new);
 }
 
 static bool need_remote_flush(u64 old, u64 new)
@@ -1378,7 +1642,9 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn_t gfn;
 	int r;
 	u64 gpte = 0;
-	struct page *page;
+	pfn_t pfn;
+
+	vcpu->arch.update_pte.largepage = 0;
 
 	if (bytes != 4 && bytes != 8)
 		return;
@@ -1408,11 +1674,19 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
 	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, gfn);
+	if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
+		gfn &= ~(KVM_PAGES_PER_HPAGE-1);
+		vcpu->arch.update_pte.largepage = 1;
+	}
+	pfn = gfn_to_pfn(vcpu->kvm, gfn);
 	up_read(&current->mm->mmap_sem);
 
+	if (is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
+		return;
+	}
 	vcpu->arch.update_pte.gfn = gfn;
-	vcpu->arch.update_pte.page = page;
+	vcpu->arch.update_pte.pfn = pfn;
 }
 
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
@@ -1423,7 +1697,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	struct hlist_node *node, *n;
 	struct hlist_head *bucket;
 	unsigned index;
-	u64 entry;
+	u64 entry, gentry;
 	u64 *spte;
 	unsigned offset = offset_in_page(gpa);
 	unsigned pte_size;
@@ -1433,8 +1707,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 	int level;
 	int flooded = 0;
 	int npte;
+	int r;
 
-	pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
+	pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
 	mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
@@ -1450,7 +1725,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 		vcpu->arch.last_pt_write_count = 1;
 		vcpu->arch.last_pte_updated = NULL;
 	}
-	index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
+	index = kvm_page_table_hashfn(gfn);
 	bucket = &vcpu->kvm->arch.mmu_page_hash[index];
 	hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
 		if (sp->gfn != gfn || sp->role.metaphysical)
@@ -1496,20 +1771,29 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 				continue;
 		}
 		spte = &sp->spt[page_offset / sizeof(*spte)];
+		if ((gpa & (pte_size - 1)) || (bytes < pte_size)) {
+			gentry = 0;
+			r = kvm_read_guest_atomic(vcpu->kvm,
+						  gpa & ~(u64)(pte_size - 1),
+						  &gentry, pte_size);
+			new = (const void *)&gentry;
+			if (r < 0)
+				new = NULL;
+		}
 		while (npte--) {
 			entry = *spte;
 			mmu_pte_write_zap_pte(vcpu, sp, spte);
-			mmu_pte_write_new_pte(vcpu, sp, spte, new, bytes,
-					      page_offset & (pte_size - 1));
+			if (new)
+				mmu_pte_write_new_pte(vcpu, sp, spte, new);
 			mmu_pte_write_flush_tlb(vcpu, entry, *spte);
 			++spte;
 		}
 	}
 	kvm_mmu_audit(vcpu, "post pte write");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	if (vcpu->arch.update_pte.page) {
-		kvm_release_page_clean(vcpu->arch.update_pte.page);
-		vcpu->arch.update_pte.page = NULL;
+	if (!is_error_pfn(vcpu->arch.update_pte.pfn)) {
+		kvm_release_pfn_clean(vcpu->arch.update_pte.pfn);
+		vcpu->arch.update_pte.pfn = bad_pfn;
 	}
 }
 
@@ -1518,9 +1802,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
 	gpa_t gpa;
 	int r;
 
-	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
-	up_read(&vcpu->kvm->slots_lock);
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 	r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -1577,6 +1859,12 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
 
+void kvm_enable_tdp(void)
+{
+	tdp_enabled = true;
+}
+EXPORT_SYMBOL_GPL(kvm_enable_tdp);
+
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
 	struct kvm_mmu_page *sp;
@@ -1677,7 +1965,53 @@ void kvm_mmu_zap_all(struct kvm *kvm)
 	kvm_flush_remote_tlbs(kvm);
 }
 
-void kvm_mmu_module_exit(void)
+void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm)
+{
+	struct kvm_mmu_page *page;
+
+	page = container_of(kvm->arch.active_mmu_pages.prev,
+			    struct kvm_mmu_page, link);
+	kvm_mmu_zap_page(kvm, page);
+}
+
+static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+{
+	struct kvm *kvm;
+	struct kvm *kvm_freed = NULL;
+	int cache_count = 0;
+
+	spin_lock(&kvm_lock);
+
+	list_for_each_entry(kvm, &vm_list, vm_list) {
+		int npages;
+
+		spin_lock(&kvm->mmu_lock);
+		npages = kvm->arch.n_alloc_mmu_pages -
+			 kvm->arch.n_free_mmu_pages;
+		cache_count += npages;
+		if (!kvm_freed && nr_to_scan > 0 && npages > 0) {
+			kvm_mmu_remove_one_alloc_mmu_page(kvm);
+			cache_count--;
+			kvm_freed = kvm;
+		}
+		nr_to_scan--;
+
+		spin_unlock(&kvm->mmu_lock);
+	}
+	if (kvm_freed)
+		list_move_tail(&kvm_freed->vm_list, &vm_list);
+
+	spin_unlock(&kvm_lock);
+
+	return cache_count;
+}
+
+static struct shrinker mmu_shrinker = {
+	.shrink = mmu_shrink,
+	.seeks = DEFAULT_SEEKS * 10,
+};
+
+void mmu_destroy_caches(void)
 {
 	if (pte_chain_cache)
 		kmem_cache_destroy(pte_chain_cache);
@@ -1687,6 +2021,12 @@ void kvm_mmu_module_exit(void)
 		kmem_cache_destroy(mmu_page_header_cache);
 }
 
+void kvm_mmu_module_exit(void)
+{
+	mmu_destroy_caches();
+	unregister_shrinker(&mmu_shrinker);
+}
+
 int kvm_mmu_module_init(void)
 {
 	pte_chain_cache = kmem_cache_create("kvm_pte_chain",
@@ -1706,10 +2046,12 @@ int kvm_mmu_module_init(void)
 	if (!mmu_page_header_cache)
 		goto nomem;
 
+	register_shrinker(&mmu_shrinker);
+
 	return 0;
 
 nomem:
-	kvm_mmu_module_exit();
+	mmu_destroy_caches();
 	return -ENOMEM;
 }
 
@@ -1732,6 +2074,127 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
 	return nr_mmu_pages;
 }
 
+static void *pv_mmu_peek_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+				unsigned len)
+{
+	if (len > buffer->len)
+		return NULL;
+	return buffer->ptr;
+}
+
+static void *pv_mmu_read_buffer(struct kvm_pv_mmu_op_buffer *buffer,
+				unsigned len)
+{
+	void *ret;
+
+	ret = pv_mmu_peek_buffer(buffer, len);
+	if (!ret)
+		return ret;
+	buffer->ptr += len;
+	buffer->len -= len;
+	buffer->processed += len;
+	return ret;
+}
+
+static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu,
+			     gpa_t addr, gpa_t value)
+{
+	int bytes = 8;
+	int r;
+
+	if (!is_long_mode(vcpu) && !is_pae(vcpu))
+		bytes = 4;
+
+	r = mmu_topup_memory_caches(vcpu);
+	if (r)
+		return r;
+
+	if (!emulator_write_phys(vcpu, addr, &value, bytes))
+		return -EFAULT;
+
+	return 1;
+}
+
+static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->tlb_flush(vcpu);
+	return 1;
+}
+
+static int kvm_pv_mmu_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	return 1;
+}
+
+static int kvm_pv_mmu_op_one(struct kvm_vcpu *vcpu,
+			     struct kvm_pv_mmu_op_buffer *buffer)
+{
+	struct kvm_mmu_op_header *header;
+
+	header = pv_mmu_peek_buffer(buffer, sizeof *header);
+	if (!header)
+		return 0;
+	switch (header->op) {
+	case KVM_MMU_OP_WRITE_PTE: {
+		struct kvm_mmu_op_write_pte *wpte;
+
+		wpte = pv_mmu_read_buffer(buffer, sizeof *wpte);
+		if (!wpte)
+			return 0;
+		return kvm_pv_mmu_write(vcpu, wpte->pte_phys,
+					wpte->pte_val);
+	}
+	case KVM_MMU_OP_FLUSH_TLB: {
+		struct kvm_mmu_op_flush_tlb *ftlb;
+
+		ftlb = pv_mmu_read_buffer(buffer, sizeof *ftlb);
+		if (!ftlb)
+			return 0;
+		return kvm_pv_mmu_flush_tlb(vcpu);
+	}
+	case KVM_MMU_OP_RELEASE_PT: {
+		struct kvm_mmu_op_release_pt *rpt;
+
+		rpt = pv_mmu_read_buffer(buffer, sizeof *rpt);
+		if (!rpt)
+			return 0;
+		return kvm_pv_mmu_release_pt(vcpu, rpt->pt_phys);
+	}
+	default: return 0;
+	}
+}
+
+int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
+		  gpa_t addr, unsigned long *ret)
+{
+	int r;
+	struct kvm_pv_mmu_op_buffer buffer;
+
+	buffer.ptr = buffer.buf;
+	buffer.len = min_t(unsigned long, bytes, sizeof buffer.buf);
+	buffer.processed = 0;
+
+	r = kvm_read_guest(vcpu->kvm, addr, buffer.buf, buffer.len);
+	if (r)
+		goto out;
+
+	while (buffer.len) {
+		r = kvm_pv_mmu_op_one(vcpu, &buffer);
+		if (r < 0)
+			goto out;
+		if (r == 0)
+			break;
+	}
+
+	r = 1;
+out:
+	*ret = buffer.processed;
+	return r;
+}
+
 #ifdef AUDIT
 
 static const char *audit_msg;
@@ -1768,8 +2231,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 			audit_mappings_page(vcpu, ent, va, level - 1);
 		} else {
 			gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
-			struct page *page = gpa_to_page(vcpu, gpa);
-			hpa_t hpa = page_to_phys(page);
+			hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT;
 
 			if (is_shadow_present_pte(ent)
 			    && (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -1782,7 +2244,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
 				 && !is_error_hpa(hpa))
 				printk(KERN_ERR "audit: (%s) notrap shadow,"
 				       " valid guest gva %lx\n", audit_msg, va);
-			kvm_release_page_clean(page);
+			kvm_release_pfn_clean(pfn);
 
 		}
 	}
@@ -1867,7 +2329,7 @@ static void audit_rmap(struct kvm_vcpu *vcpu)
 
 	if (n_rmap != n_actual)
 		printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
-		       __FUNCTION__, audit_msg, n_rmap, n_actual);
+		       __func__, audit_msg, n_rmap, n_actual);
 }
 
 static void audit_write_protection(struct kvm_vcpu *vcpu)
@@ -1887,7 +2349,7 @@ static void audit_write_protection(struct kvm_vcpu *vcpu)
 		if (*rmapp)
 			printk(KERN_ERR "%s: (%s) shadow page has writable"
 			       " mappings: gfn %lx role %x\n",
-			       __FUNCTION__, audit_msg, sp->gfn,
+			       __func__, audit_msg, sp->gfn,
 			       sp->role.word);
 	}
 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 1fce19ec7a23..e64e9f56a65e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -3,6 +3,12 @@
 
 #include <linux/kvm_host.h>
 
+#ifdef CONFIG_X86_64
+#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
+#else
+#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
+#endif
+
 static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
 {
 	if (unlikely(vcpu->kvm->arch.n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ecc0856268c4..156fe10288ae 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -130,7 +130,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
 
-	pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
+	pgprintk("%s: addr %lx\n", __func__, addr);
 walk:
 	walker->level = vcpu->arch.mmu.root_level;
 	pte = vcpu->arch.cr3;
@@ -155,7 +155,7 @@ walk:
 		pte_gpa += index * sizeof(pt_element_t);
 		walker->table_gfn[walker->level - 1] = table_gfn;
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
-		pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+		pgprintk("%s: table_gfn[%d] %lx\n", __func__,
 			 walker->level - 1, table_gfn);
 
 		kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte));
@@ -222,7 +222,7 @@ walk:
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
 	pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
-		 __FUNCTION__, (u64)pte, pt_access, pte_access);
+		 __func__, (u64)pte, pt_access, pte_access);
 	return 1;
 
 not_present:
@@ -243,31 +243,30 @@ err:
 }
 
 static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
-			      u64 *spte, const void *pte, int bytes,
-			      int offset_in_pte)
+			      u64 *spte, const void *pte)
 {
 	pt_element_t gpte;
 	unsigned pte_access;
-	struct page *npage;
+	pfn_t pfn;
+	int largepage = vcpu->arch.update_pte.largepage;
 
 	gpte = *(const pt_element_t *)pte;
 	if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
-		if (!offset_in_pte && !is_present_pte(gpte))
+		if (!is_present_pte(gpte))
 			set_shadow_pte(spte, shadow_notrap_nonpresent_pte);
 		return;
 	}
-	if (bytes < sizeof(pt_element_t))
-		return;
-	pgprintk("%s: gpte %llx spte %p\n", __FUNCTION__, (u64)gpte, spte);
+	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
 	pte_access = page->role.access & FNAME(gpte_access)(vcpu, gpte);
 	if (gpte_to_gfn(gpte) != vcpu->arch.update_pte.gfn)
 		return;
-	npage = vcpu->arch.update_pte.page;
-	if (!npage)
+	pfn = vcpu->arch.update_pte.pfn;
+	if (is_error_pfn(pfn))
 		return;
-	get_page(npage);
+	kvm_get_pfn(pfn);
 	mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
-		     gpte & PT_DIRTY_MASK, NULL, gpte_to_gfn(gpte), npage);
+		     gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte),
+		     pfn, true);
 }
 
 /*
@@ -275,8 +274,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
  */
 static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 struct guest_walker *walker,
-			 int user_fault, int write_fault, int *ptwrite,
-			 struct page *page)
+			 int user_fault, int write_fault, int largepage,
+			 int *ptwrite, pfn_t pfn)
 {
 	hpa_t shadow_addr;
 	int level;
@@ -304,11 +303,19 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		shadow_ent = ((u64 *)__va(shadow_addr)) + index;
 		if (level == PT_PAGE_TABLE_LEVEL)
 			break;
-		if (is_shadow_present_pte(*shadow_ent)) {
+
+		if (largepage && level == PT_DIRECTORY_LEVEL)
+			break;
+
+		if (is_shadow_present_pte(*shadow_ent)
+		    && !is_large_pte(*shadow_ent)) {
 			shadow_addr = *shadow_ent & PT64_BASE_ADDR_MASK;
 			continue;
 		}
 
+		if (is_large_pte(*shadow_ent))
+			rmap_remove(vcpu->kvm, shadow_ent);
+
 		if (level - 1 == PT_PAGE_TABLE_LEVEL
 		    && walker->level == PT_DIRECTORY_LEVEL) {
 			metaphysical = 1;
@@ -329,7 +336,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 						  walker->pte_gpa[level - 2],
 						  &curr_pte, sizeof(curr_pte));
 			if (r || curr_pte != walker->ptes[level - 2]) {
-				kvm_release_page_clean(page);
+				kvm_release_pfn_clean(pfn);
 				return NULL;
 			}
 		}
@@ -342,7 +349,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
 		     user_fault, write_fault,
 		     walker->ptes[walker->level-1] & PT_DIRTY_MASK,
-		     ptwrite, walker->gfn, page);
+		     ptwrite, largepage, walker->gfn, pfn, false);
 
 	return shadow_ent;
 }
@@ -371,16 +378,16 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	u64 *shadow_pte;
 	int write_pt = 0;
 	int r;
-	struct page *page;
+	pfn_t pfn;
+	int largepage = 0;
 
-	pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
+	pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code);
 	kvm_mmu_audit(vcpu, "pre page fault");
 
 	r = mmu_topup_memory_caches(vcpu);
 	if (r)
 		return r;
 
-	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Look up the shadow pte for the faulting address.
 	 */
@@ -391,40 +398,45 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 	 * The page is not mapped by the guest.  Let the guest handle it.
 	 */
 	if (!r) {
-		pgprintk("%s: guest page fault\n", __FUNCTION__);
+		pgprintk("%s: guest page fault\n", __func__);
 		inject_page_fault(vcpu, addr, walker.error_code);
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
-		up_read(&vcpu->kvm->slots_lock);
 		return 0;
 	}
 
 	down_read(&current->mm->mmap_sem);
-	page = gfn_to_page(vcpu->kvm, walker.gfn);
+	if (walker.level == PT_DIRECTORY_LEVEL) {
+		gfn_t large_gfn;
+		large_gfn = walker.gfn & ~(KVM_PAGES_PER_HPAGE-1);
+		if (is_largepage_backed(vcpu, large_gfn)) {
+			walker.gfn = large_gfn;
+			largepage = 1;
+		}
+	}
+	pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
 	up_read(&current->mm->mmap_sem);
 
+	/* mmio */
+	if (is_error_pfn(pfn)) {
+		pgprintk("gfn %x is mmio\n", walker.gfn);
+		kvm_release_pfn_clean(pfn);
+		return 1;
+	}
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	kvm_mmu_free_some_pages(vcpu);
 	shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
-				  &write_pt, page);
-	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __FUNCTION__,
+				  largepage, &write_pt, pfn);
+
+	pgprintk("%s: shadow pte %p %llx ptwrite %d\n", __func__,
 		 shadow_pte, *shadow_pte, write_pt);
 
 	if (!write_pt)
 		vcpu->arch.last_pt_write_count = 0; /* reset fork detector */
 
-	/*
-	 * mmio: emulate if accessible, otherwise its a guest fault.
-	 */
-	if (shadow_pte && is_io_pte(*shadow_pte)) {
-		spin_unlock(&vcpu->kvm->mmu_lock);
-		up_read(&vcpu->kvm->slots_lock);
-		return 1;
-	}
-
 	++vcpu->stat.pf_fixed;
 	kvm_mmu_audit(vcpu, "post page fault (fixed)");
 	spin_unlock(&vcpu->kvm->mmu_lock);
-	up_read(&vcpu->kvm->slots_lock);
 
 	return write_pt;
 }
diff --git a/arch/x86/kvm/segment_descriptor.h b/arch/x86/kvm/segment_descriptor.h
deleted file mode 100644
index 56fc4c873389..000000000000
--- a/arch/x86/kvm/segment_descriptor.h
+++ /dev/null
@@ -1,29 +0,0 @@
-#ifndef __SEGMENT_DESCRIPTOR_H
-#define __SEGMENT_DESCRIPTOR_H
-
-struct segment_descriptor {
-	u16 limit_low;
-	u16 base_low;
-	u8  base_mid;
-	u8  type : 4;
-	u8  system : 1;
-	u8  dpl : 2;
-	u8  present : 1;
-	u8  limit_high : 4;
-	u8  avl : 1;
-	u8  long_mode : 1;
-	u8  default_op : 1;
-	u8  granularity : 1;
-	u8  base_high;
-} __attribute__((packed));
-
-#ifdef CONFIG_X86_64
-/* LDT or TSS descriptor in the GDT. 16 bytes. */
-struct segment_descriptor_64 {
-	struct segment_descriptor s;
-	u32 base_higher;
-	u32 pad_zero;
-};
-
-#endif
-#endif
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1a582f1090e8..89e0be2c10d0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -47,6 +47,18 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_DEATURE_SVML (1 << 2)
 
+#define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
+
+/* enable NPT for AMD64 and X86 with PAE */
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static bool npt_enabled = true;
+#else
+static bool npt_enabled = false;
+#endif
+static int npt = 1;
+
+module_param(npt, int, S_IRUGO);
+
 static void kvm_reput_irq(struct vcpu_svm *svm);
 
 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
@@ -54,8 +66,7 @@ static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_svm, vcpu);
 }
 
-unsigned long iopm_base;
-unsigned long msrpm_base;
+static unsigned long iopm_base;
 
 struct kvm_ldttss_desc {
 	u16 limit0;
@@ -182,7 +193,7 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu)
 
 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-	if (!(efer & EFER_LMA))
+	if (!npt_enabled && !(efer & EFER_LMA))
 		efer &= ~EFER_LME;
 
 	to_svm(vcpu)->vmcb->save.efer = efer | MSR_EFER_SVME_MASK;
@@ -219,12 +230,12 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	if (!svm->next_rip) {
-		printk(KERN_DEBUG "%s: NOP\n", __FUNCTION__);
+		printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
 	}
 	if (svm->next_rip - svm->vmcb->save.rip > MAX_INST_SIZE)
 		printk(KERN_ERR "%s: ip 0x%llx next 0x%llx\n",
-		       __FUNCTION__,
+		       __func__,
 		       svm->vmcb->save.rip,
 		       svm->next_rip);
 
@@ -279,11 +290,7 @@ static void svm_hardware_enable(void *garbage)
 
 	struct svm_cpu_data *svm_data;
 	uint64_t efer;
-#ifdef CONFIG_X86_64
-	struct desc_ptr gdt_descr;
-#else
 	struct desc_ptr gdt_descr;
-#endif
 	struct desc_struct *gdt;
 	int me = raw_smp_processor_id();
 
@@ -302,7 +309,6 @@ static void svm_hardware_enable(void *garbage)
 	svm_data->asid_generation = 1;
 	svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
 	svm_data->next_asid = svm_data->max_asid + 1;
-	svm_features = cpuid_edx(SVM_CPUID_FUNC);
 
 	asm volatile ("sgdt %0" : "=m"(gdt_descr));
 	gdt = (struct desc_struct *)gdt_descr.address;
@@ -361,12 +367,51 @@ static void set_msr_interception(u32 *msrpm, unsigned msr,
 	BUG();
 }
 
+static void svm_vcpu_init_msrpm(u32 *msrpm)
+{
+	memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
+
+#ifdef CONFIG_X86_64
+	set_msr_interception(msrpm, MSR_GS_BASE, 1, 1);
+	set_msr_interception(msrpm, MSR_FS_BASE, 1, 1);
+	set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1);
+	set_msr_interception(msrpm, MSR_LSTAR, 1, 1);
+	set_msr_interception(msrpm, MSR_CSTAR, 1, 1);
+	set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1);
+#endif
+	set_msr_interception(msrpm, MSR_K6_STAR, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
+}
+
+static void svm_enable_lbrv(struct vcpu_svm *svm)
+{
+	u32 *msrpm = svm->msrpm;
+
+	svm->vmcb->control.lbr_ctl = 1;
+	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
+	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
+}
+
+static void svm_disable_lbrv(struct vcpu_svm *svm)
+{
+	u32 *msrpm = svm->msrpm;
+
+	svm->vmcb->control.lbr_ctl = 0;
+	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
+	set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
+	set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
+	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
+}
+
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
 	struct page *iopm_pages;
-	struct page *msrpm_pages;
-	void *iopm_va, *msrpm_va;
+	void *iopm_va;
 	int r;
 
 	iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
@@ -379,41 +424,33 @@ static __init int svm_hardware_setup(void)
 	clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
 	iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
 
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
 
-	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+	for_each_online_cpu(cpu) {
+		r = svm_cpu_init(cpu);
+		if (r)
+			goto err;
+	}
 
-	r = -ENOMEM;
-	if (!msrpm_pages)
-		goto err_1;
+	svm_features = cpuid_edx(SVM_CPUID_FUNC);
 
-	msrpm_va = page_address(msrpm_pages);
-	memset(msrpm_va, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
-	msrpm_base = page_to_pfn(msrpm_pages) << PAGE_SHIFT;
+	if (!svm_has(SVM_FEATURE_NPT))
+		npt_enabled = false;
 
-#ifdef CONFIG_X86_64
-	set_msr_interception(msrpm_va, MSR_GS_BASE, 1, 1);
-	set_msr_interception(msrpm_va, MSR_FS_BASE, 1, 1);
-	set_msr_interception(msrpm_va, MSR_KERNEL_GS_BASE, 1, 1);
-	set_msr_interception(msrpm_va, MSR_LSTAR, 1, 1);
-	set_msr_interception(msrpm_va, MSR_CSTAR, 1, 1);
-	set_msr_interception(msrpm_va, MSR_SYSCALL_MASK, 1, 1);
-#endif
-	set_msr_interception(msrpm_va, MSR_K6_STAR, 1, 1);
-	set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_CS, 1, 1);
-	set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_ESP, 1, 1);
-	set_msr_interception(msrpm_va, MSR_IA32_SYSENTER_EIP, 1, 1);
+	if (npt_enabled && !npt) {
+		printk(KERN_INFO "kvm: Nested Paging disabled\n");
+		npt_enabled = false;
+	}
 
-	for_each_online_cpu(cpu) {
-		r = svm_cpu_init(cpu);
-		if (r)
-			goto err_2;
+	if (npt_enabled) {
+		printk(KERN_INFO "kvm: Nested Paging enabled\n");
+		kvm_enable_tdp();
 	}
+
 	return 0;
 
-err_2:
-	__free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
-	msrpm_base = 0;
-err_1:
+err:
 	__free_pages(iopm_pages, IOPM_ALLOC_ORDER);
 	iopm_base = 0;
 	return r;
@@ -421,9 +458,8 @@ err_1:
 
 static __exit void svm_hardware_unsetup(void)
 {
-	__free_pages(pfn_to_page(msrpm_base >> PAGE_SHIFT), MSRPM_ALLOC_ORDER);
 	__free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
-	iopm_base = msrpm_base = 0;
+	iopm_base = 0;
 }
 
 static void init_seg(struct vmcb_seg *seg)
@@ -443,15 +479,14 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
 	seg->base = 0;
 }
 
-static void init_vmcb(struct vmcb *vmcb)
+static void init_vmcb(struct vcpu_svm *svm)
 {
-	struct vmcb_control_area *control = &vmcb->control;
-	struct vmcb_save_area *save = &vmcb->save;
+	struct vmcb_control_area *control = &svm->vmcb->control;
+	struct vmcb_save_area *save = &svm->vmcb->save;
 
 	control->intercept_cr_read = 	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
-					INTERCEPT_CR4_MASK |
-					INTERCEPT_CR8_MASK;
+					INTERCEPT_CR4_MASK;
 
 	control->intercept_cr_write = 	INTERCEPT_CR0_MASK |
 					INTERCEPT_CR3_MASK |
@@ -471,23 +506,13 @@ static void init_vmcb(struct vmcb *vmcb)
 					INTERCEPT_DR7_MASK;
 
 	control->intercept_exceptions = (1 << PF_VECTOR) |
-					(1 << UD_VECTOR);
+					(1 << UD_VECTOR) |
+					(1 << MC_VECTOR);
 
 
 	control->intercept = 	(1ULL << INTERCEPT_INTR) |
 				(1ULL << INTERCEPT_NMI) |
 				(1ULL << INTERCEPT_SMI) |
-		/*
-		 * selective cr0 intercept bug?
-		 *    	0:   0f 22 d8                mov    %eax,%cr3
-		 *	3:   0f 20 c0                mov    %cr0,%eax
-		 *	6:   0d 00 00 00 80          or     $0x80000000,%eax
-		 *	b:   0f 22 c0                mov    %eax,%cr0
-		 * set cr3 ->interception
-		 * get cr0 ->interception
-		 * set cr0 -> no interception
-		 */
-		/*              (1ULL << INTERCEPT_SELECTIVE_CR0) | */
 				(1ULL << INTERCEPT_CPUID) |
 				(1ULL << INTERCEPT_INVD) |
 				(1ULL << INTERCEPT_HLT) |
@@ -508,7 +533,7 @@ static void init_vmcb(struct vmcb *vmcb)
 				(1ULL << INTERCEPT_MWAIT);
 
 	control->iopm_base_pa = iopm_base;
-	control->msrpm_base_pa = msrpm_base;
+	control->msrpm_base_pa = __pa(svm->msrpm);
 	control->tsc_offset = 0;
 	control->int_ctl = V_INTR_MASKING_MASK;
 
@@ -550,13 +575,30 @@ static void init_vmcb(struct vmcb *vmcb)
 	save->cr0 = 0x00000010 | X86_CR0_PG | X86_CR0_WP;
 	save->cr4 = X86_CR4_PAE;
 	/* rdx = ?? */
+
+	if (npt_enabled) {
+		/* Setup VMCB for Nested Paging */
+		control->nested_ctl = 1;
+		control->intercept &= ~(1ULL << INTERCEPT_TASK_SWITCH);
+		control->intercept_exceptions &= ~(1 << PF_VECTOR);
+		control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
+						INTERCEPT_CR3_MASK);
+		control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
+						 INTERCEPT_CR3_MASK);
+		save->g_pat = 0x0007040600070406ULL;
+		/* enable caching because the QEMU Bios doesn't enable it */
+		save->cr0 = X86_CR0_ET;
+		save->cr3 = 0;
+		save->cr4 = 0;
+	}
+	force_new_asid(&svm->vcpu);
 }
 
 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	init_vmcb(svm->vmcb);
+	init_vmcb(svm);
 
 	if (vcpu->vcpu_id != 0) {
 		svm->vmcb->save.rip = 0;
@@ -571,6 +613,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 {
 	struct vcpu_svm *svm;
 	struct page *page;
+	struct page *msrpm_pages;
 	int err;
 
 	svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
@@ -589,12 +632,19 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 		goto uninit;
 	}
 
+	err = -ENOMEM;
+	msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
+	if (!msrpm_pages)
+		goto uninit;
+	svm->msrpm = page_address(msrpm_pages);
+	svm_vcpu_init_msrpm(svm->msrpm);
+
 	svm->vmcb = page_address(page);
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->asid_generation = 0;
 	memset(svm->db_regs, 0, sizeof(svm->db_regs));
-	init_vmcb(svm->vmcb);
+	init_vmcb(svm);
 
 	fx_init(&svm->vcpu);
 	svm->vcpu.fpu_active = 1;
@@ -617,6 +667,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu)
 	struct vcpu_svm *svm = to_svm(vcpu);
 
 	__free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
+	__free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, svm);
 }
@@ -731,6 +782,13 @@ static void svm_get_segment(struct kvm_vcpu *vcpu,
 	var->unusable = !var->present;
 }
 
+static int svm_get_cpl(struct kvm_vcpu *vcpu)
+{
+	struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
+
+	return save->cpl;
+}
+
 static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -784,6 +842,9 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 		}
 	}
 #endif
+	if (npt_enabled)
+		goto set;
+
 	if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
 		svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
 		vcpu->fpu_active = 1;
@@ -791,18 +852,29 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 	vcpu->arch.cr0 = cr0;
 	cr0 |= X86_CR0_PG | X86_CR0_WP;
-	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	if (!vcpu->fpu_active) {
 		svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
 		cr0 |= X86_CR0_TS;
 	}
+set:
+	/*
+	 * re-enable caching here because the QEMU bios
+	 * does not do it - this results in some delay at
+	 * reboot
+	 */
+	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
 }
 
 static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
-       vcpu->arch.cr4 = cr4;
-       to_svm(vcpu)->vmcb->save.cr4 = cr4 | X86_CR4_PAE;
+	unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
+
+	vcpu->arch.cr4 = cr4;
+	if (!npt_enabled)
+		cr4 |= X86_CR4_PAE;
+	cr4 |= host_cr4_mce;
+	to_svm(vcpu)->vmcb->save.cr4 = cr4;
 }
 
 static void svm_set_segment(struct kvm_vcpu *vcpu,
@@ -833,13 +905,6 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 
 }
 
-/* FIXME:
-
-	svm(vcpu)->vmcb->control.int_ctl &= ~V_TPR_MASK;
-	svm(vcpu)->vmcb->control.int_ctl |= (sregs->cr8 & V_TPR_MASK);
-
-*/
-
 static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_debug_guest *dbg)
 {
 	return -EOPNOTSUPP;
@@ -920,7 +985,7 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
 	}
 	default:
 		printk(KERN_DEBUG "%s: unexpected dr %u\n",
-		       __FUNCTION__, dr);
+		       __func__, dr);
 		*exception = UD_VECTOR;
 		return;
 	}
@@ -962,6 +1027,19 @@ static int nm_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int mc_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
+{
+	/*
+	 * On an #MC intercept the MCE handler is not called automatically in
+	 * the host. So do it by hand here.
+	 */
+	asm volatile (
+		"int $0x12\n");
+	/* not sure if we ever come back to this point */
+
+	return 1;
+}
+
 static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 {
 	/*
@@ -969,7 +1047,7 @@ static int shutdown_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
 	 * so reinitialize it.
 	 */
 	clear_page(svm->vmcb);
-	init_vmcb(svm->vmcb);
+	init_vmcb(svm);
 
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 	return 0;
@@ -1033,9 +1111,18 @@ static int invalid_op_interception(struct vcpu_svm *svm,
 static int task_switch_interception(struct vcpu_svm *svm,
 				    struct kvm_run *kvm_run)
 {
-	pr_unimpl(&svm->vcpu, "%s: task switch is unsupported\n", __FUNCTION__);
-	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-	return 0;
+	u16 tss_selector;
+
+	tss_selector = (u16)svm->vmcb->control.exit_info_1;
+	if (svm->vmcb->control.exit_info_2 &
+	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
+		return kvm_task_switch(&svm->vcpu, tss_selector,
+				       TASK_SWITCH_IRET);
+	if (svm->vmcb->control.exit_info_2 &
+	    (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
+		return kvm_task_switch(&svm->vcpu, tss_selector,
+				       TASK_SWITCH_JMP);
+	return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL);
 }
 
 static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
@@ -1049,7 +1136,7 @@ static int emulate_on_interception(struct vcpu_svm *svm,
 				   struct kvm_run *kvm_run)
 {
 	if (emulate_instruction(&svm->vcpu, NULL, 0, 0, 0) != EMULATE_DONE)
-		pr_unimpl(&svm->vcpu, "%s: failed\n", __FUNCTION__);
+		pr_unimpl(&svm->vcpu, "%s: failed\n", __func__);
 	return 1;
 }
 
@@ -1179,8 +1266,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
 		svm->vmcb->save.sysenter_esp = data;
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
-		pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
-				__FUNCTION__, data);
+		if (!svm_has(SVM_FEATURE_LBRV)) {
+			pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
+					__func__, data);
+			break;
+		}
+		if (data & DEBUGCTL_RESERVED_BITS)
+			return 1;
+
+		svm->vmcb->save.dbgctl = data;
+		if (data & (1ULL<<0))
+			svm_enable_lbrv(svm);
+		else
+			svm_disable_lbrv(svm);
 		break;
 	case MSR_K7_EVNTSEL0:
 	case MSR_K7_EVNTSEL1:
@@ -1265,6 +1363,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_EXCP_BASE + UD_VECTOR]	= ud_interception,
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR] 	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR] 	= nm_interception,
+	[SVM_EXIT_EXCP_BASE + MC_VECTOR] 	= mc_interception,
 	[SVM_EXIT_INTR] 			= nop_on_interception,
 	[SVM_EXIT_NMI]				= nop_on_interception,
 	[SVM_EXIT_SMI]				= nop_on_interception,
@@ -1290,14 +1389,34 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm,
 	[SVM_EXIT_WBINVD]                       = emulate_on_interception,
 	[SVM_EXIT_MONITOR]			= invalid_op_interception,
 	[SVM_EXIT_MWAIT]			= invalid_op_interception,
+	[SVM_EXIT_NPF]				= pf_interception,
 };
 
-
 static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	u32 exit_code = svm->vmcb->control.exit_code;
 
+	if (npt_enabled) {
+		int mmu_reload = 0;
+		if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
+			svm_set_cr0(vcpu, svm->vmcb->save.cr0);
+			mmu_reload = 1;
+		}
+		vcpu->arch.cr0 = svm->vmcb->save.cr0;
+		vcpu->arch.cr3 = svm->vmcb->save.cr3;
+		if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
+			if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
+				kvm_inject_gp(vcpu, 0);
+				return 1;
+			}
+		}
+		if (mmu_reload) {
+			kvm_mmu_reset_context(vcpu);
+			kvm_mmu_load(vcpu);
+		}
+	}
+
 	kvm_reput_irq(svm);
 
 	if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
@@ -1308,10 +1427,11 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	}
 
 	if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
-	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR)
+	    exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
+	    exit_code != SVM_EXIT_NPF)
 		printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x "
 		       "exit_code 0x%x\n",
-		       __FUNCTION__, svm->vmcb->control.exit_int_info,
+		       __func__, svm->vmcb->control.exit_int_info,
 		       exit_code);
 
 	if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
@@ -1364,6 +1484,27 @@ static void svm_set_irq(struct kvm_vcpu *vcpu, int irq)
 	svm_inject_irq(svm, irq);
 }
 
+static void update_cr8_intercept(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb *vmcb = svm->vmcb;
+	int max_irr, tpr;
+
+	if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr)
+		return;
+
+	vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK;
+
+	max_irr = kvm_lapic_find_highest_irr(vcpu);
+	if (max_irr == -1)
+		return;
+
+	tpr = kvm_lapic_get_cr8(vcpu) << 4;
+
+	if (tpr >= (max_irr & 0xf0))
+		vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK;
+}
+
 static void svm_intr_assist(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1376,14 +1517,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 			      SVM_EVTINJ_VEC_MASK;
 		vmcb->control.exit_int_info = 0;
 		svm_inject_irq(svm, intr_vector);
-		return;
+		goto out;
 	}
 
 	if (vmcb->control.int_ctl & V_IRQ_MASK)
-		return;
+		goto out;
 
 	if (!kvm_cpu_has_interrupt(vcpu))
-		return;
+		goto out;
 
 	if (!(vmcb->save.rflags & X86_EFLAGS_IF) ||
 	    (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
@@ -1391,12 +1532,14 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
 		/* unable to deliver irq, set pending irq */
 		vmcb->control.intercept |= (1ULL << INTERCEPT_VINTR);
 		svm_inject_irq(svm, 0x0);
-		return;
+		goto out;
 	}
 	/* Okay, we can deliver the interrupt: grab it and update PIC state. */
 	intr_vector = kvm_cpu_get_interrupt(vcpu);
 	svm_inject_irq(svm, intr_vector);
 	kvm_timer_intr_post(vcpu, intr_vector);
+out:
+	update_cr8_intercept(vcpu);
 }
 
 static void kvm_reput_irq(struct vcpu_svm *svm)
@@ -1482,6 +1625,29 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
 {
 }
 
+static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) {
+		int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
+		kvm_lapic_set_tpr(vcpu, cr8);
+	}
+}
+
+static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	u64 cr8;
+
+	if (!irqchip_in_kernel(vcpu->kvm))
+		return;
+
+	cr8 = kvm_get_cr8(vcpu);
+	svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
+	svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
+}
+
 static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -1491,6 +1657,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	pre_svm_run(svm);
 
+	sync_lapic_to_cr8(vcpu);
+
 	save_host_msrs(vcpu);
 	fs_selector = read_fs();
 	gs_selector = read_gs();
@@ -1499,6 +1667,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	svm->host_dr6 = read_dr6();
 	svm->host_dr7 = read_dr7();
 	svm->vmcb->save.cr2 = vcpu->arch.cr2;
+	/* required for live migration with NPT */
+	if (npt_enabled)
+		svm->vmcb->save.cr3 = vcpu->arch.cr3;
 
 	if (svm->vmcb->save.dr7 & 0xff) {
 		write_dr7(0);
@@ -1635,6 +1806,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	stgi();
 
+	sync_cr8_to_lapic(vcpu);
+
 	svm->next_rip = 0;
 }
 
@@ -1642,6 +1815,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	if (npt_enabled) {
+		svm->vmcb->control.nested_cr3 = root;
+		force_new_asid(vcpu);
+		return;
+	}
+
 	svm->vmcb->save.cr3 = root;
 	force_new_asid(vcpu);
 
@@ -1709,6 +1888,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.get_segment_base = svm_get_segment_base,
 	.get_segment = svm_get_segment,
 	.set_segment = svm_set_segment,
+	.get_cpl = svm_get_cpl,
 	.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
 	.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
 	.set_cr0 = svm_set_cr0,
diff --git a/arch/x86/kvm/svm.h b/arch/x86/kvm/svm.h
index 5fd50491b555..1b8afa78e869 100644
--- a/arch/x86/kvm/svm.h
+++ b/arch/x86/kvm/svm.h
@@ -238,6 +238,9 @@ struct __attribute__ ((__packed__)) vmcb {
 #define SVM_EXITINTINFO_VALID SVM_EVTINJ_VALID
 #define SVM_EXITINTINFO_VALID_ERR SVM_EVTINJ_VALID_ERR
 
+#define SVM_EXITINFOSHIFT_TS_REASON_IRET 36
+#define SVM_EXITINFOSHIFT_TS_REASON_JMP 38
+
 #define	SVM_EXIT_READ_CR0 	0x000
 #define	SVM_EXIT_READ_CR3 	0x003
 #define	SVM_EXIT_READ_CR4 	0x004
diff --git a/arch/x86/kvm/tss.h b/arch/x86/kvm/tss.h
new file mode 100644
index 000000000000..622aa10f692f
--- /dev/null
+++ b/arch/x86/kvm/tss.h
@@ -0,0 +1,59 @@
+#ifndef __TSS_SEGMENT_H
+#define __TSS_SEGMENT_H
+
+struct tss_segment_32 {
+	u32 prev_task_link;
+	u32 esp0;
+	u32 ss0;
+	u32 esp1;
+	u32 ss1;
+	u32 esp2;
+	u32 ss2;
+	u32 cr3;
+	u32 eip;
+	u32 eflags;
+	u32 eax;
+	u32 ecx;
+	u32 edx;
+	u32 ebx;
+	u32 esp;
+	u32 ebp;
+	u32 esi;
+	u32 edi;
+	u32 es;
+	u32 cs;
+	u32 ss;
+	u32 ds;
+	u32 fs;
+	u32 gs;
+	u32 ldt_selector;
+	u16 t;
+	u16 io_map;
+};
+
+struct tss_segment_16 {
+	u16 prev_task_link;
+	u16 sp0;
+	u16 ss0;
+	u16 sp1;
+	u16 ss1;
+	u16 sp2;
+	u16 ss2;
+	u16 ip;
+	u16 flag;
+	u16 ax;
+	u16 cx;
+	u16 dx;
+	u16 bx;
+	u16 sp;
+	u16 bp;
+	u16 si;
+	u16 di;
+	u16 es;
+	u16 cs;
+	u16 ss;
+	u16 ds;
+	u16 ldt;
+};
+
+#endif
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 8e1462880d1f..8e5d6645b90d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -17,7 +17,6 @@
 
 #include "irq.h"
 #include "vmx.h"
-#include "segment_descriptor.h"
 #include "mmu.h"
 
 #include <linux/kvm_host.h>
@@ -37,6 +36,12 @@ MODULE_LICENSE("GPL");
 static int bypass_guest_pf = 1;
 module_param(bypass_guest_pf, bool, 0);
 
+static int enable_vpid = 1;
+module_param(enable_vpid, bool, 0);
+
+static int flexpriority_enabled = 1;
+module_param(flexpriority_enabled, bool, 0);
+
 struct vmcs {
 	u32 revision_id;
 	u32 abort;
@@ -71,6 +76,7 @@ struct vcpu_vmx {
 			unsigned rip;
 		} irq;
 	} rmode;
+	int vpid;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -85,6 +91,10 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 
 static struct page *vmx_io_bitmap_a;
 static struct page *vmx_io_bitmap_b;
+static struct page *vmx_msr_bitmap;
+
+static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
+static DEFINE_SPINLOCK(vmx_vpid_lock);
 
 static struct vmcs_config {
 	int size;
@@ -176,6 +186,11 @@ static inline int is_external_interrupt(u32 intr_info)
 		== (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
 }
 
+static inline int cpu_has_vmx_msr_bitmap(void)
+{
+	return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS);
+}
+
 static inline int cpu_has_vmx_tpr_shadow(void)
 {
 	return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW);
@@ -194,8 +209,9 @@ static inline int cpu_has_secondary_exec_ctrls(void)
 
 static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
 {
-	return (vmcs_config.cpu_based_2nd_exec_ctrl &
-		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+	return flexpriority_enabled
+		&& (vmcs_config.cpu_based_2nd_exec_ctrl &
+		    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 }
 
 static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
@@ -204,6 +220,12 @@ static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
 		(irqchip_in_kernel(kvm)));
 }
 
+static inline int cpu_has_vmx_vpid(void)
+{
+	return (vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_ENABLE_VPID);
+}
+
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -214,6 +236,20 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
 	return -1;
 }
 
+static inline void __invvpid(int ext, u16 vpid, gva_t gva)
+{
+    struct {
+	u64 vpid : 16;
+	u64 rsvd : 48;
+	u64 gva;
+    } operand = { vpid, 0, gva };
+
+    asm volatile (ASM_VMX_INVVPID
+		  /* CF==1 or ZF==1 --> rc = -1 */
+		  "; ja 1f ; ud2 ; 1:"
+		  : : "a"(&operand), "c"(ext) : "cc", "memory");
+}
+
 static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
 {
 	int i;
@@ -257,6 +293,14 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
 	vmx->launched = 0;
 }
 
+static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
+{
+	if (vmx->vpid == 0)
+		return;
+
+	__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+}
+
 static unsigned long vmcs_readl(unsigned long field)
 {
 	unsigned long value;
@@ -353,7 +397,7 @@ static void reload_tss(void)
 	 * VT restores TR but not its size.  Useless.
 	 */
 	struct descriptor_table gdt;
-	struct segment_descriptor *descs;
+	struct desc_struct *descs;
 
 	get_gdt(&gdt);
 	descs = (void *)gdt.base;
@@ -485,11 +529,12 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 phys_addr = __pa(vmx->vmcs);
-	u64 tsc_this, delta;
+	u64 tsc_this, delta, new_offset;
 
 	if (vcpu->cpu != cpu) {
 		vcpu_clear(vmx);
 		kvm_migrate_apic_timer(vcpu);
+		vpid_sync_vcpu_all(vmx);
 	}
 
 	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
@@ -524,8 +569,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		 * Make sure the time stamp counter is monotonous.
 		 */
 		rdtscll(tsc_this);
-		delta = vcpu->arch.host_tsc - tsc_this;
-		vmcs_write64(TSC_OFFSET, vmcs_read64(TSC_OFFSET) + delta);
+		if (tsc_this < vcpu->arch.host_tsc) {
+			delta = vcpu->arch.host_tsc - tsc_this;
+			new_offset = vmcs_read64(TSC_OFFSET) + delta;
+			vmcs_write64(TSC_OFFSET, new_offset);
+		}
 	}
 }
 
@@ -596,7 +644,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 {
 	vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 		     nr | INTR_TYPE_EXCEPTION
-		     | (has_error_code ? INTR_INFO_DELIEVER_CODE_MASK : 0)
+		     | (has_error_code ? INTR_INFO_DELIVER_CODE_MASK : 0)
 		     | INTR_INFO_VALID_MASK);
 	if (has_error_code)
 		vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
@@ -959,6 +1007,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING;
 	opt = CPU_BASED_TPR_SHADOW |
+	      CPU_BASED_USE_MSR_BITMAPS |
 	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
 				&_cpu_based_exec_control) < 0)
@@ -971,7 +1020,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
 		min = 0;
 		opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-			SECONDARY_EXEC_WBINVD_EXITING;
+			SECONDARY_EXEC_WBINVD_EXITING |
+			SECONDARY_EXEC_ENABLE_VPID;
 		if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
 			return -EIO;
@@ -1080,6 +1130,10 @@ static __init int hardware_setup(void)
 {
 	if (setup_vmcs_config(&vmcs_config) < 0)
 		return -EIO;
+
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
+
 	return alloc_kvm_area();
 }
 
@@ -1214,7 +1268,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 	guest_tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	if ((guest_tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
 		printk(KERN_DEBUG "%s: tss fixup for long mode. \n",
-		       __FUNCTION__);
+		       __func__);
 		vmcs_write32(GUEST_TR_AR_BYTES,
 			     (guest_tr_ar & ~AR_TYPE_MASK)
 			     | AR_TYPE_BUSY_64_TSS);
@@ -1239,6 +1293,11 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
 
 #endif
 
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	vpid_sync_vcpu_all(to_vmx(vcpu));
+}
+
 static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK;
@@ -1275,6 +1334,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 
 static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+	vmx_flush_tlb(vcpu);
 	vmcs_writel(GUEST_CR3, cr3);
 	if (vcpu->arch.cr0 & X86_CR0_PE)
 		vmx_fpu_deactivate(vcpu);
@@ -1288,14 +1348,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	vcpu->arch.cr4 = cr4;
 }
 
-#ifdef CONFIG_X86_64
-
 static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
 
 	vcpu->arch.shadow_efer = efer;
+	if (!msr)
+		return;
 	if (efer & EFER_LMA) {
 		vmcs_write32(VM_ENTRY_CONTROLS,
 				     vmcs_read32(VM_ENTRY_CONTROLS) |
@@ -1312,8 +1372,6 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 	setup_msrs(vmx);
 }
 
-#endif
-
 static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
@@ -1344,6 +1402,20 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	var->unusable = (ar >> 16) & 1;
 }
 
+static int vmx_get_cpl(struct kvm_vcpu *vcpu)
+{
+	struct kvm_segment kvm_seg;
+
+	if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
+		return 0;
+
+	if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
+		return 3;
+
+	vmx_get_segment(vcpu, &kvm_seg, VCPU_SREG_CS);
+	return kvm_seg.selector & 3;
+}
+
 static u32 vmx_segment_access_rights(struct kvm_segment *var)
 {
 	u32 ar;
@@ -1433,7 +1505,6 @@ static int init_rmode_tss(struct kvm *kvm)
 	int ret = 0;
 	int r;
 
-	down_read(&kvm->slots_lock);
 	r = kvm_clear_guest_page(kvm, fn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -1456,7 +1527,6 @@ static int init_rmode_tss(struct kvm *kvm)
 
 	ret = 1;
 out:
-	up_read(&kvm->slots_lock);
 	return ret;
 }
 
@@ -1494,6 +1564,46 @@ out:
 	return r;
 }
 
+static void allocate_vpid(struct vcpu_vmx *vmx)
+{
+	int vpid;
+
+	vmx->vpid = 0;
+	if (!enable_vpid || !cpu_has_vmx_vpid())
+		return;
+	spin_lock(&vmx_vpid_lock);
+	vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS);
+	if (vpid < VMX_NR_VPIDS) {
+		vmx->vpid = vpid;
+		__set_bit(vpid, vmx_vpid_bitmap);
+	}
+	spin_unlock(&vmx_vpid_lock);
+}
+
+void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr)
+{
+	void *va;
+
+	if (!cpu_has_vmx_msr_bitmap())
+		return;
+
+	/*
+	 * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
+	 * have the write-low and read-high bitmap offsets the wrong way round.
+	 * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
+	 */
+	va = kmap(msr_bitmap);
+	if (msr <= 0x1fff) {
+		__clear_bit(msr, va + 0x000); /* read-low */
+		__clear_bit(msr, va + 0x800); /* write-low */
+	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
+		msr &= 0x1fff;
+		__clear_bit(msr, va + 0x400); /* read-high */
+		__clear_bit(msr, va + 0xc00); /* write-high */
+	}
+	kunmap(msr_bitmap);
+}
+
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -1511,6 +1621,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
 	vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
 
+	if (cpu_has_vmx_msr_bitmap())
+		vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
+
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
 	/* Control */
@@ -1532,6 +1645,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		if (!vm_need_virtualize_apic_accesses(vmx->vcpu.kvm))
 			exec_control &=
 				~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+		if (vmx->vpid == 0)
+			exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
 	}
 
@@ -1613,6 +1728,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	u64 msr;
 	int ret;
 
+	down_read(&vcpu->kvm->slots_lock);
 	if (!init_rmode_tss(vmx->vcpu.kvm)) {
 		ret = -ENOMEM;
 		goto out;
@@ -1621,7 +1737,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 	vmx->vcpu.arch.rmode.active = 0;
 
 	vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
-	set_cr8(&vmx->vcpu, 0);
+	kvm_set_cr8(&vmx->vcpu, 0);
 	msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
 	if (vmx->vcpu.vcpu_id == 0)
 		msr |= MSR_IA32_APICBASE_BSP;
@@ -1704,18 +1820,22 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		vmcs_write64(APIC_ACCESS_ADDR,
 			     page_to_phys(vmx->vcpu.kvm->arch.apic_access_page));
 
+	if (vmx->vpid != 0)
+		vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
+
 	vmx->vcpu.arch.cr0 = 0x60000010;
 	vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
 	vmx_set_cr4(&vmx->vcpu, 0);
-#ifdef CONFIG_X86_64
 	vmx_set_efer(&vmx->vcpu, 0);
-#endif
 	vmx_fpu_activate(&vmx->vcpu);
 	update_exception_bitmap(&vmx->vcpu);
 
-	return 0;
+	vpid_sync_vcpu_all(vmx);
+
+	ret = 0;
 
 out:
+	up_read(&vcpu->kvm->slots_lock);
 	return ret;
 }
 
@@ -1723,6 +1843,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
+
 	if (vcpu->arch.rmode.active) {
 		vmx->rmode.irq.pending = true;
 		vmx->rmode.irq.vector = irq;
@@ -1844,7 +1966,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	if ((vect_info & VECTORING_INFO_VALID_MASK) &&
 						!is_page_fault(intr_info))
 		printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
-		       "intr info 0x%x\n", __FUNCTION__, vect_info, intr_info);
+		       "intr info 0x%x\n", __func__, vect_info, intr_info);
 
 	if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) {
 		int irq = vect_info & VECTORING_INFO_VECTOR_MASK;
@@ -1869,10 +1991,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	error_code = 0;
 	rip = vmcs_readl(GUEST_RIP);
-	if (intr_info & INTR_INFO_DELIEVER_CODE_MASK)
+	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
 	if (is_page_fault(intr_info)) {
 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
+		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
+			    (u32)((u64)cr2 >> 32), handler);
 		return kvm_mmu_page_fault(vcpu, cr2, error_code);
 	}
 
@@ -1901,6 +2025,7 @@ static int handle_external_interrupt(struct kvm_vcpu *vcpu,
 				     struct kvm_run *kvm_run)
 {
 	++vcpu->stat.irq_exits;
+	KVMTRACE_1D(INTR, vcpu, vmcs_read32(VM_EXIT_INTR_INFO), handler);
 	return 1;
 }
 
@@ -1958,25 +2083,27 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	reg = (exit_qualification >> 8) & 15;
 	switch ((exit_qualification >> 4) & 3) {
 	case 0: /* mov to cr */
+		KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)vcpu->arch.regs[reg],
+			    (u32)((u64)vcpu->arch.regs[reg] >> 32), handler);
 		switch (cr) {
 		case 0:
 			vcpu_load_rsp_rip(vcpu);
-			set_cr0(vcpu, vcpu->arch.regs[reg]);
+			kvm_set_cr0(vcpu, vcpu->arch.regs[reg]);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 3:
 			vcpu_load_rsp_rip(vcpu);
-			set_cr3(vcpu, vcpu->arch.regs[reg]);
+			kvm_set_cr3(vcpu, vcpu->arch.regs[reg]);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 4:
 			vcpu_load_rsp_rip(vcpu);
-			set_cr4(vcpu, vcpu->arch.regs[reg]);
+			kvm_set_cr4(vcpu, vcpu->arch.regs[reg]);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8:
 			vcpu_load_rsp_rip(vcpu);
-			set_cr8(vcpu, vcpu->arch.regs[reg]);
+			kvm_set_cr8(vcpu, vcpu->arch.regs[reg]);
 			skip_emulated_instruction(vcpu);
 			if (irqchip_in_kernel(vcpu->kvm))
 				return 1;
@@ -1990,6 +2117,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		vcpu->arch.cr0 &= ~X86_CR0_TS;
 		vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
 		vmx_fpu_activate(vcpu);
+		KVMTRACE_0D(CLTS, vcpu, handler);
 		skip_emulated_instruction(vcpu);
 		return 1;
 	case 1: /*mov from cr*/
@@ -1998,18 +2126,24 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			vcpu_load_rsp_rip(vcpu);
 			vcpu->arch.regs[reg] = vcpu->arch.cr3;
 			vcpu_put_rsp_rip(vcpu);
+			KVMTRACE_3D(CR_READ, vcpu, (u32)cr,
+				    (u32)vcpu->arch.regs[reg],
+				    (u32)((u64)vcpu->arch.regs[reg] >> 32),
+				    handler);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		case 8:
 			vcpu_load_rsp_rip(vcpu);
-			vcpu->arch.regs[reg] = get_cr8(vcpu);
+			vcpu->arch.regs[reg] = kvm_get_cr8(vcpu);
 			vcpu_put_rsp_rip(vcpu);
+			KVMTRACE_2D(CR_READ, vcpu, (u32)cr,
+				    (u32)vcpu->arch.regs[reg], handler);
 			skip_emulated_instruction(vcpu);
 			return 1;
 		}
 		break;
 	case 3: /* lmsw */
-		lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
+		kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
 
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -2049,6 +2183,7 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 			val = 0;
 		}
 		vcpu->arch.regs[reg] = val;
+		KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler);
 	} else {
 		/* mov to dr */
 	}
@@ -2073,6 +2208,9 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		return 1;
 	}
 
+	KVMTRACE_3D(MSR_READ, vcpu, ecx, (u32)data, (u32)(data >> 32),
+		    handler);
+
 	/* FIXME: handling of bits 32:63 of rax, rdx */
 	vcpu->arch.regs[VCPU_REGS_RAX] = data & -1u;
 	vcpu->arch.regs[VCPU_REGS_RDX] = (data >> 32) & -1u;
@@ -2086,6 +2224,9 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
 		| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
 
+	KVMTRACE_3D(MSR_WRITE, vcpu, ecx, (u32)data, (u32)(data >> 32),
+		    handler);
+
 	if (vmx_set_msr(vcpu, ecx, data) != 0) {
 		kvm_inject_gp(vcpu, 0);
 		return 1;
@@ -2110,6 +2251,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
 	cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
 	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
+
+	KVMTRACE_0D(PEND_INTR, vcpu, handler);
+
 	/*
 	 * If the user space waits to inject interrupts, exit as soon as
 	 * possible
@@ -2152,6 +2296,8 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
 	offset = exit_qualification & 0xffful;
 
+	KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler);
+
 	er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
 
 	if (er !=  EMULATE_DONE) {
@@ -2163,6 +2309,20 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	return 1;
 }
 
+static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+	unsigned long exit_qualification;
+	u16 tss_selector;
+	int reason;
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+
+	reason = (u32)exit_qualification >> 30;
+	tss_selector = exit_qualification;
+
+	return kvm_task_switch(vcpu, tss_selector, reason);
+}
+
 /*
  * The exit handlers return 1 if the exit was handled fully and guest execution
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
@@ -2185,6 +2345,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
 	[EXIT_REASON_TPR_BELOW_THRESHOLD]     = handle_tpr_below_threshold,
 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
+	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -2200,6 +2361,9 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 vectoring_info = vmx->idt_vectoring_info;
 
+	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
+		    (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
+
 	if (unlikely(vmx->fail)) {
 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		kvm_run->fail_entry.hardware_entry_failure_reason
@@ -2210,7 +2374,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
 				exit_reason != EXIT_REASON_EXCEPTION_NMI)
 		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
-		       "exit reason is 0x%x\n", __FUNCTION__, exit_reason);
+		       "exit reason is 0x%x\n", __func__, exit_reason);
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
@@ -2221,10 +2385,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
-{
-}
-
 static void update_tpr_threshold(struct kvm_vcpu *vcpu)
 {
 	int max_irr, tpr;
@@ -2285,11 +2445,13 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 			return;
 		}
 
+		KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler);
+
 		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
 		vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
 				vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
 
-		if (unlikely(idtv_info_field & INTR_INFO_DELIEVER_CODE_MASK))
+		if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK))
 			vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
 				vmcs_read32(IDT_VECTORING_ERROR_CODE));
 		if (unlikely(has_ext_irq))
@@ -2470,8 +2632,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 	intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
 	/* We need to handle NMIs before interrupts are enabled */
-	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */
+	if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */
+		KVMTRACE_0D(NMI, vcpu, handler);
 		asm("int $2");
+	}
 }
 
 static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
@@ -2489,6 +2653,10 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	spin_lock(&vmx_vpid_lock);
+	if (vmx->vpid != 0)
+		__clear_bit(vmx->vpid, vmx_vpid_bitmap);
+	spin_unlock(&vmx_vpid_lock);
 	vmx_free_vmcs(vcpu);
 	kfree(vmx->host_msrs);
 	kfree(vmx->guest_msrs);
@@ -2505,6 +2673,8 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (!vmx)
 		return ERR_PTR(-ENOMEM);
 
+	allocate_vpid(vmx);
+
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
 		goto free_vcpu;
@@ -2591,14 +2761,13 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.get_segment_base = vmx_get_segment_base,
 	.get_segment = vmx_get_segment,
 	.set_segment = vmx_set_segment,
+	.get_cpl = vmx_get_cpl,
 	.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
 	.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
 	.set_cr0 = vmx_set_cr0,
 	.set_cr3 = vmx_set_cr3,
 	.set_cr4 = vmx_set_cr4,
-#ifdef CONFIG_X86_64
 	.set_efer = vmx_set_efer,
-#endif
 	.get_idt = vmx_get_idt,
 	.set_idt = vmx_set_idt,
 	.get_gdt = vmx_get_gdt,
@@ -2626,7 +2795,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 
 static int __init vmx_init(void)
 {
-	void *iova;
+	void *va;
 	int r;
 
 	vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
@@ -2639,28 +2808,48 @@ static int __init vmx_init(void)
 		goto out;
 	}
 
+	vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	if (!vmx_msr_bitmap) {
+		r = -ENOMEM;
+		goto out1;
+	}
+
 	/*
 	 * Allow direct access to the PC debug port (it is often used for I/O
 	 * delays, but the vmexits simply slow things down).
 	 */
-	iova = kmap(vmx_io_bitmap_a);
-	memset(iova, 0xff, PAGE_SIZE);
-	clear_bit(0x80, iova);
+	va = kmap(vmx_io_bitmap_a);
+	memset(va, 0xff, PAGE_SIZE);
+	clear_bit(0x80, va);
 	kunmap(vmx_io_bitmap_a);
 
-	iova = kmap(vmx_io_bitmap_b);
-	memset(iova, 0xff, PAGE_SIZE);
+	va = kmap(vmx_io_bitmap_b);
+	memset(va, 0xff, PAGE_SIZE);
 	kunmap(vmx_io_bitmap_b);
 
+	va = kmap(vmx_msr_bitmap);
+	memset(va, 0xff, PAGE_SIZE);
+	kunmap(vmx_msr_bitmap);
+
+	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out1;
+		goto out2;
+
+	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE);
+	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE);
+	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS);
+	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
+	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
 
 	if (bypass_guest_pf)
 		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
 
 	return 0;
 
+out2:
+	__free_page(vmx_msr_bitmap);
 out1:
 	__free_page(vmx_io_bitmap_b);
 out:
@@ -2670,6 +2859,7 @@ out:
 
 static void __exit vmx_exit(void)
 {
+	__free_page(vmx_msr_bitmap);
 	__free_page(vmx_io_bitmap_b);
 	__free_page(vmx_io_bitmap_a);
 
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index d52ae8d7303d..5dff4606b988 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -49,6 +49,7 @@
  * Definitions of Secondary Processor-Based VM-Execution Controls.
  */
 #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_VPID              0x00000020
 #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
 
 
@@ -65,6 +66,7 @@
 
 /* VMCS Encodings */
 enum vmcs_field {
+	VIRTUAL_PROCESSOR_ID            = 0x00000000,
 	GUEST_ES_SELECTOR               = 0x00000800,
 	GUEST_CS_SELECTOR               = 0x00000802,
 	GUEST_SS_SELECTOR               = 0x00000804,
@@ -231,12 +233,12 @@ enum vmcs_field {
  */
 #define INTR_INFO_VECTOR_MASK           0xff            /* 7:0 */
 #define INTR_INFO_INTR_TYPE_MASK        0x700           /* 10:8 */
-#define INTR_INFO_DELIEVER_CODE_MASK    0x800           /* 11 */
+#define INTR_INFO_DELIVER_CODE_MASK     0x800           /* 11 */
 #define INTR_INFO_VALID_MASK            0x80000000      /* 31 */
 
 #define VECTORING_INFO_VECTOR_MASK           	INTR_INFO_VECTOR_MASK
 #define VECTORING_INFO_TYPE_MASK        	INTR_INFO_INTR_TYPE_MASK
-#define VECTORING_INFO_DELIEVER_CODE_MASK    	INTR_INFO_DELIEVER_CODE_MASK
+#define VECTORING_INFO_DELIVER_CODE_MASK    	INTR_INFO_DELIVER_CODE_MASK
 #define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
@@ -321,4 +323,8 @@ enum vmcs_field {
 
 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	9
 
+#define VMX_NR_VPIDS				(1 << 16)
+#define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
+#define VMX_VPID_EXTENT_ALL_CONTEXT		2
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b01552bd1f1..0ce556372a4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -15,10 +15,12 @@
  */
 
 #include <linux/kvm_host.h>
-#include "segment_descriptor.h"
 #include "irq.h"
 #include "mmu.h"
+#include "i8254.h"
+#include "tss.h"
 
+#include <linux/clocksource.h>
 #include <linux/kvm.h>
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
@@ -28,6 +30,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/msr.h>
+#include <asm/desc.h>
 
 #define MAX_IO_MSRS 256
 #define CR0_RESERVED_BITS						\
@@ -41,7 +44,15 @@
 			  | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
 
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
-#define EFER_RESERVED_BITS 0xfffffffffffff2fe
+/* EFER defaults:
+ * - enable syscall per default because its emulated by KVM
+ * - enable LME and LMA per default on 64 bit KVM
+ */
+#ifdef CONFIG_X86_64
+static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffafeULL;
+#else
+static u64 __read_mostly efer_reserved_bits = 0xfffffffffffffffeULL;
+#endif
 
 #define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
@@ -63,6 +74,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "irq_window", VCPU_STAT(irq_window_exits) },
 	{ "halt_exits", VCPU_STAT(halt_exits) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
 	{ "irq_exits", VCPU_STAT(irq_exits) },
 	{ "host_state_reload", VCPU_STAT(host_state_reload) },
@@ -78,6 +90,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "mmu_recycled", VM_STAT(mmu_recycled) },
 	{ "mmu_cache_miss", VM_STAT(mmu_cache_miss) },
 	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
+	{ "largepages", VM_STAT(lpages) },
 	{ NULL }
 };
 
@@ -85,7 +98,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 unsigned long segment_base(u16 selector)
 {
 	struct descriptor_table gdt;
-	struct segment_descriptor *d;
+	struct desc_struct *d;
 	unsigned long table_base;
 	unsigned long v;
 
@@ -101,13 +114,12 @@ unsigned long segment_base(u16 selector)
 		asm("sldt %0" : "=g"(ldt_selector));
 		table_base = segment_base(ldt_selector);
 	}
-	d = (struct segment_descriptor *)(table_base + (selector & ~7));
-	v = d->base_low | ((unsigned long)d->base_mid << 16) |
-		((unsigned long)d->base_high << 24);
+	d = (struct desc_struct *)(table_base + (selector & ~7));
+	v = d->base0 | ((unsigned long)d->base1 << 16) |
+		((unsigned long)d->base2 << 24);
 #ifdef CONFIG_X86_64
-	if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
-		v |= ((unsigned long) \
-		      ((struct segment_descriptor_64 *)d)->base_higher) << 32;
+	if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
+		v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32;
 #endif
 	return v;
 }
@@ -145,11 +157,16 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr,
 			   u32 error_code)
 {
 	++vcpu->stat.pf_guest;
-	if (vcpu->arch.exception.pending && vcpu->arch.exception.nr == PF_VECTOR) {
-		printk(KERN_DEBUG "kvm: inject_page_fault:"
-		       " double fault 0x%lx\n", addr);
-		vcpu->arch.exception.nr = DF_VECTOR;
-		vcpu->arch.exception.error_code = 0;
+	if (vcpu->arch.exception.pending) {
+		if (vcpu->arch.exception.nr == PF_VECTOR) {
+			printk(KERN_DEBUG "kvm: inject_page_fault:"
+					" double fault 0x%lx\n", addr);
+			vcpu->arch.exception.nr = DF_VECTOR;
+			vcpu->arch.exception.error_code = 0;
+		} else if (vcpu->arch.exception.nr == DF_VECTOR) {
+			/* triple fault -> shutdown */
+			set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests);
+		}
 		return;
 	}
 	vcpu->arch.cr2 = addr;
@@ -184,7 +201,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 	int ret;
 	u64 pdpte[ARRAY_SIZE(vcpu->arch.pdptrs)];
 
-	down_read(&vcpu->kvm->slots_lock);
 	ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
 				  offset * sizeof(u64), sizeof(pdpte));
 	if (ret < 0) {
@@ -201,10 +217,10 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 
 	memcpy(vcpu->arch.pdptrs, pdpte, sizeof(vcpu->arch.pdptrs));
 out:
-	up_read(&vcpu->kvm->slots_lock);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(load_pdptrs);
 
 static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 {
@@ -215,18 +231,16 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu)
 	if (is_long_mode(vcpu) || !is_pae(vcpu))
 		return false;
 
-	down_read(&vcpu->kvm->slots_lock);
 	r = kvm_read_guest(vcpu->kvm, vcpu->arch.cr3 & ~31u, pdpte, sizeof(pdpte));
 	if (r < 0)
 		goto out;
 	changed = memcmp(pdpte, vcpu->arch.pdptrs, sizeof(pdpte)) != 0;
 out:
-	up_read(&vcpu->kvm->slots_lock);
 
 	return changed;
 }
 
-void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
 	if (cr0 & CR0_RESERVED_BITS) {
 		printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
@@ -284,15 +298,18 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	kvm_mmu_reset_context(vcpu);
 	return;
 }
-EXPORT_SYMBOL_GPL(set_cr0);
+EXPORT_SYMBOL_GPL(kvm_set_cr0);
 
-void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
+void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
 {
-	set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
+	kvm_set_cr0(vcpu, (vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f));
+	KVMTRACE_1D(LMSW, vcpu,
+		    (u32)((vcpu->arch.cr0 & ~0x0ful) | (msw & 0x0f)),
+		    handler);
 }
-EXPORT_SYMBOL_GPL(lmsw);
+EXPORT_SYMBOL_GPL(kvm_lmsw);
 
-void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
 	if (cr4 & CR4_RESERVED_BITS) {
 		printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n");
@@ -323,9 +340,9 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 	vcpu->arch.cr4 = cr4;
 	kvm_mmu_reset_context(vcpu);
 }
-EXPORT_SYMBOL_GPL(set_cr4);
+EXPORT_SYMBOL_GPL(kvm_set_cr4);
 
-void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
 	if (cr3 == vcpu->arch.cr3 && !pdptrs_changed(vcpu)) {
 		kvm_mmu_flush_tlb(vcpu);
@@ -359,7 +376,6 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		 */
 	}
 
-	down_read(&vcpu->kvm->slots_lock);
 	/*
 	 * Does the new cr3 value map to physical memory? (Note, we
 	 * catch an invalid cr3 even in real-mode, because it would
@@ -375,11 +391,10 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 		vcpu->arch.cr3 = cr3;
 		vcpu->arch.mmu.new_cr3(vcpu);
 	}
-	up_read(&vcpu->kvm->slots_lock);
 }
-EXPORT_SYMBOL_GPL(set_cr3);
+EXPORT_SYMBOL_GPL(kvm_set_cr3);
 
-void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
+void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	if (cr8 & CR8_RESERVED_BITS) {
 		printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8);
@@ -391,16 +406,16 @@ void set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8)
 	else
 		vcpu->arch.cr8 = cr8;
 }
-EXPORT_SYMBOL_GPL(set_cr8);
+EXPORT_SYMBOL_GPL(kvm_set_cr8);
 
-unsigned long get_cr8(struct kvm_vcpu *vcpu)
+unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 {
 	if (irqchip_in_kernel(vcpu->kvm))
 		return kvm_lapic_get_cr8(vcpu);
 	else
 		return vcpu->arch.cr8;
 }
-EXPORT_SYMBOL_GPL(get_cr8);
+EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@ -415,7 +430,8 @@ static u32 msrs_to_save[] = {
 #ifdef CONFIG_X86_64
 	MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
 #endif
-	MSR_IA32_TIME_STAMP_COUNTER,
+	MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
+	MSR_IA32_PERF_STATUS,
 };
 
 static unsigned num_msrs_to_save;
@@ -424,11 +440,9 @@ static u32 emulated_msrs[] = {
 	MSR_IA32_MISC_ENABLE,
 };
 
-#ifdef CONFIG_X86_64
-
 static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 {
-	if (efer & EFER_RESERVED_BITS) {
+	if (efer & efer_reserved_bits) {
 		printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
 		       efer);
 		kvm_inject_gp(vcpu, 0);
@@ -450,7 +464,12 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
 	vcpu->arch.shadow_efer = efer;
 }
 
-#endif
+void kvm_enable_efer_bits(u64 mask)
+{
+       efer_reserved_bits &= ~mask;
+}
+EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
+
 
 /*
  * Writes msr value into into the appropriate "register".
@@ -470,26 +489,86 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
 	return kvm_set_msr(vcpu, index, *data);
 }
 
+static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
+{
+	static int version;
+	struct kvm_wall_clock wc;
+	struct timespec wc_ts;
+
+	if (!wall_clock)
+		return;
+
+	version++;
+
+	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+
+	wc_ts = current_kernel_time();
+	wc.wc_sec = wc_ts.tv_sec;
+	wc.wc_nsec = wc_ts.tv_nsec;
+	wc.wc_version = version;
+
+	kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
+
+	version++;
+	kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
+}
+
+static void kvm_write_guest_time(struct kvm_vcpu *v)
+{
+	struct timespec ts;
+	unsigned long flags;
+	struct kvm_vcpu_arch *vcpu = &v->arch;
+	void *shared_kaddr;
+
+	if ((!vcpu->time_page))
+		return;
+
+	/* Keep irq disabled to prevent changes to the clock */
+	local_irq_save(flags);
+	kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
+			  &vcpu->hv_clock.tsc_timestamp);
+	ktime_get_ts(&ts);
+	local_irq_restore(flags);
+
+	/* With all the info we got, fill in the values */
+
+	vcpu->hv_clock.system_time = ts.tv_nsec +
+				     (NSEC_PER_SEC * (u64)ts.tv_sec);
+	/*
+	 * The interface expects us to write an even number signaling that the
+	 * update is finished. Since the guest won't see the intermediate
+	 * state, we just write "2" at the end
+	 */
+	vcpu->hv_clock.version = 2;
+
+	shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
+
+	memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
+		sizeof(vcpu->hv_clock));
+
+	kunmap_atomic(shared_kaddr, KM_USER0);
+
+	mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+}
+
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 {
 	switch (msr) {
-#ifdef CONFIG_X86_64
 	case MSR_EFER:
 		set_efer(vcpu, data);
 		break;
-#endif
 	case MSR_IA32_MC0_STATUS:
 		pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
-		       __FUNCTION__, data);
+		       __func__, data);
 		break;
 	case MSR_IA32_MCG_STATUS:
 		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
-			__FUNCTION__, data);
+			__func__, data);
 		break;
 	case MSR_IA32_MCG_CTL:
 		pr_unimpl(vcpu, "%s: MSR_IA32_MCG_CTL 0x%llx, nop\n",
-			__FUNCTION__, data);
+			__func__, data);
 		break;
 	case MSR_IA32_UCODE_REV:
 	case MSR_IA32_UCODE_WRITE:
@@ -501,6 +580,42 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 	case MSR_IA32_MISC_ENABLE:
 		vcpu->arch.ia32_misc_enable_msr = data;
 		break;
+	case MSR_KVM_WALL_CLOCK:
+		vcpu->kvm->arch.wall_clock = data;
+		kvm_write_wall_clock(vcpu->kvm, data);
+		break;
+	case MSR_KVM_SYSTEM_TIME: {
+		if (vcpu->arch.time_page) {
+			kvm_release_page_dirty(vcpu->arch.time_page);
+			vcpu->arch.time_page = NULL;
+		}
+
+		vcpu->arch.time = data;
+
+		/* we verify if the enable bit is set... */
+		if (!(data & 1))
+			break;
+
+		/* ...but clean it before doing the actual write */
+		vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
+
+		vcpu->arch.hv_clock.tsc_to_system_mul =
+					clocksource_khz2mult(tsc_khz, 22);
+		vcpu->arch.hv_clock.tsc_shift = 22;
+
+		down_read(&current->mm->mmap_sem);
+		vcpu->arch.time_page =
+				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+		up_read(&current->mm->mmap_sem);
+
+		if (is_error_page(vcpu->arch.time_page)) {
+			kvm_release_page_clean(vcpu->arch.time_page);
+			vcpu->arch.time_page = NULL;
+		}
+
+		kvm_write_guest_time(vcpu);
+		break;
+	}
 	default:
 		pr_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", msr, data);
 		return 1;
@@ -540,7 +655,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_MC0_MISC+12:
 	case MSR_IA32_MC0_MISC+16:
 	case MSR_IA32_UCODE_REV:
-	case MSR_IA32_PERF_STATUS:
 	case MSR_IA32_EBL_CR_POWERON:
 		/* MTRR registers */
 	case 0xfe:
@@ -556,11 +670,21 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case MSR_IA32_MISC_ENABLE:
 		data = vcpu->arch.ia32_misc_enable_msr;
 		break;
-#ifdef CONFIG_X86_64
+	case MSR_IA32_PERF_STATUS:
+		/* TSC increment by tick */
+		data = 1000ULL;
+		/* CPU multiplier */
+		data |= (((uint64_t)4ULL) << 40);
+		break;
 	case MSR_EFER:
 		data = vcpu->arch.shadow_efer;
 		break;
-#endif
+	case MSR_KVM_WALL_CLOCK:
+		data = vcpu->kvm->arch.wall_clock;
+		break;
+	case MSR_KVM_SYSTEM_TIME:
+		data = vcpu->arch.time;
+		break;
 	default:
 		pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -584,9 +708,11 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
 
 	vcpu_load(vcpu);
 
+	down_read(&vcpu->kvm->slots_lock);
 	for (i = 0; i < msrs->nmsrs; ++i)
 		if (do_msr(vcpu, entries[i].index, &entries[i].data))
 			break;
+	up_read(&vcpu->kvm->slots_lock);
 
 	vcpu_put(vcpu);
 
@@ -688,11 +814,24 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SET_TSS_ADDR:
 	case KVM_CAP_EXT_CPUID:
+	case KVM_CAP_CLOCKSOURCE:
+	case KVM_CAP_PIT:
+	case KVM_CAP_NOP_IO_DELAY:
+	case KVM_CAP_MP_STATE:
 		r = 1;
 		break;
 	case KVM_CAP_VAPIC:
 		r = !kvm_x86_ops->cpu_has_accelerated_tpr();
 		break;
+	case KVM_CAP_NR_VCPUS:
+		r = KVM_MAX_VCPUS;
+		break;
+	case KVM_CAP_NR_MEMSLOTS:
+		r = KVM_MEMORY_SLOTS;
+		break;
+	case KVM_CAP_PV_MMU:
+		r = !tdp_enabled;
+		break;
 	default:
 		r = 0;
 		break;
@@ -763,6 +902,7 @@ out:
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvm_x86_ops->vcpu_load(vcpu, cpu);
+	kvm_write_guest_time(vcpu);
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -958,32 +1098,32 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	}
 	/* function 4 and 0xb have additional index. */
 	case 4: {
-		int index, cache_type;
+		int i, cache_type;
 
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		/* read more entries until cache_type is zero */
-		for (index = 1; *nent < maxnent; ++index) {
-			cache_type = entry[index - 1].eax & 0x1f;
+		for (i = 1; *nent < maxnent; ++i) {
+			cache_type = entry[i - 1].eax & 0x1f;
 			if (!cache_type)
 				break;
-			do_cpuid_1_ent(&entry[index], function, index);
-			entry[index].flags |=
+			do_cpuid_1_ent(&entry[i], function, i);
+			entry[i].flags |=
 			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 			++*nent;
 		}
 		break;
 	}
 	case 0xb: {
-		int index, level_type;
+		int i, level_type;
 
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		/* read more entries until level_type is zero */
-		for (index = 1; *nent < maxnent; ++index) {
-			level_type = entry[index - 1].ecx & 0xff;
+		for (i = 1; *nent < maxnent; ++i) {
+			level_type = entry[i - 1].ecx & 0xff;
 			if (!level_type)
 				break;
-			do_cpuid_1_ent(&entry[index], function, index);
-			entry[index].flags |=
+			do_cpuid_1_ent(&entry[i], function, i);
+			entry[i].flags |=
 			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 			++*nent;
 		}
@@ -1365,6 +1505,23 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 	return r;
 }
 
+static int kvm_vm_ioctl_get_pit(struct kvm *kvm, struct kvm_pit_state *ps)
+{
+	int r = 0;
+
+	memcpy(ps, &kvm->arch.vpit->pit_state, sizeof(struct kvm_pit_state));
+	return r;
+}
+
+static int kvm_vm_ioctl_set_pit(struct kvm *kvm, struct kvm_pit_state *ps)
+{
+	int r = 0;
+
+	memcpy(&kvm->arch.vpit->pit_state, ps, sizeof(struct kvm_pit_state));
+	kvm_pit_load_count(kvm, 0, ps->channels[0].count);
+	return r;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -1457,6 +1614,12 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		} else
 			goto out;
 		break;
+	case KVM_CREATE_PIT:
+		r = -ENOMEM;
+		kvm->arch.vpit = kvm_create_pit(kvm);
+		if (kvm->arch.vpit)
+			r = 0;
+		break;
 	case KVM_IRQ_LINE: {
 		struct kvm_irq_level irq_event;
 
@@ -1512,6 +1675,37 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_GET_PIT: {
+		struct kvm_pit_state ps;
+		r = -EFAULT;
+		if (copy_from_user(&ps, argp, sizeof ps))
+			goto out;
+		r = -ENXIO;
+		if (!kvm->arch.vpit)
+			goto out;
+		r = kvm_vm_ioctl_get_pit(kvm, &ps);
+		if (r)
+			goto out;
+		r = -EFAULT;
+		if (copy_to_user(argp, &ps, sizeof ps))
+			goto out;
+		r = 0;
+		break;
+	}
+	case KVM_SET_PIT: {
+		struct kvm_pit_state ps;
+		r = -EFAULT;
+		if (copy_from_user(&ps, argp, sizeof ps))
+			goto out;
+		r = -ENXIO;
+		if (!kvm->arch.vpit)
+			goto out;
+		r = kvm_vm_ioctl_set_pit(kvm, &ps);
+		if (r)
+			goto out;
+		r = 0;
+		break;
+	}
 	default:
 		;
 	}
@@ -1570,7 +1764,6 @@ int emulator_read_std(unsigned long addr,
 	void *data = val;
 	int r = X86EMUL_CONTINUE;
 
-	down_read(&vcpu->kvm->slots_lock);
 	while (bytes) {
 		gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 		unsigned offset = addr & (PAGE_SIZE-1);
@@ -1592,7 +1785,6 @@ int emulator_read_std(unsigned long addr,
 		addr += tocopy;
 	}
 out:
-	up_read(&vcpu->kvm->slots_lock);
 	return r;
 }
 EXPORT_SYMBOL_GPL(emulator_read_std);
@@ -1611,9 +1803,7 @@ static int emulator_read_emulated(unsigned long addr,
 		return X86EMUL_CONTINUE;
 	}
 
-	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
-	up_read(&vcpu->kvm->slots_lock);
 
 	/* For APIC access vmexit */
 	if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -1646,19 +1836,15 @@ mmio:
 	return X86EMUL_UNHANDLEABLE;
 }
 
-static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
-			       const void *val, int bytes)
+int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
+			  const void *val, int bytes)
 {
 	int ret;
 
-	down_read(&vcpu->kvm->slots_lock);
 	ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes);
-	if (ret < 0) {
-		up_read(&vcpu->kvm->slots_lock);
+	if (ret < 0)
 		return 0;
-	}
 	kvm_mmu_pte_write(vcpu, gpa, val, bytes);
-	up_read(&vcpu->kvm->slots_lock);
 	return 1;
 }
 
@@ -1670,9 +1856,7 @@ static int emulator_write_emulated_onepage(unsigned long addr,
 	struct kvm_io_device *mmio_dev;
 	gpa_t                 gpa;
 
-	down_read(&vcpu->kvm->slots_lock);
 	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
-	up_read(&vcpu->kvm->slots_lock);
 
 	if (gpa == UNMAPPED_GVA) {
 		kvm_inject_page_fault(vcpu, addr, 2);
@@ -1749,7 +1933,6 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 		char *kaddr;
 		u64 val;
 
-		down_read(&vcpu->kvm->slots_lock);
 		gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
 
 		if (gpa == UNMAPPED_GVA ||
@@ -1769,9 +1952,8 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
 		set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val);
 		kunmap_atomic(kaddr, KM_USER0);
 		kvm_release_page_dirty(page);
-	emul_write:
-		up_read(&vcpu->kvm->slots_lock);
 	}
+emul_write:
 #endif
 
 	return emulator_write_emulated(addr, new, bytes, vcpu);
@@ -1802,7 +1984,7 @@ int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 		*dest = kvm_x86_ops->get_dr(vcpu, dr);
 		return X86EMUL_CONTINUE;
 	default:
-		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __FUNCTION__, dr);
+		pr_unimpl(vcpu, "%s: unexpected dr %u\n", __func__, dr);
 		return X86EMUL_UNHANDLEABLE;
 	}
 }
@@ -1840,7 +2022,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
 }
 EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
 
-struct x86_emulate_ops emulate_ops = {
+static struct x86_emulate_ops emulate_ops = {
 	.read_std            = emulator_read_std,
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
@@ -2091,6 +2273,13 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.guest_page_offset = 0;
 	vcpu->arch.pio.rep = 0;
 
+	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
+		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
+			    handler);
+	else
+		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
+			    handler);
+
 	kvm_x86_ops->cache_regs(vcpu);
 	memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4);
 	kvm_x86_ops->decache_regs(vcpu);
@@ -2129,6 +2318,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 	vcpu->arch.pio.guest_page_offset = offset_in_page(address);
 	vcpu->arch.pio.rep = rep;
 
+	if (vcpu->run->io.direction == KVM_EXIT_IO_IN)
+		KVMTRACE_2D(IO_READ, vcpu, vcpu->run->io.port, (u32)size,
+			    handler);
+	else
+		KVMTRACE_2D(IO_WRITE, vcpu, vcpu->run->io.port, (u32)size,
+			    handler);
+
 	if (!count) {
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 		return 1;
@@ -2163,10 +2359,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 		kvm_x86_ops->skip_emulated_instruction(vcpu);
 
 	for (i = 0; i < nr_pages; ++i) {
-		down_read(&vcpu->kvm->slots_lock);
 		page = gva_to_page(vcpu, address + i * PAGE_SIZE);
 		vcpu->arch.pio.guest_pages[i] = page;
-		up_read(&vcpu->kvm->slots_lock);
 		if (!page) {
 			kvm_inject_gp(vcpu, 0);
 			free_pio_guest_pages(vcpu);
@@ -2238,10 +2432,13 @@ void kvm_arch_exit(void)
 int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.halt_exits;
+	KVMTRACE_0D(HLT, vcpu, handler);
 	if (irqchip_in_kernel(vcpu->kvm)) {
-		vcpu->arch.mp_state = VCPU_MP_STATE_HALTED;
+		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+		up_read(&vcpu->kvm->slots_lock);
 		kvm_vcpu_block(vcpu);
-		if (vcpu->arch.mp_state != VCPU_MP_STATE_RUNNABLE)
+		down_read(&vcpu->kvm->slots_lock);
+		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
 			return -EINTR;
 		return 1;
 	} else {
@@ -2251,9 +2448,19 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+static inline gpa_t hc_gpa(struct kvm_vcpu *vcpu, unsigned long a0,
+			   unsigned long a1)
+{
+	if (is_long_mode(vcpu))
+		return a0;
+	else
+		return a0 | ((gpa_t)a1 << 32);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
+	int r = 1;
 
 	kvm_x86_ops->cache_regs(vcpu);
 
@@ -2263,6 +2470,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	a2 = vcpu->arch.regs[VCPU_REGS_RDX];
 	a3 = vcpu->arch.regs[VCPU_REGS_RSI];
 
+	KVMTRACE_1D(VMMCALL, vcpu, (u32)nr, handler);
+
 	if (!is_long_mode(vcpu)) {
 		nr &= 0xFFFFFFFF;
 		a0 &= 0xFFFFFFFF;
@@ -2275,13 +2484,17 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
+	case KVM_HC_MMU_OP:
+		r = kvm_pv_mmu_op(vcpu, a0, hc_gpa(vcpu, a1, a2), &ret);
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
 	}
 	vcpu->arch.regs[VCPU_REGS_RAX] = ret;
 	kvm_x86_ops->decache_regs(vcpu);
-	return 0;
+	++vcpu->stat.hypercalls;
+	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
 
@@ -2329,7 +2542,7 @@ void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
 void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
 		   unsigned long *rflags)
 {
-	lmsw(vcpu, msw);
+	kvm_lmsw(vcpu, msw);
 	*rflags = kvm_x86_ops->get_rflags(vcpu);
 }
 
@@ -2346,9 +2559,9 @@ unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
 	case 4:
 		return vcpu->arch.cr4;
 	case 8:
-		return get_cr8(vcpu);
+		return kvm_get_cr8(vcpu);
 	default:
-		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
+		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
 		return 0;
 	}
 }
@@ -2358,23 +2571,23 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
 {
 	switch (cr) {
 	case 0:
-		set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
+		kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val));
 		*rflags = kvm_x86_ops->get_rflags(vcpu);
 		break;
 	case 2:
 		vcpu->arch.cr2 = val;
 		break;
 	case 3:
-		set_cr3(vcpu, val);
+		kvm_set_cr3(vcpu, val);
 		break;
 	case 4:
-		set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
+		kvm_set_cr4(vcpu, mk_cr_64(vcpu->arch.cr4, val));
 		break;
 	case 8:
-		set_cr8(vcpu, val & 0xfUL);
+		kvm_set_cr8(vcpu, val & 0xfUL);
 		break;
 	default:
-		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
+		vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr);
 	}
 }
 
@@ -2447,6 +2660,11 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 	}
 	kvm_x86_ops->decache_regs(vcpu);
 	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	KVMTRACE_5D(CPUID, vcpu, function,
+		    (u32)vcpu->arch.regs[VCPU_REGS_RAX],
+		    (u32)vcpu->arch.regs[VCPU_REGS_RBX],
+		    (u32)vcpu->arch.regs[VCPU_REGS_RCX],
+		    (u32)vcpu->arch.regs[VCPU_REGS_RDX], handler);
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
@@ -2469,7 +2687,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu,
 			      struct kvm_run *kvm_run)
 {
 	kvm_run->if_flag = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
-	kvm_run->cr8 = get_cr8(vcpu);
+	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
 	if (irqchip_in_kernel(vcpu->kvm))
 		kvm_run->ready_for_interrupt_injection = 1;
@@ -2509,16 +2727,17 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
 
-	if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED)) {
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
 		pr_debug("vcpu %d received sipi with vector # %x\n",
 		       vcpu->vcpu_id, vcpu->arch.sipi_vector);
 		kvm_lapic_reset(vcpu);
 		r = kvm_x86_ops->vcpu_reset(vcpu);
 		if (r)
 			return r;
-		vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}
 
+	down_read(&vcpu->kvm->slots_lock);
 	vapic_enter(vcpu);
 
 preempted:
@@ -2526,6 +2745,10 @@ preempted:
 		kvm_x86_ops->guest_debug_pre(vcpu);
 
 again:
+	if (vcpu->requests)
+		if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
+			kvm_mmu_unload(vcpu);
+
 	r = kvm_mmu_reload(vcpu);
 	if (unlikely(r))
 		goto out;
@@ -2539,6 +2762,11 @@ again:
 			r = 0;
 			goto out;
 		}
+		if (test_and_clear_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests)) {
+			kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
+			r = 0;
+			goto out;
+		}
 	}
 
 	kvm_inject_pending_timer_irqs(vcpu);
@@ -2557,6 +2785,14 @@ again:
 		goto out;
 	}
 
+	if (vcpu->requests)
+		if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
+			local_irq_enable();
+			preempt_enable();
+			r = 1;
+			goto out;
+		}
+
 	if (signal_pending(current)) {
 		local_irq_enable();
 		preempt_enable();
@@ -2566,6 +2802,13 @@ again:
 		goto out;
 	}
 
+	vcpu->guest_mode = 1;
+	/*
+	 * Make sure that guest_mode assignment won't happen after
+	 * testing the pending IRQ vector bitmap.
+	 */
+	smp_wmb();
+
 	if (vcpu->arch.exception.pending)
 		__queue_exception(vcpu);
 	else if (irqchip_in_kernel(vcpu->kvm))
@@ -2575,13 +2818,15 @@ again:
 
 	kvm_lapic_sync_to_vapic(vcpu);
 
-	vcpu->guest_mode = 1;
+	up_read(&vcpu->kvm->slots_lock);
+
 	kvm_guest_enter();
 
 	if (vcpu->requests)
 		if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
 			kvm_x86_ops->tlb_flush(vcpu);
 
+	KVMTRACE_0D(VMENTRY, vcpu, entryexit);
 	kvm_x86_ops->run(vcpu, kvm_run);
 
 	vcpu->guest_mode = 0;
@@ -2601,6 +2846,8 @@ again:
 
 	preempt_enable();
 
+	down_read(&vcpu->kvm->slots_lock);
+
 	/*
 	 * Profile KVM exit RIPs:
 	 */
@@ -2628,14 +2875,18 @@ again:
 	}
 
 out:
+	up_read(&vcpu->kvm->slots_lock);
 	if (r > 0) {
 		kvm_resched(vcpu);
+		down_read(&vcpu->kvm->slots_lock);
 		goto preempted;
 	}
 
 	post_kvm_run_save(vcpu, kvm_run);
 
+	down_read(&vcpu->kvm->slots_lock);
 	vapic_exit(vcpu);
+	up_read(&vcpu->kvm->slots_lock);
 
 	return r;
 }
@@ -2647,7 +2898,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	vcpu_load(vcpu);
 
-	if (unlikely(vcpu->arch.mp_state == VCPU_MP_STATE_UNINITIALIZED)) {
+	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		kvm_vcpu_block(vcpu);
 		vcpu_put(vcpu);
 		return -EAGAIN;
@@ -2658,7 +2909,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 	/* re-sync apic's tpr */
 	if (!irqchip_in_kernel(vcpu->kvm))
-		set_cr8(vcpu, kvm_run->cr8);
+		kvm_set_cr8(vcpu, kvm_run->cr8);
 
 	if (vcpu->arch.pio.cur_count) {
 		r = complete_pio(vcpu);
@@ -2670,9 +2921,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
 		vcpu->mmio_read_completed = 1;
 		vcpu->mmio_needed = 0;
+
+		down_read(&vcpu->kvm->slots_lock);
 		r = emulate_instruction(vcpu, kvm_run,
 					vcpu->arch.mmio_fault_cr2, 0,
 					EMULTYPE_NO_DECODE);
+		up_read(&vcpu->kvm->slots_lock);
 		if (r == EMULATE_DO_MMIO) {
 			/*
 			 * Read-modify-write.  Back to userspace.
@@ -2773,7 +3027,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 static void get_segment(struct kvm_vcpu *vcpu,
 			struct kvm_segment *var, int seg)
 {
-	return kvm_x86_ops->get_segment(vcpu, var, seg);
+	kvm_x86_ops->get_segment(vcpu, var, seg);
 }
 
 void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l)
@@ -2816,7 +3070,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	sregs->cr2 = vcpu->arch.cr2;
 	sregs->cr3 = vcpu->arch.cr3;
 	sregs->cr4 = vcpu->arch.cr4;
-	sregs->cr8 = get_cr8(vcpu);
+	sregs->cr8 = kvm_get_cr8(vcpu);
 	sregs->efer = vcpu->arch.shadow_efer;
 	sregs->apic_base = kvm_get_apic_base(vcpu);
 
@@ -2836,12 +3090,438 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	vcpu_load(vcpu);
+	mp_state->mp_state = vcpu->arch.mp_state;
+	vcpu_put(vcpu);
+	return 0;
+}
+
+int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
+				    struct kvm_mp_state *mp_state)
+{
+	vcpu_load(vcpu);
+	vcpu->arch.mp_state = mp_state->mp_state;
+	vcpu_put(vcpu);
+	return 0;
+}
+
 static void set_segment(struct kvm_vcpu *vcpu,
 			struct kvm_segment *var, int seg)
 {
-	return kvm_x86_ops->set_segment(vcpu, var, seg);
+	kvm_x86_ops->set_segment(vcpu, var, seg);
+}
+
+static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
+				   struct kvm_segment *kvm_desct)
+{
+	kvm_desct->base = seg_desc->base0;
+	kvm_desct->base |= seg_desc->base1 << 16;
+	kvm_desct->base |= seg_desc->base2 << 24;
+	kvm_desct->limit = seg_desc->limit0;
+	kvm_desct->limit |= seg_desc->limit << 16;
+	kvm_desct->selector = selector;
+	kvm_desct->type = seg_desc->type;
+	kvm_desct->present = seg_desc->p;
+	kvm_desct->dpl = seg_desc->dpl;
+	kvm_desct->db = seg_desc->d;
+	kvm_desct->s = seg_desc->s;
+	kvm_desct->l = seg_desc->l;
+	kvm_desct->g = seg_desc->g;
+	kvm_desct->avl = seg_desc->avl;
+	if (!selector)
+		kvm_desct->unusable = 1;
+	else
+		kvm_desct->unusable = 0;
+	kvm_desct->padding = 0;
+}
+
+static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu,
+					   u16 selector,
+					   struct descriptor_table *dtable)
+{
+	if (selector & 1 << 2) {
+		struct kvm_segment kvm_seg;
+
+		get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR);
+
+		if (kvm_seg.unusable)
+			dtable->limit = 0;
+		else
+			dtable->limit = kvm_seg.limit;
+		dtable->base = kvm_seg.base;
+	}
+	else
+		kvm_x86_ops->get_gdt(vcpu, dtable);
+}
+
+/* allowed just for 8 bytes segments */
+static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+					 struct desc_struct *seg_desc)
+{
+	struct descriptor_table dtable;
+	u16 index = selector >> 3;
+
+	get_segment_descritptor_dtable(vcpu, selector, &dtable);
+
+	if (dtable.limit < index * 8 + 7) {
+		kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
+		return 1;
+	}
+	return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
+}
+
+/* allowed just for 8 bytes segments */
+static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+					 struct desc_struct *seg_desc)
+{
+	struct descriptor_table dtable;
+	u16 index = selector >> 3;
+
+	get_segment_descritptor_dtable(vcpu, selector, &dtable);
+
+	if (dtable.limit < index * 8 + 7)
+		return 1;
+	return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8);
+}
+
+static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
+			     struct desc_struct *seg_desc)
+{
+	u32 base_addr;
+
+	base_addr = seg_desc->base0;
+	base_addr |= (seg_desc->base1 << 16);
+	base_addr |= (seg_desc->base2 << 24);
+
+	return base_addr;
+}
+
+static int load_tss_segment32(struct kvm_vcpu *vcpu,
+			      struct desc_struct *seg_desc,
+			      struct tss_segment_32 *tss)
+{
+	u32 base_addr;
+
+	base_addr = get_tss_base_addr(vcpu, seg_desc);
+
+	return kvm_read_guest(vcpu->kvm, base_addr, tss,
+			      sizeof(struct tss_segment_32));
+}
+
+static int save_tss_segment32(struct kvm_vcpu *vcpu,
+			      struct desc_struct *seg_desc,
+			      struct tss_segment_32 *tss)
+{
+	u32 base_addr;
+
+	base_addr = get_tss_base_addr(vcpu, seg_desc);
+
+	return kvm_write_guest(vcpu->kvm, base_addr, tss,
+			       sizeof(struct tss_segment_32));
+}
+
+static int load_tss_segment16(struct kvm_vcpu *vcpu,
+			      struct desc_struct *seg_desc,
+			      struct tss_segment_16 *tss)
+{
+	u32 base_addr;
+
+	base_addr = get_tss_base_addr(vcpu, seg_desc);
+
+	return kvm_read_guest(vcpu->kvm, base_addr, tss,
+			      sizeof(struct tss_segment_16));
+}
+
+static int save_tss_segment16(struct kvm_vcpu *vcpu,
+			      struct desc_struct *seg_desc,
+			      struct tss_segment_16 *tss)
+{
+	u32 base_addr;
+
+	base_addr = get_tss_base_addr(vcpu, seg_desc);
+
+	return kvm_write_guest(vcpu->kvm, base_addr, tss,
+			       sizeof(struct tss_segment_16));
+}
+
+static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
+{
+	struct kvm_segment kvm_seg;
+
+	get_segment(vcpu, &kvm_seg, seg);
+	return kvm_seg.selector;
+}
+
+static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu,
+						u16 selector,
+						struct kvm_segment *kvm_seg)
+{
+	struct desc_struct seg_desc;
+
+	if (load_guest_segment_descriptor(vcpu, selector, &seg_desc))
+		return 1;
+	seg_desct_to_kvm_desct(&seg_desc, selector, kvm_seg);
+	return 0;
+}
+
+static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
+				   int type_bits, int seg)
+{
+	struct kvm_segment kvm_seg;
+
+	if (load_segment_descriptor_to_kvm_desct(vcpu, selector, &kvm_seg))
+		return 1;
+	kvm_seg.type |= type_bits;
+
+	if (seg != VCPU_SREG_SS && seg != VCPU_SREG_CS &&
+	    seg != VCPU_SREG_LDTR)
+		if (!kvm_seg.s)
+			kvm_seg.unusable = 1;
+
+	set_segment(vcpu, &kvm_seg, seg);
+	return 0;
+}
+
+static void save_state_to_tss32(struct kvm_vcpu *vcpu,
+				struct tss_segment_32 *tss)
+{
+	tss->cr3 = vcpu->arch.cr3;
+	tss->eip = vcpu->arch.rip;
+	tss->eflags = kvm_x86_ops->get_rflags(vcpu);
+	tss->eax = vcpu->arch.regs[VCPU_REGS_RAX];
+	tss->ecx = vcpu->arch.regs[VCPU_REGS_RCX];
+	tss->edx = vcpu->arch.regs[VCPU_REGS_RDX];
+	tss->ebx = vcpu->arch.regs[VCPU_REGS_RBX];
+	tss->esp = vcpu->arch.regs[VCPU_REGS_RSP];
+	tss->ebp = vcpu->arch.regs[VCPU_REGS_RBP];
+	tss->esi = vcpu->arch.regs[VCPU_REGS_RSI];
+	tss->edi = vcpu->arch.regs[VCPU_REGS_RDI];
+
+	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
+	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
+	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
+	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
+	tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS);
+	tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS);
+	tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR);
+	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
+}
+
+static int load_state_from_tss32(struct kvm_vcpu *vcpu,
+				  struct tss_segment_32 *tss)
+{
+	kvm_set_cr3(vcpu, tss->cr3);
+
+	vcpu->arch.rip = tss->eip;
+	kvm_x86_ops->set_rflags(vcpu, tss->eflags | 2);
+
+	vcpu->arch.regs[VCPU_REGS_RAX] = tss->eax;
+	vcpu->arch.regs[VCPU_REGS_RCX] = tss->ecx;
+	vcpu->arch.regs[VCPU_REGS_RDX] = tss->edx;
+	vcpu->arch.regs[VCPU_REGS_RBX] = tss->ebx;
+	vcpu->arch.regs[VCPU_REGS_RSP] = tss->esp;
+	vcpu->arch.regs[VCPU_REGS_RBP] = tss->ebp;
+	vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi;
+	vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi;
+
+	if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS))
+		return 1;
+	return 0;
+}
+
+static void save_state_to_tss16(struct kvm_vcpu *vcpu,
+				struct tss_segment_16 *tss)
+{
+	tss->ip = vcpu->arch.rip;
+	tss->flag = kvm_x86_ops->get_rflags(vcpu);
+	tss->ax = vcpu->arch.regs[VCPU_REGS_RAX];
+	tss->cx = vcpu->arch.regs[VCPU_REGS_RCX];
+	tss->dx = vcpu->arch.regs[VCPU_REGS_RDX];
+	tss->bx = vcpu->arch.regs[VCPU_REGS_RBX];
+	tss->sp = vcpu->arch.regs[VCPU_REGS_RSP];
+	tss->bp = vcpu->arch.regs[VCPU_REGS_RBP];
+	tss->si = vcpu->arch.regs[VCPU_REGS_RSI];
+	tss->di = vcpu->arch.regs[VCPU_REGS_RDI];
+
+	tss->es = get_segment_selector(vcpu, VCPU_SREG_ES);
+	tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS);
+	tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS);
+	tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS);
+	tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR);
+	tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR);
+}
+
+static int load_state_from_tss16(struct kvm_vcpu *vcpu,
+				 struct tss_segment_16 *tss)
+{
+	vcpu->arch.rip = tss->ip;
+	kvm_x86_ops->set_rflags(vcpu, tss->flag | 2);
+	vcpu->arch.regs[VCPU_REGS_RAX] = tss->ax;
+	vcpu->arch.regs[VCPU_REGS_RCX] = tss->cx;
+	vcpu->arch.regs[VCPU_REGS_RDX] = tss->dx;
+	vcpu->arch.regs[VCPU_REGS_RBX] = tss->bx;
+	vcpu->arch.regs[VCPU_REGS_RSP] = tss->sp;
+	vcpu->arch.regs[VCPU_REGS_RBP] = tss->bp;
+	vcpu->arch.regs[VCPU_REGS_RSI] = tss->si;
+	vcpu->arch.regs[VCPU_REGS_RDI] = tss->di;
+
+	if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS))
+		return 1;
+
+	if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS))
+		return 1;
+	return 0;
+}
+
+int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
+		       struct desc_struct *cseg_desc,
+		       struct desc_struct *nseg_desc)
+{
+	struct tss_segment_16 tss_segment_16;
+	int ret = 0;
+
+	if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16))
+		goto out;
+
+	save_state_to_tss16(vcpu, &tss_segment_16);
+	save_tss_segment16(vcpu, cseg_desc, &tss_segment_16);
+
+	if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16))
+		goto out;
+	if (load_state_from_tss16(vcpu, &tss_segment_16))
+		goto out;
+
+	ret = 1;
+out:
+	return ret;
+}
+
+int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
+		       struct desc_struct *cseg_desc,
+		       struct desc_struct *nseg_desc)
+{
+	struct tss_segment_32 tss_segment_32;
+	int ret = 0;
+
+	if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32))
+		goto out;
+
+	save_state_to_tss32(vcpu, &tss_segment_32);
+	save_tss_segment32(vcpu, cseg_desc, &tss_segment_32);
+
+	if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32))
+		goto out;
+	if (load_state_from_tss32(vcpu, &tss_segment_32))
+		goto out;
+
+	ret = 1;
+out:
+	return ret;
 }
 
+int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
+{
+	struct kvm_segment tr_seg;
+	struct desc_struct cseg_desc;
+	struct desc_struct nseg_desc;
+	int ret = 0;
+
+	get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
+
+	if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc))
+		goto out;
+
+	if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc))
+		goto out;
+
+
+	if (reason != TASK_SWITCH_IRET) {
+		int cpl;
+
+		cpl = kvm_x86_ops->get_cpl(vcpu);
+		if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) {
+			kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
+			return 1;
+		}
+	}
+
+	if (!nseg_desc.p || (nseg_desc.limit0 | nseg_desc.limit << 16) < 0x67) {
+		kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc);
+		return 1;
+	}
+
+	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
+		cseg_desc.type &= ~(1 << 8); //clear the B flag
+		save_guest_segment_descriptor(vcpu, tr_seg.selector,
+					      &cseg_desc);
+	}
+
+	if (reason == TASK_SWITCH_IRET) {
+		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
+		kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT);
+	}
+
+	kvm_x86_ops->skip_emulated_instruction(vcpu);
+	kvm_x86_ops->cache_regs(vcpu);
+
+	if (nseg_desc.type & 8)
+		ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc,
+					 &nseg_desc);
+	else
+		ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc,
+					 &nseg_desc);
+
+	if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) {
+		u32 eflags = kvm_x86_ops->get_rflags(vcpu);
+		kvm_x86_ops->set_rflags(vcpu, eflags | X86_EFLAGS_NT);
+	}
+
+	if (reason != TASK_SWITCH_IRET) {
+		nseg_desc.type |= (1 << 8);
+		save_guest_segment_descriptor(vcpu, tss_selector,
+					      &nseg_desc);
+	}
+
+	kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS);
+	seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg);
+	tr_seg.type = 11;
+	set_segment(vcpu, &tr_seg, VCPU_SREG_TR);
+out:
+	kvm_x86_ops->decache_regs(vcpu);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_task_switch);
+
 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 				  struct kvm_sregs *sregs)
 {
@@ -2862,12 +3542,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3;
 	vcpu->arch.cr3 = sregs->cr3;
 
-	set_cr8(vcpu, sregs->cr8);
+	kvm_set_cr8(vcpu, sregs->cr8);
 
 	mmu_reset_needed |= vcpu->arch.shadow_efer != sregs->efer;
-#ifdef CONFIG_X86_64
 	kvm_x86_ops->set_efer(vcpu, sregs->efer);
-#endif
 	kvm_set_apic_base(vcpu, sregs->apic_base);
 
 	kvm_x86_ops->decache_cr4_guest_bits(vcpu);
@@ -3141,9 +3819,9 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
 	if (!irqchip_in_kernel(kvm) || vcpu->vcpu_id == 0)
-		vcpu->arch.mp_state = VCPU_MP_STATE_RUNNABLE;
+		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	else
-		vcpu->arch.mp_state = VCPU_MP_STATE_UNINITIALIZED;
+		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
 
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
@@ -3175,7 +3853,9 @@ fail:
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 	kvm_free_lapic(vcpu);
+	down_read(&vcpu->kvm->slots_lock);
 	kvm_mmu_destroy(vcpu);
+	up_read(&vcpu->kvm->slots_lock);
 	free_page((unsigned long)vcpu->arch.pio_data);
 }
 
@@ -3219,10 +3899,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+	kvm_free_pit(kvm);
 	kfree(kvm->arch.vpic);
 	kfree(kvm->arch.vioapic);
 	kvm_free_vcpus(kvm);
 	kvm_free_physmem(kvm);
+	if (kvm->arch.apic_access_page)
+		put_page(kvm->arch.apic_access_page);
 	kfree(kvm);
 }
 
@@ -3278,8 +3961,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.mp_state == VCPU_MP_STATE_RUNNABLE
-	       || vcpu->arch.mp_state == VCPU_MP_STATE_SIPI_RECEIVED;
+	return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
+	       || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED;
 }
 
 static void vcpu_kick_intr(void *info)
@@ -3293,11 +3976,17 @@ static void vcpu_kick_intr(void *info)
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
 {
 	int ipi_pcpu = vcpu->cpu;
+	int cpu = get_cpu();
 
 	if (waitqueue_active(&vcpu->wq)) {
 		wake_up_interruptible(&vcpu->wq);
 		++vcpu->stat.halt_wakeup;
 	}
-	if (vcpu->guest_mode)
+	/*
+	 * We may be called synchronously with irqs disabled in guest mode,
+	 * So need not to call smp_call_function_single() in that case.
+	 */
+	if (vcpu->guest_mode && vcpu->cpu != cpu)
 		smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0);
+	put_cpu();
 }
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 79586003397a..2ca08386f993 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -65,6 +65,14 @@
 #define MemAbs      (1<<9)      /* Memory operand is absolute displacement */
 #define String      (1<<10)     /* String instruction (rep capable) */
 #define Stack       (1<<11)     /* Stack instruction (push/pop) */
+#define Group       (1<<14)     /* Bits 3:5 of modrm byte extend opcode */
+#define GroupDual   (1<<15)     /* Alternate decoding of mod == 3 */
+#define GroupMask   0xff        /* Group number stored in bits 0:7 */
+
+enum {
+	Group1_80, Group1_81, Group1_82, Group1_83,
+	Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
+};
 
 static u16 opcode_table[256] = {
 	/* 0x00 - 0x07 */
@@ -123,14 +131,14 @@ static u16 opcode_table[256] = {
 	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
 	ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
 	/* 0x80 - 0x87 */
-	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
-	ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
+	Group | Group1_80, Group | Group1_81,
+	Group | Group1_82, Group | Group1_83,
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
 	/* 0x88 - 0x8F */
 	ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
 	ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
-	0, ModRM | DstReg, 0, DstMem | SrcNone | ModRM | Mov | Stack,
+	0, ModRM | DstReg, 0, Group | Group1A,
 	/* 0x90 - 0x9F */
 	0, 0, 0, 0, 0, 0, 0, 0,
 	0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0,
@@ -164,16 +172,15 @@ static u16 opcode_table[256] = {
 	0, 0, 0, 0,
 	/* 0xF0 - 0xF7 */
 	0, 0, 0, 0,
-	ImplicitOps, ImplicitOps,
-	ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
+	ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
 	/* 0xF8 - 0xFF */
 	ImplicitOps, 0, ImplicitOps, ImplicitOps,
-	0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
+	0, 0, Group | Group4, Group | Group5,
 };
 
 static u16 twobyte_table[256] = {
 	/* 0x00 - 0x0F */
-	0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
+	0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0,
 	ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
 	/* 0x10 - 0x1F */
 	0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
@@ -229,6 +236,56 @@ static u16 twobyte_table[256] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
+static u16 group_table[] = {
+	[Group1_80*8] =
+	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	[Group1_81*8] =
+	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+	DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
+	[Group1_82*8] =
+	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
+	[Group1_83*8] =
+	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+	DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
+	[Group1A*8] =
+	DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
+	[Group3_Byte*8] =
+	ByteOp | SrcImm | DstMem | ModRM, 0,
+	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
+	0, 0, 0, 0,
+	[Group3*8] =
+	DstMem | SrcImm | ModRM | SrcImm, 0,
+	DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
+	0, 0, 0, 0,
+	[Group4*8] =
+	ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
+	0, 0, 0, 0, 0, 0,
+	[Group5*8] =
+	DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0,
+	SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0,
+	[Group7*8] =
+	0, 0, ModRM | SrcMem, ModRM | SrcMem,
+	SrcNone | ModRM | DstMem | Mov, 0,
+	SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
+};
+
+static u16 group2_table[] = {
+	[Group7*8] =
+	SrcNone | ModRM, 0, 0, 0,
+	SrcNone | ModRM | DstMem | Mov, 0,
+	SrcMem16 | ModRM | Mov, 0,
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_OF (1<<11)
 #define EFLG_DF (1<<10)
@@ -317,7 +374,7 @@ static u16 twobyte_table[256] = {
 
 #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \
 	do {								     \
-		unsigned long _tmp;					     \
+		unsigned long __tmp;					     \
 		switch ((_dst).bytes) {				             \
 		case 1:							     \
 			__asm__ __volatile__ (				     \
@@ -325,7 +382,7 @@ static u16 twobyte_table[256] = {
 				_op"b %"_bx"3,%1; "			     \
 				_POST_EFLAGS("0", "4", "2")		     \
 				: "=m" (_eflags), "=m" ((_dst).val),	     \
-				  "=&r" (_tmp)				     \
+				  "=&r" (__tmp)				     \
 				: _by ((_src).val), "i" (EFLAGS_MASK));      \
 			break;						     \
 		default:						     \
@@ -426,29 +483,40 @@ static u16 twobyte_table[256] = {
 	(_type)_x;							\
 })
 
+static inline unsigned long ad_mask(struct decode_cache *c)
+{
+	return (1UL << (c->ad_bytes << 3)) - 1;
+}
+
 /* Access/update address held in a register, based on addressing mode. */
-#define address_mask(reg)						\
-	((c->ad_bytes == sizeof(unsigned long)) ? 			\
-		(reg) :	((reg) & ((1UL << (c->ad_bytes << 3)) - 1)))
-#define register_address(base, reg)                                     \
-	((base) + address_mask(reg))
-#define register_address_increment(reg, inc)                            \
-	do {								\
-		/* signed type ensures sign extension to long */        \
-		int _inc = (inc);					\
-		if (c->ad_bytes == sizeof(unsigned long))		\
-			(reg) += _inc;					\
-		else							\
-			(reg) = ((reg) & 				\
-				 ~((1UL << (c->ad_bytes << 3)) - 1)) |	\
-				(((reg) + _inc) &			\
-				 ((1UL << (c->ad_bytes << 3)) - 1));	\
-	} while (0)
+static inline unsigned long
+address_mask(struct decode_cache *c, unsigned long reg)
+{
+	if (c->ad_bytes == sizeof(unsigned long))
+		return reg;
+	else
+		return reg & ad_mask(c);
+}
 
-#define JMP_REL(rel) 							\
-	do {								\
-		register_address_increment(c->eip, rel);		\
-	} while (0)
+static inline unsigned long
+register_address(struct decode_cache *c, unsigned long base, unsigned long reg)
+{
+	return base + address_mask(c, reg);
+}
+
+static inline void
+register_address_increment(struct decode_cache *c, unsigned long *reg, int inc)
+{
+	if (c->ad_bytes == sizeof(unsigned long))
+		*reg += inc;
+	else
+		*reg = (*reg & ~ad_mask(c)) | ((*reg + inc) & ad_mask(c));
+}
+
+static inline void jmp_rel(struct decode_cache *c, int rel)
+{
+	register_address_increment(c, &c->eip, rel);
+}
 
 static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
 			      struct x86_emulate_ops *ops,
@@ -763,7 +831,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 	struct decode_cache *c = &ctxt->decode;
 	int rc = 0;
 	int mode = ctxt->mode;
-	int def_op_bytes, def_ad_bytes;
+	int def_op_bytes, def_ad_bytes, group;
 
 	/* Shadow copy of register state. Committed on successful emulation. */
 
@@ -864,12 +932,24 @@ done_prefixes:
 			c->b = insn_fetch(u8, 1, c->eip);
 			c->d = twobyte_table[c->b];
 		}
+	}
 
-		/* Unrecognised? */
-		if (c->d == 0) {
-			DPRINTF("Cannot emulate %02x\n", c->b);
-			return -1;
-		}
+	if (c->d & Group) {
+		group = c->d & GroupMask;
+		c->modrm = insn_fetch(u8, 1, c->eip);
+		--c->eip;
+
+		group = (group << 3) + ((c->modrm >> 3) & 7);
+		if ((c->d & GroupDual) && (c->modrm >> 6) == 3)
+			c->d = group2_table[group];
+		else
+			c->d = group_table[group];
+	}
+
+	/* Unrecognised? */
+	if (c->d == 0) {
+		DPRINTF("Cannot emulate %02x\n", c->b);
+		return -1;
 	}
 
 	if (mode == X86EMUL_MODE_PROT64 && (c->d & Stack))
@@ -924,6 +1004,7 @@ done_prefixes:
 		 */
 		if ((c->d & ModRM) && c->modrm_mod == 3) {
 			c->src.type = OP_REG;
+			c->src.val = c->modrm_val;
 			break;
 		}
 		c->src.type = OP_MEM;
@@ -967,6 +1048,7 @@ done_prefixes:
 	case DstMem:
 		if ((c->d & ModRM) && c->modrm_mod == 3) {
 			c->dst.type = OP_REG;
+			c->dst.val = c->dst.orig_val = c->modrm_val;
 			break;
 		}
 		c->dst.type = OP_MEM;
@@ -984,8 +1066,8 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
 	c->dst.type  = OP_MEM;
 	c->dst.bytes = c->op_bytes;
 	c->dst.val = c->src.val;
-	register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes);
-	c->dst.ptr = (void *) register_address(ctxt->ss_base,
+	register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes);
+	c->dst.ptr = (void *) register_address(c, ctxt->ss_base,
 					       c->regs[VCPU_REGS_RSP]);
 }
 
@@ -995,13 +1077,13 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
 	struct decode_cache *c = &ctxt->decode;
 	int rc;
 
-	rc = ops->read_std(register_address(ctxt->ss_base,
+	rc = ops->read_std(register_address(c, ctxt->ss_base,
 					    c->regs[VCPU_REGS_RSP]),
 			   &c->dst.val, c->dst.bytes, ctxt->vcpu);
 	if (rc != 0)
 		return rc;
 
-	register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes);
+	register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes);
 
 	return 0;
 }
@@ -1043,26 +1125,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
 
 	switch (c->modrm_reg) {
 	case 0 ... 1:	/* test */
-		/*
-		 * Special case in Grp3: test has an immediate
-		 * source operand.
-		 */
-		c->src.type = OP_IMM;
-		c->src.ptr = (unsigned long *)c->eip;
-		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
-		if (c->src.bytes == 8)
-			c->src.bytes = 4;
-		switch (c->src.bytes) {
-		case 1:
-			c->src.val = insn_fetch(s8, 1, c->eip);
-			break;
-		case 2:
-			c->src.val = insn_fetch(s16, 2, c->eip);
-			break;
-		case 4:
-			c->src.val = insn_fetch(s32, 4, c->eip);
-			break;
-		}
 		emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
 		break;
 	case 2:	/* not */
@@ -1076,7 +1138,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
 		rc = X86EMUL_UNHANDLEABLE;
 		break;
 	}
-done:
 	return rc;
 }
 
@@ -1084,7 +1145,6 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
 			       struct x86_emulate_ops *ops)
 {
 	struct decode_cache *c = &ctxt->decode;
-	int rc;
 
 	switch (c->modrm_reg) {
 	case 0:	/* inc */
@@ -1094,36 +1154,11 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
 		emulate_1op("dec", c->dst, ctxt->eflags);
 		break;
 	case 4: /* jmp abs */
-		if (c->b == 0xff)
-			c->eip = c->dst.val;
-		else {
-			DPRINTF("Cannot emulate %02x\n", c->b);
-			return X86EMUL_UNHANDLEABLE;
-		}
+		c->eip = c->src.val;
 		break;
 	case 6:	/* push */
-
-		/* 64-bit mode: PUSH always pushes a 64-bit operand. */
-
-		if (ctxt->mode == X86EMUL_MODE_PROT64) {
-			c->dst.bytes = 8;
-			rc = ops->read_std((unsigned long)c->dst.ptr,
-					   &c->dst.val, 8, ctxt->vcpu);
-			if (rc != 0)
-				return rc;
-		}
-		register_address_increment(c->regs[VCPU_REGS_RSP],
-					   -c->dst.bytes);
-		rc = ops->write_emulated(register_address(ctxt->ss_base,
-				    c->regs[VCPU_REGS_RSP]), &c->dst.val,
-				    c->dst.bytes, ctxt->vcpu);
-		if (rc != 0)
-			return rc;
-		c->dst.type = OP_NONE;
+		emulate_push(ctxt);
 		break;
-	default:
-		DPRINTF("Cannot emulate %02x\n", c->b);
-		return X86EMUL_UNHANDLEABLE;
 	}
 	return 0;
 }
@@ -1361,19 +1396,19 @@ special_insn:
 		c->dst.type  = OP_MEM;
 		c->dst.bytes = c->op_bytes;
 		c->dst.val = c->src.val;
-		register_address_increment(c->regs[VCPU_REGS_RSP],
+		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
 					   -c->op_bytes);
 		c->dst.ptr = (void *) register_address(
-			ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
+			c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]);
 		break;
 	case 0x58 ... 0x5f: /* pop reg */
 	pop_instruction:
-		if ((rc = ops->read_std(register_address(ctxt->ss_base,
+		if ((rc = ops->read_std(register_address(c, ctxt->ss_base,
 			c->regs[VCPU_REGS_RSP]), c->dst.ptr,
 			c->op_bytes, ctxt->vcpu)) != 0)
 			goto done;
 
-		register_address_increment(c->regs[VCPU_REGS_RSP],
+		register_address_increment(c, &c->regs[VCPU_REGS_RSP],
 					   c->op_bytes);
 		c->dst.type = OP_NONE;	/* Disable writeback. */
 		break;
@@ -1393,9 +1428,9 @@ special_insn:
 				1,
 				(c->d & ByteOp) ? 1 : c->op_bytes,
 				c->rep_prefix ?
-				address_mask(c->regs[VCPU_REGS_RCX]) : 1,
+				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
 				(ctxt->eflags & EFLG_DF),
-				register_address(ctxt->es_base,
+				register_address(c, ctxt->es_base,
 						 c->regs[VCPU_REGS_RDI]),
 				c->rep_prefix,
 				c->regs[VCPU_REGS_RDX]) == 0) {
@@ -1409,9 +1444,9 @@ special_insn:
 				0,
 				(c->d & ByteOp) ? 1 : c->op_bytes,
 				c->rep_prefix ?
-				address_mask(c->regs[VCPU_REGS_RCX]) : 1,
+				address_mask(c, c->regs[VCPU_REGS_RCX]) : 1,
 				(ctxt->eflags & EFLG_DF),
-				register_address(c->override_base ?
+				register_address(c, c->override_base ?
 							*c->override_base :
 							ctxt->ds_base,
 						 c->regs[VCPU_REGS_RSI]),
@@ -1425,7 +1460,7 @@ special_insn:
 		int rel = insn_fetch(s8, 1, c->eip);
 
 		if (test_cc(c->b, ctxt->eflags))
-			JMP_REL(rel);
+			jmp_rel(c, rel);
 		break;
 	}
 	case 0x80 ... 0x83:	/* Grp1 */
@@ -1477,7 +1512,7 @@ special_insn:
 	case 0x88 ... 0x8b:	/* mov */
 		goto mov;
 	case 0x8d: /* lea r16/r32, m */
-		c->dst.val = c->modrm_val;
+		c->dst.val = c->modrm_ea;
 		break;
 	case 0x8f:		/* pop (sole member of Grp1a) */
 		rc = emulate_grp1a(ctxt, ops);
@@ -1501,27 +1536,27 @@ special_insn:
 	case 0xa4 ... 0xa5:	/* movs */
 		c->dst.type = OP_MEM;
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
-		c->dst.ptr = (unsigned long *)register_address(
+		c->dst.ptr = (unsigned long *)register_address(c,
 						   ctxt->es_base,
 						   c->regs[VCPU_REGS_RDI]);
-		if ((rc = ops->read_emulated(register_address(
+		if ((rc = ops->read_emulated(register_address(c,
 		      c->override_base ? *c->override_base :
 					ctxt->ds_base,
 					c->regs[VCPU_REGS_RSI]),
 					&c->dst.val,
 					c->dst.bytes, ctxt->vcpu)) != 0)
 			goto done;
-		register_address_increment(c->regs[VCPU_REGS_RSI],
+		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
 							   : c->dst.bytes);
-		register_address_increment(c->regs[VCPU_REGS_RDI],
+		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
 							   : c->dst.bytes);
 		break;
 	case 0xa6 ... 0xa7:	/* cmps */
 		c->src.type = OP_NONE; /* Disable writeback. */
 		c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
-		c->src.ptr = (unsigned long *)register_address(
+		c->src.ptr = (unsigned long *)register_address(c,
 				c->override_base ? *c->override_base :
 						   ctxt->ds_base,
 						   c->regs[VCPU_REGS_RSI]);
@@ -1533,7 +1568,7 @@ special_insn:
 
 		c->dst.type = OP_NONE; /* Disable writeback. */
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
-		c->dst.ptr = (unsigned long *)register_address(
+		c->dst.ptr = (unsigned long *)register_address(c,
 						   ctxt->es_base,
 						   c->regs[VCPU_REGS_RDI]);
 		if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
@@ -1546,10 +1581,10 @@ special_insn:
 
 		emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags);
 
-		register_address_increment(c->regs[VCPU_REGS_RSI],
+		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
 				       (ctxt->eflags & EFLG_DF) ? -c->src.bytes
 								  : c->src.bytes);
-		register_address_increment(c->regs[VCPU_REGS_RDI],
+		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
 								  : c->dst.bytes);
 
@@ -1557,11 +1592,11 @@ special_insn:
 	case 0xaa ... 0xab:	/* stos */
 		c->dst.type = OP_MEM;
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
-		c->dst.ptr = (unsigned long *)register_address(
+		c->dst.ptr = (unsigned long *)register_address(c,
 						   ctxt->es_base,
 						   c->regs[VCPU_REGS_RDI]);
 		c->dst.val = c->regs[VCPU_REGS_RAX];
-		register_address_increment(c->regs[VCPU_REGS_RDI],
+		register_address_increment(c, &c->regs[VCPU_REGS_RDI],
 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
 							   : c->dst.bytes);
 		break;
@@ -1569,7 +1604,7 @@ special_insn:
 		c->dst.type = OP_REG;
 		c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
 		c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
-		if ((rc = ops->read_emulated(register_address(
+		if ((rc = ops->read_emulated(register_address(c,
 				c->override_base ? *c->override_base :
 						   ctxt->ds_base,
 						 c->regs[VCPU_REGS_RSI]),
@@ -1577,7 +1612,7 @@ special_insn:
 						 c->dst.bytes,
 						 ctxt->vcpu)) != 0)
 			goto done;
-		register_address_increment(c->regs[VCPU_REGS_RSI],
+		register_address_increment(c, &c->regs[VCPU_REGS_RSI],
 				       (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
 							   : c->dst.bytes);
 		break;
@@ -1616,14 +1651,14 @@ special_insn:
 			goto cannot_emulate;
 		}
 		c->src.val = (unsigned long) c->eip;
-		JMP_REL(rel);
+		jmp_rel(c, rel);
 		c->op_bytes = c->ad_bytes;
 		emulate_push(ctxt);
 		break;
 	}
 	case 0xe9: /* jmp rel */
 	case 0xeb: /* jmp rel short */
-		JMP_REL(c->src.val);
+		jmp_rel(c, c->src.val);
 		c->dst.type = OP_NONE; /* Disable writeback. */
 		break;
 	case 0xf4:              /* hlt */
@@ -1690,6 +1725,8 @@ twobyte_insn:
 				goto done;
 
 			kvm_emulate_hypercall(ctxt->vcpu);
+			/* Disable writeback. */
+			c->dst.type = OP_NONE;
 			break;
 		case 2: /* lgdt */
 			rc = read_descriptor(ctxt, ops, c->src.ptr,
@@ -1697,6 +1734,8 @@ twobyte_insn:
 			if (rc)
 				goto done;
 			realmode_lgdt(ctxt->vcpu, size, address);
+			/* Disable writeback. */
+			c->dst.type = OP_NONE;
 			break;
 		case 3: /* lidt/vmmcall */
 			if (c->modrm_mod == 3 && c->modrm_rm == 1) {
@@ -1712,27 +1751,25 @@ twobyte_insn:
 					goto done;
 				realmode_lidt(ctxt->vcpu, size, address);
 			}
+			/* Disable writeback. */
+			c->dst.type = OP_NONE;
 			break;
 		case 4: /* smsw */
-			if (c->modrm_mod != 3)
-				goto cannot_emulate;
-			*(u16 *)&c->regs[c->modrm_rm]
-				= realmode_get_cr(ctxt->vcpu, 0);
+			c->dst.bytes = 2;
+			c->dst.val = realmode_get_cr(ctxt->vcpu, 0);
 			break;
 		case 6: /* lmsw */
-			if (c->modrm_mod != 3)
-				goto cannot_emulate;
-			realmode_lmsw(ctxt->vcpu, (u16)c->modrm_val,
-						  &ctxt->eflags);
+			realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
+				      &ctxt->eflags);
 			break;
 		case 7: /* invlpg*/
 			emulate_invlpg(ctxt->vcpu, memop);
+			/* Disable writeback. */
+			c->dst.type = OP_NONE;
 			break;
 		default:
 			goto cannot_emulate;
 		}
-		/* Disable writeback. */
-		c->dst.type = OP_NONE;
 		break;
 	case 0x06:
 		emulate_clts(ctxt->vcpu);
@@ -1823,7 +1860,7 @@ twobyte_insn:
 			goto cannot_emulate;
 		}
 		if (test_cc(c->b, ctxt->eflags))
-			JMP_REL(rel);
+			jmp_rel(c, rel);
 		c->dst.type = OP_NONE;
 		break;
 	}
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 25df1c1989fe..76f60f52a885 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -11,7 +11,7 @@ lib-y += memcpy_$(BITS).o
 ifeq ($(CONFIG_X86_32),y)
         lib-y += checksum_32.o
         lib-y += strstr_32.o
-        lib-y += bitops_32.o semaphore_32.o string_32.o
+        lib-y += semaphore_32.o string_32.o
 
         lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
 else
@@ -21,7 +21,6 @@ else
 
         lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
         lib-y += thunk_64.o clear_page_64.o copy_page_64.o
-        lib-y += bitops_64.o
         lib-y += memmove_64.o memset_64.o
         lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o
 endif
diff --git a/arch/x86/lib/bitops_32.c b/arch/x86/lib/bitops_32.c
deleted file mode 100644
index b65440459859..000000000000
--- a/arch/x86/lib/bitops_32.c
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <linux/bitops.h>
-#include <linux/module.h>
-
-/**
- * find_next_bit - find the next set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_bit(const unsigned long *addr, int size, int offset)
-{
-	const unsigned long *p = addr + (offset >> 5);
-	int set = 0, bit = offset & 31, res;
-
-	if (bit) {
-		/*
-		 * Look for nonzero in the first 32 bits:
-		 */
-		__asm__("bsfl %1,%0\n\t"
-			"jne 1f\n\t"
-			"movl $32, %0\n"
-			"1:"
-			: "=r" (set)
-			: "r" (*p >> bit));
-		if (set < (32 - bit))
-			return set + offset;
-		set = 32 - bit;
-		p++;
-	}
-	/*
-	 * No set bit yet, search remaining full words for a bit
-	 */
-	res = find_first_bit (p, size - 32 * (p - addr));
-	return (offset + set + res);
-}
-EXPORT_SYMBOL(find_next_bit);
-
-/**
- * find_next_zero_bit - find the first zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-int find_next_zero_bit(const unsigned long *addr, int size, int offset)
-{
-	const unsigned long *p = addr + (offset >> 5);
-	int set = 0, bit = offset & 31, res;
-
-	if (bit) {
-		/*
-		 * Look for zero in the first 32 bits.
-		 */
-		__asm__("bsfl %1,%0\n\t"
-			"jne 1f\n\t"
-			"movl $32, %0\n"
-			"1:"
-			: "=r" (set)
-			: "r" (~(*p >> bit)));
-		if (set < (32 - bit))
-			return set + offset;
-		set = 32 - bit;
-		p++;
-	}
-	/*
-	 * No zero yet, search remaining full bytes for a zero
-	 */
-	res = find_first_zero_bit(p, size - 32 * (p - addr));
-	return (offset + set + res);
-}
-EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/x86/lib/bitops_64.c b/arch/x86/lib/bitops_64.c
deleted file mode 100644
index 0e8f491e6ccc..000000000000
--- a/arch/x86/lib/bitops_64.c
+++ /dev/null
@@ -1,175 +0,0 @@
-#include <linux/bitops.h>
-
-#undef find_first_zero_bit
-#undef find_next_zero_bit
-#undef find_first_bit
-#undef find_next_bit
-
-static inline long
-__find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
-	long d0, d1, d2;
-	long res;
-
-	/*
-	 * We must test the size in words, not in bits, because
-	 * otherwise incoming sizes in the range -63..-1 will not run
-	 * any scasq instructions, and then the flags used by the je
-	 * instruction will have whatever random value was in place
-	 * before.  Nobody should call us like that, but
-	 * find_next_zero_bit() does when offset and size are at the
-	 * same word and it fails to find a zero itself.
-	 */
-	size += 63;
-	size >>= 6;
-	if (!size)
-		return 0;
-	asm volatile(
-		"  repe; scasq\n"
-		"  je 1f\n"
-		"  xorq -8(%%rdi),%%rax\n"
-		"  subq $8,%%rdi\n"
-		"  bsfq %%rax,%%rdx\n"
-		"1:  subq %[addr],%%rdi\n"
-		"  shlq $3,%%rdi\n"
-		"  addq %%rdi,%%rdx"
-		:"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2)
-		:"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL),
-		 [addr] "S" (addr) : "memory");
-	/*
-	 * Any register would do for [addr] above, but GCC tends to
-	 * prefer rbx over rsi, even though rsi is readily available
-	 * and doesn't have to be saved.
-	 */
-	return res;
-}
-
-/**
- * find_first_zero_bit - find the first zero bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first zero bit, not the number of the byte
- * containing a bit.
- */
-long find_first_zero_bit(const unsigned long * addr, unsigned long size)
-{
-	return __find_first_zero_bit (addr, size);
-}
-
-/**
- * find_next_zero_bit - find the next zero bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_zero_bit (const unsigned long * addr, long size, long offset)
-{
-	const unsigned long * p = addr + (offset >> 6);
-	unsigned long set = 0;
-	unsigned long res, bit = offset&63;
-
-	if (bit) {
-		/*
-		 * Look for zero in first word
-		 */
-		asm("bsfq %1,%0\n\t"
-		    "cmoveq %2,%0"
-		    : "=r" (set)
-		    : "r" (~(*p >> bit)), "r"(64L));
-		if (set < (64 - bit))
-			return set + offset;
-		set = 64 - bit;
-		p++;
-	}
-	/*
-	 * No zero yet, search remaining full words for a zero
-	 */
-	res = __find_first_zero_bit (p, size - 64 * (p - addr));
-
-	return (offset + set + res);
-}
-
-static inline long
-__find_first_bit(const unsigned long * addr, unsigned long size)
-{
-	long d0, d1;
-	long res;
-
-	/*
-	 * We must test the size in words, not in bits, because
-	 * otherwise incoming sizes in the range -63..-1 will not run
-	 * any scasq instructions, and then the flags used by the jz
-	 * instruction will have whatever random value was in place
-	 * before.  Nobody should call us like that, but
-	 * find_next_bit() does when offset and size are at the same
-	 * word and it fails to find a one itself.
-	 */
-	size += 63;
-	size >>= 6;
-	if (!size)
-		return 0;
-	asm volatile(
-		"   repe; scasq\n"
-		"   jz 1f\n"
-		"   subq $8,%%rdi\n"
-		"   bsfq (%%rdi),%%rax\n"
-		"1: subq %[addr],%%rdi\n"
-		"   shlq $3,%%rdi\n"
-		"   addq %%rdi,%%rax"
-		:"=a" (res), "=&c" (d0), "=&D" (d1)
-		:"0" (0ULL), "1" (size), "2" (addr),
-		 [addr] "r" (addr) : "memory");
-	return res;
-}
-
-/**
- * find_first_bit - find the first set bit in a memory region
- * @addr: The address to start the search at
- * @size: The maximum size to search
- *
- * Returns the bit-number of the first set bit, not the number of the byte
- * containing a bit.
- */
-long find_first_bit(const unsigned long * addr, unsigned long size)
-{
-	return __find_first_bit(addr,size);
-}
-
-/**
- * find_next_bit - find the first set bit in a memory region
- * @addr: The address to base the search on
- * @offset: The bitnumber to start searching at
- * @size: The maximum size to search
- */
-long find_next_bit(const unsigned long * addr, long size, long offset)
-{
-	const unsigned long * p = addr + (offset >> 6);
-	unsigned long set = 0, bit = offset & 63, res;
-
-	if (bit) {
-		/*
-		 * Look for nonzero in the first 64 bits:
-		 */
-		asm("bsfq %1,%0\n\t"
-		    "cmoveq %2,%0\n\t"
-		    : "=r" (set)
-		    : "r" (*p >> bit), "r" (64L));
-		if (set < (64 - bit))
-			return set + offset;
-		set = 64 - bit;
-		p++;
-	}
-	/*
-	 * No set bit yet, search remaining full words for a bit
-	 */
-	res = __find_first_bit (p, size - 64 * (p - addr));
-	return (offset + set + res);
-}
-
-#include <linux/module.h>
-
-EXPORT_SYMBOL(find_next_bit);
-EXPORT_SYMBOL(find_first_bit);
-EXPORT_SYMBOL(find_first_zero_bit);
-EXPORT_SYMBOL(find_next_zero_bit);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index d05722121d24..8acbf0cdf1a5 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -113,7 +113,7 @@ static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi)
 	for_each_online_cpu(cpu) {
 		if (cpuset & (1 << cpu)) {
 #ifdef VOYAGER_DEBUG
-			if (!cpu_isset(cpu, cpu_online_map))
+			if (!cpu_online(cpu))
 				VDEBUG(("CPU%d sending cpi %d to CPU%d not in "
 					"cpu_online_map\n",
 					hard_smp_processor_id(), cpi, cpu));
@@ -543,8 +543,8 @@ static void __init do_boot_cpu(__u8 cpu)
 		hijack_source.idt.Offset, stack_start.sp));
 
 	/* init lowmem identity mapping */
-	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 	flush_tlb_all();
 
 	if (quad_boot) {
@@ -683,9 +683,9 @@ void __init smp_boot_cpus(void)
 	 * Code added from smpboot.c */
 	{
 		unsigned long bogosum = 0;
-		for (i = 0; i < NR_CPUS; i++)
-			if (cpu_isset(i, cpu_online_map))
-				bogosum += cpu_data(i).loops_per_jiffy;
+
+		for_each_online_cpu(i)
+			bogosum += cpu_data(i).loops_per_jiffy;
 		printk(KERN_INFO "Total of %d processors activated "
 		       "(%lu.%02lu BogoMIPS).\n",
 		       cpucount + 1, bogosum / (500000 / HZ),
@@ -1838,7 +1838,7 @@ static int __cpuinit voyager_cpu_up(unsigned int cpu)
 		return -EIO;
 	/* Unleash the CPU! */
 	cpu_set(cpu, smp_commenced_mask);
-	while (!cpu_isset(cpu, cpu_online_map))
+	while (!cpu_online(cpu))
 		mb();
 	return 0;
 }
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 20941d2954e2..b7b3e4c7cfc9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
 obj-y	:=  init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o
+	    pat.o pgtable.o
 
 obj-$(CONFIG_X86_32)		+= pgtable_32.o
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ec62da85fd7..de236e419cb5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 
-		paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
 		BUG_ON(pmd_table != pmd_offset(pud, 0));
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 		}
 
-		paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
 		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
 	}
@@ -227,6 +227,25 @@ static inline int page_kills_ppro(unsigned long pagenr)
 	return 0;
 }
 
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	if (pagenr <= 256)
+		return 1;
+	if (!page_is_ram(pagenr))
+		return 1;
+	return 0;
+}
+
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
@@ -268,47 +287,17 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 	pkmap_page_table = pte;
 }
 
-static void __meminit free_new_highpage(struct page *page)
-{
-	init_page_count(page);
-	__free_page(page);
-	totalhigh_pages++;
-}
-
 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
 	if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
 		ClearPageReserved(page);
-		free_new_highpage(page);
+		init_page_count(page);
+		__free_page(page);
+		totalhigh_pages++;
 	} else
 		SetPageReserved(page);
 }
 
-static int __meminit
-add_one_highpage_hotplug(struct page *page, unsigned long pfn)
-{
-	free_new_highpage(page);
-	totalram_pages++;
-#ifdef CONFIG_FLATMEM
-	max_mapnr = max(pfn, max_mapnr);
-#endif
-	num_physpages++;
-
-	return 0;
-}
-
-/*
- * Not currently handling the NUMA case.
- * Assuming single node and all memory that
- * has been added dynamically that would be
- * onlined here is in HIGHMEM.
- */
-void __meminit online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	add_one_highpage_hotplug(page, page_to_pfn(page));
-}
-
 #ifndef CONFIG_NUMA
 static void __init set_highmem_pages_init(int bad_ppro)
 {
@@ -365,7 +354,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
 
 		pte_clear(NULL, va, pte);
 	}
-	paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
+	paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
@@ -457,7 +446,7 @@ void zap_low_mappings(void)
 	 * Note that "pgd_clear()" doesn't do it for
 	 * us, because pgd_clear() is a no-op on i386.
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+	for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
 #ifdef CONFIG_X86_PAE
 		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
 #else
@@ -547,9 +536,9 @@ void __init paging_init(void)
 
 /*
  * Test if the WP bit works in supervisor mode. It isn't supported on 386's
- * and also on some strange 486's (NexGen etc.). All 586+'s are OK. This
- * used to involve black magic jumps to work around some nasty CPU bugs,
- * but fortunately the switch to using exceptions got rid of all that.
+ * and also on some strange 486's. All 586+'s are OK. This used to involve
+ * black magic jumps to work around some nasty CPU bugs, but fortunately the
+ * switch to using exceptions got rid of all that.
  */
 static void __init test_wp_bit(void)
 {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ff7906a9a4d..32ba13b0f818 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -135,7 +135,7 @@ static __init void *spp_getpage(void)
 	return ptr;
 }
 
-static __init void
+static void
 set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 {
 	pgd_t *pgd;
@@ -173,7 +173,7 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
 
 	pte = pte_offset_kernel(pmd, vaddr);
-	if (!pte_none(*pte) &&
+	if (!pte_none(*pte) && pte_val(new_pte) &&
 	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
 		pte_ERROR(*pte);
 	set_pte(pte, new_pte);
@@ -214,8 +214,7 @@ void __init cleanup_highmap(void)
 }
 
 /* NOTE: this is meant to be run only at boot */
-void __init
-__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 	unsigned long address = __fix_to_virt(idx);
 
@@ -621,15 +620,6 @@ void __init paging_init(void)
 /*
  * Memory hotplug specific functions
  */
-void online_page(struct page *page)
-{
-	ClearPageReserved(page);
-	init_page_count(page);
-	__free_page(page);
-	totalram_pages++;
-	num_physpages++;
-}
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Memory is added always to NORMAL zone. This means you will never get
@@ -664,6 +654,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	if (pagenr <= 256)
+		return 1;
+	if (!page_is_ram(pagenr))
+		return 1;
+	return 0;
+}
+
+
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
 			 kcore_modules, kcore_vsyscall;
 
@@ -791,7 +801,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 {
 #ifdef CONFIG_NUMA
-	int nid = phys_to_nid(phys);
+	int nid, next_nid;
 #endif
 	unsigned long pfn = phys >> PAGE_SHIFT;
 
@@ -810,10 +820,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 
 	/* Should check here against the e820 map to avoid double free */
 #ifdef CONFIG_NUMA
-	reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+	nid = phys_to_nid(phys);
+	next_nid = phys_to_nid(phys + len - 1);
+	if (nid == next_nid)
+		reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+	else
+		reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #else
 	reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
+
 	if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
 		dma_reserve += len / PAGE_SIZE;
 		set_dma_reserve(dma_reserve);
@@ -907,6 +923,10 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
+static long __meminitdata addr_start, addr_end;
+static void __meminitdata *p_start, *p_end;
+static int __meminitdata node_start;
+
 int __meminit
 vmemmap_populate(struct page *start_page, unsigned long size, int node)
 {
@@ -941,12 +961,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
 							PAGE_KERNEL_LARGE);
 			set_pmd(pmd, __pmd(pte_val(entry)));
 
-			printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
-				addr, addr + PMD_SIZE - 1, p, node);
+			/* check to see if we have contiguous blocks */
+			if (p_end != p || node_start != node) {
+				if (p_start)
+					printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+						addr_start, addr_end-1, p_start, p_end-1, node_start);
+				addr_start = addr;
+				node_start = node;
+				p_start = p;
+			}
+			addr_end = addr + PMD_SIZE;
+			p_end = p + PMD_SIZE;
 		} else {
 			vmemmap_verify((pte_t *)pmd, node, addr, next);
 		}
 	}
 	return 0;
 }
+
+void __meminit vmemmap_populate_print_last(void)
+{
+	if (p_start) {
+		printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+			addr_start, addr_end-1, p_start, p_end-1, node_start);
+		p_start = NULL;
+		p_end = NULL;
+		node_start = 0;
+	}
+}
 #endif
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3a4baf95e24d..804de18abcc2 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -117,8 +117,8 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size,
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
-			       unsigned long prot_val)
+static void __iomem *__ioremap_caller(resource_size_t phys_addr,
+		unsigned long size, unsigned long prot_val, void *caller)
 {
 	unsigned long pfn, offset, vaddr;
 	resource_size_t last_addr;
@@ -212,7 +212,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
 	/*
 	 * Ok, go for it..
 	 */
-	area = get_vm_area(size, VM_IOREMAP);
+	area = get_vm_area_caller(size, VM_IOREMAP, caller);
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
@@ -255,7 +255,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size,
  */
 void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size)
 {
-	return __ioremap(phys_addr, size, _PAGE_CACHE_UC);
+	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_UC,
+				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
@@ -272,7 +273,8 @@ EXPORT_SYMBOL(ioremap_nocache);
 void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
 {
 	if (pat_wc_enabled)
-		return __ioremap(phys_addr, size, _PAGE_CACHE_WC);
+		return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
+					__builtin_return_address(0));
 	else
 		return ioremap_nocache(phys_addr, size);
 }
@@ -280,7 +282,8 @@ EXPORT_SYMBOL(ioremap_wc);
 
 void __iomem *ioremap_cache(resource_size_t phys_addr, unsigned long size)
 {
-	return __ioremap(phys_addr, size, _PAGE_CACHE_WB);
+	return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WB,
+				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_cache);
 
@@ -336,6 +339,35 @@ void iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(iounmap);
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+void *xlate_dev_mem_ptr(unsigned long phys)
+{
+	void *addr;
+	unsigned long start = phys & PAGE_MASK;
+
+	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
+	if (page_is_ram(start >> PAGE_SHIFT))
+		return __va(phys);
+
+	addr = (void *)ioremap(start, PAGE_SIZE);
+	if (addr)
+		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+
+	return addr;
+}
+
+void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
+{
+	if (page_is_ram(phys >> PAGE_SHIFT))
+		return;
+
+	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
+	return;
+}
+
 #ifdef CONFIG_X86_32
 
 int __initdata early_ioremap_debug;
@@ -407,7 +439,7 @@ void __init early_ioremap_clear(void)
 
 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	pmd_clear(pmd);
-	paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
+	paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
 	__flush_tlb_all();
 }
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b27..c5066d519e5d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	unsigned long bootmap_start, nodedata_phys;
 	void *bootmap;
 	const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+	int nid;
 
 	start = round_up(start, ZONE_ALIGN);
 
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 	NODE_DATA(nodeid)->node_start_pfn = start_pfn;
 	NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 
-	/* Find a place for the bootmem map */
+	/*
+	 * Find a place for the bootmem map
+	 * nodedata_phys could be on other nodes by alloc_bootmem,
+	 * so need to sure bootmap_start not to be small, otherwise
+	 * early_node_mem will get that with find_e820_area instead
+	 * of alloc_bootmem, that could clash with reserved range
+	 */
 	bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-	bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+	nid = phys_to_nid(nodedata_phys);
+	if (nid == nodeid)
+		bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+	else
+		bootmap_start = round_up(start, PAGE_SIZE);
 	/*
 	 * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
 	 * to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 
 	free_bootmem_with_active_regions(nodeid, end);
 
-	reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
-			BOOTMEM_DEFAULT);
-	reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
-			bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+	/*
+	 * convert early reserve to bootmem reserve earlier
+	 * otherwise early_node_mem could use early reserved mem
+	 * on previous node
+	 */
+	early_res_to_bootmem(start, end);
+
+	/*
+	 * in some case early_node_mem could use alloc_bootmem
+	 * to get range on other node, don't reserve that again
+	 */
+	if (nid != nodeid)
+		printk(KERN_INFO "    NODE_DATA(%d) on node %d\n", nodeid, nid);
+	else
+		reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
+					pgdat_size, BOOTMEM_DEFAULT);
+	nid = phys_to_nid(bootmap_start);
+	if (nid != nodeid)
+		printk(KERN_INFO "    bootmap(%d) on node %d\n", nodeid, nid);
+	else
+		reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+				 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
 #ifdef CONFIG_ACPI_NUMA
 	srat_reserve_add_area(nodeid);
 #endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c29ebd037254..bd5e05c654dc 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		goto out_unlock;
 
 	pbase = (pte_t *)page_address(base);
-#ifdef CONFIG_X86_32
-	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-#endif
+	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
 	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 72c0f6097402..277446cd30b6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,16 +11,19 @@
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/fs.h>
+#include <linux/bootmem.h>
 
 #include <asm/msr.h>
 #include <asm/tlbflush.h>
 #include <asm/processor.h>
+#include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/pat.h>
 #include <asm/e820.h>
 #include <asm/cacheflush.h>
 #include <asm/fcntl.h>
 #include <asm/mtrr.h>
+#include <asm/io.h>
 
 int pat_wc_enabled = 1;
 
@@ -190,6 +193,21 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
 	return 0;
 }
 
+/*
+ * req_type typically has one of the:
+ * - _PAGE_CACHE_WB
+ * - _PAGE_CACHE_WC
+ * - _PAGE_CACHE_UC_MINUS
+ * - _PAGE_CACHE_UC
+ *
+ * req_type will have a special case value '-1', when requester want to inherit
+ * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
+ *
+ * If ret_type is NULL, function will return an error if it cannot reserve the
+ * region with req_type. If ret_type is non-null, function will return
+ * available type in ret_type in case of no error. In case of any error
+ * it will return a negative return value.
+ */
 int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 			unsigned long *ret_type)
 {
@@ -200,9 +218,14 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 
 	/* Only track when pat_wc_enabled */
 	if (!pat_wc_enabled) {
-		if (ret_type)
-			*ret_type = req_type;
-
+		/* This is identical to page table setting without PAT */
+		if (ret_type) {
+			if (req_type == -1) {
+				*ret_type = _PAGE_CACHE_WB;
+			} else {
+				*ret_type = req_type;
+			}
+		}
 		return 0;
 	}
 
@@ -214,8 +237,29 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		return 0;
 	}
 
-	req_type &= _PAGE_CACHE_MASK;
-	err = pat_x_mtrr_type(start, end, req_type, &actual_type);
+	if (req_type == -1) {
+		/*
+		 * Special case where caller wants to inherit from mtrr or
+		 * existing pat mapping, defaulting to UC_MINUS in case of
+		 * no match.
+		 */
+		u8 mtrr_type = mtrr_type_lookup(start, end);
+		if (mtrr_type == 0xFE) { /* MTRR match error */
+			err = -1;
+		}
+
+		if (mtrr_type == MTRR_TYPE_WRBACK) {
+			req_type = _PAGE_CACHE_WB;
+			actual_type = _PAGE_CACHE_WB;
+		} else {
+			req_type = _PAGE_CACHE_UC_MINUS;
+			actual_type = _PAGE_CACHE_UC_MINUS;
+		}
+	} else {
+		req_type &= _PAGE_CACHE_MASK;
+		err = pat_x_mtrr_type(start, end, req_type, &actual_type);
+	}
+
 	if (err) {
 		if (ret_type)
 			*ret_type = actual_type;
@@ -241,7 +285,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		struct memtype *saved_ptr;
 
 		if (parse->start >= end) {
-			printk("New Entry\n");
+			pr_debug("New Entry\n");
 			list_add(&new_entry->nd, parse->nd.prev);
 			new_entry = NULL;
 			break;
@@ -291,7 +335,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 				break;
 			}
 
-			printk("Overlap at 0x%Lx-0x%Lx\n",
+			pr_debug("Overlap at 0x%Lx-0x%Lx\n",
 			       saved_ptr->start, saved_ptr->end);
 			/* No conflict. Go ahead and add this new entry */
 			list_add(&new_entry->nd, saved_ptr->nd.prev);
@@ -343,8 +387,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 				break;
 			}
 
-			printk("Overlap at 0x%Lx-0x%Lx\n",
-			       saved_ptr->start, saved_ptr->end);
+			pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
+				 saved_ptr->start, saved_ptr->end);
 			/* No conflict. Go ahead and add this new entry */
 			list_add(&new_entry->nd, &saved_ptr->nd);
 			new_entry = NULL;
@@ -353,7 +397,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	}
 
 	if (err) {
-		printk(
+		printk(KERN_INFO
 	"reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
 			start, end, cattr_name(new_entry->type),
 			cattr_name(req_type));
@@ -365,16 +409,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	if (new_entry) {
 		/* No conflict. Not yet added to the list. Add to the tail */
 		list_add_tail(&new_entry->nd, &memtype_list);
-		printk("New Entry\n");
-  	}
+		pr_debug("New Entry\n");
+	}
 
 	if (ret_type) {
-		printk(
+		pr_debug(
 	"reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
 			start, end, cattr_name(actual_type),
 			cattr_name(req_type), cattr_name(*ret_type));
 	} else {
-		printk(
+		pr_debug(
 	"reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
 			start, end, cattr_name(actual_type),
 			cattr_name(req_type));
@@ -411,11 +455,142 @@ int free_memtype(u64 start, u64 end)
 	spin_unlock(&memtype_lock);
 
 	if (err) {
-		printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n",
+		printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
 			current->comm, current->pid, start, end);
 	}
 
-	printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end);
+	pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
 	return err;
 }
 
+
+/*
+ * /dev/mem mmap interface. The memtype used for mapping varies:
+ * - Use UC for mappings with O_SYNC flag
+ * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
+ *   inherit the memtype from existing mapping.
+ * - Else use UC_MINUS memtype (for backward compatibility with existing
+ *   X drivers.
+ */
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t vma_prot)
+{
+	return vma_prot;
+}
+
+#ifdef CONFIG_NONPROMISC_DEVMEM
+/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+	return 1;
+}
+#else
+static inline int range_is_allowed(unsigned long pfn, unsigned long size)
+{
+	u64 from = ((u64)pfn) << PAGE_SHIFT;
+	u64 to = from + size;
+	u64 cursor = from;
+
+	while (cursor < to) {
+		if (!devmem_is_allowed(pfn)) {
+			printk(KERN_INFO
+		"Program %s tried to access /dev/mem between %Lx->%Lx.\n",
+				current->comm, from, to);
+			return 0;
+		}
+		cursor += PAGE_SIZE;
+		pfn++;
+	}
+	return 1;
+}
+#endif /* CONFIG_NONPROMISC_DEVMEM */
+
+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t *vma_prot)
+{
+	u64 offset = ((u64) pfn) << PAGE_SHIFT;
+	unsigned long flags = _PAGE_CACHE_UC_MINUS;
+	int retval;
+
+	if (!range_is_allowed(pfn, size))
+		return 0;
+
+	if (file->f_flags & O_SYNC) {
+		flags = _PAGE_CACHE_UC;
+	}
+
+#ifdef CONFIG_X86_32
+	/*
+	 * On the PPro and successors, the MTRRs are used to set
+	 * memory types for physical addresses outside main memory,
+	 * so blindly setting UC or PWT on those pages is wrong.
+	 * For Pentiums and earlier, the surround logic should disable
+	 * caching for the high addresses through the KEN pin, but
+	 * we maintain the tradition of paranoia in this code.
+	 */
+	if (!pat_wc_enabled &&
+	    ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
+	   (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
+		flags = _PAGE_CACHE_UC;
+	}
+#endif
+
+	/*
+	 * With O_SYNC, we can only take UC mapping. Fail if we cannot.
+	 * Without O_SYNC, we want to get
+	 * - WB for WB-able memory and no other conflicting mappings
+	 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
+	 * - Inherit from confliting mappings otherwise
+	 */
+	if (flags != _PAGE_CACHE_UC_MINUS) {
+		retval = reserve_memtype(offset, offset + size, flags, NULL);
+	} else {
+		retval = reserve_memtype(offset, offset + size, -1, &flags);
+	}
+
+	if (retval < 0)
+		return 0;
+
+	if (pfn <= max_pfn_mapped &&
+            ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
+		free_memtype(offset, offset + size);
+		printk(KERN_INFO
+		"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
+			current->comm, current->pid,
+			cattr_name(flags),
+			offset, offset + size);
+		return 0;
+	}
+
+	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
+			     flags);
+	return 1;
+}
+
+void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
+{
+	u64 addr = (u64)pfn << PAGE_SHIFT;
+	unsigned long flags;
+	unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
+
+	reserve_memtype(addr, addr + size, want_flags, &flags);
+	if (flags != want_flags) {
+		printk(KERN_INFO
+		"%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
+			current->comm, current->pid,
+			cattr_name(want_flags),
+			addr, addr + size,
+			cattr_name(flags));
+	}
+}
+
+void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
+{
+	u64 addr = (u64)pfn << PAGE_SHIFT;
+
+	free_memtype(addr, addr + size);
+}
+
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
new file mode 100644
index 000000000000..50159764f694
--- /dev/null
+++ b/arch/x86/mm/pgtable.c
@@ -0,0 +1,276 @@
+#include <linux/mm.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+}
+
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *pte;
+
+#ifdef CONFIG_HIGHPTE
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+#else
+	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+#endif
+	if (pte)
+		pgtable_page_ctor(pte);
+	return pte;
+}
+
+void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+{
+	pgtable_page_dtor(pte);
+	paravirt_release_pte(page_to_pfn(pte));
+	tlb_remove_page(tlb, pte);
+}
+
+#if PAGETABLE_LEVELS > 2
+void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
+	tlb_remove_page(tlb, virt_to_page(pmd));
+}
+
+#if PAGETABLE_LEVELS > 3
+void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+{
+	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
+	tlb_remove_page(tlb, virt_to_page(pud));
+}
+#endif	/* PAGETABLE_LEVELS > 3 */
+#endif	/* PAGETABLE_LEVELS > 2 */
+
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_add(&page->lru, &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_del(&page->lru);
+}
+
+#define UNSHARED_PTRS_PER_PGD				\
+	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
+
+static void pgd_ctor(void *p)
+{
+	pgd_t *pgd = p;
+	unsigned long flags;
+
+	/* Clear usermode parts of PGD */
+	memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	/* If the pgd points to a shared pagetable level (either the
+	   ptes in non-PAE, or shared PMD in PAE), then just copy the
+	   references from swapper_pg_dir. */
+	if (PAGETABLE_LEVELS == 2 ||
+	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
+	    PAGETABLE_LEVELS == 4) {
+		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
+				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+				KERNEL_PGD_PTRS);
+		paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
+					 __pa(swapper_pg_dir) >> PAGE_SHIFT,
+					 KERNEL_PGD_BOUNDARY,
+					 KERNEL_PGD_PTRS);
+	}
+
+	/* list required to sync kernel mapping updates */
+	if (!SHARED_KERNEL_PMD)
+		pgd_list_add(pgd);
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void pgd_dtor(void *pgd)
+{
+	unsigned long flags; /* can be called from interrupt context */
+
+	if (SHARED_KERNEL_PMD)
+		return;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * -- wli
+ */
+
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
+{
+	int i;
+
+	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+		pgd_t pgd = pgdp[i];
+
+		if (pgd_val(pgd) != 0) {
+			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+			pgdp[i] = native_make_pgd(0);
+
+			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
+			pmd_free(mm, pmd);
+		}
+	}
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	pud_t *pud;
+	unsigned long addr;
+	int i;
+
+	pud = pud_offset(pgd, 0);
+ 	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+	     i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+		if (!pmd) {
+			pgd_mop_up_pmds(mm, pgd);
+			return 0;
+		}
+
+		if (i >= KERNEL_PGD_BOUNDARY)
+			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
+			       sizeof(pmd_t) * PTRS_PER_PMD);
+
+		pud_populate(mm, pud, pmd);
+	}
+
+	return 1;
+}
+
+void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+{
+	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
+
+	/* Note: almost everything apart from _PAGE_PRESENT is
+	   reserved at the pmd (PDPT) level. */
+	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+
+	/*
+	 * According to Intel App note "TLBs, Paging-Structure Caches,
+	 * and Their Invalidation", April 2007, document 317080-001,
+	 * section 8.1: in PAE mode we explicitly have to flush the
+	 * TLB via cr3 if the top-level pgd is changed...
+	 */
+	if (mm == current->active_mm)
+		write_cr3(read_cr3());
+}
+#else  /* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	return 1;
+}
+
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
+{
+}
+#endif	/* CONFIG_X86_PAE */
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+	/* so that alloc_pmd can use it */
+	mm->pgd = pgd;
+	if (pgd)
+		pgd_ctor(pgd);
+
+	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+		pgd_dtor(pgd);
+		free_page((unsigned long)pgd);
+		pgd = NULL;
+	}
+
+	return pgd;
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	pgd_mop_up_pmds(mm, pgd);
+	pgd_dtor(pgd);
+	free_page((unsigned long)pgd);
+}
+
+int ptep_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pte_t *ptep,
+			  pte_t entry, int dirty)
+{
+	int changed = !pte_same(*ptep, entry);
+
+	if (changed && dirty) {
+		*ptep = entry;
+		pte_update_defer(vma->vm_mm, address, ptep);
+		flush_tlb_page(vma, address);
+	}
+
+	return changed;
+}
+
+int ptep_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pte_t *ptep)
+{
+	int ret = 0;
+
+	if (pte_young(*ptep))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 &ptep->pte);
+
+	if (ret)
+		pte_update(vma->vm_mm, addr, ptep);
+
+	return ret;
+}
+
+int ptep_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pte_t *ptep)
+{
+	int young;
+
+	young = ptep_test_and_clear_young(vma, address, ptep);
+	if (young)
+		flush_tlb_page(vma, address);
+
+	return young;
+}
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6fb9e7c6893f..9ee007be9142 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -173,210 +173,6 @@ void reserve_top_address(unsigned long reserve)
 	__VMALLOC_RESERVE += reserve;
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-{
-	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	struct page *pte;
-
-#ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-#else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-#endif
-	if (pte)
-		pgtable_page_ctor(pte);
-	return pte;
-}
-
-/*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
- * against pageattr.c; it is the unique case in which a valid change
- * of kernel pagetables can't be lazily synchronized by vmalloc faults.
- * vmalloc faults work because attached pagetables are never freed.
- * -- wli
- */
-static inline void pgd_list_add(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	list_add(&page->lru, &pgd_list);
-}
-
-static inline void pgd_list_del(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	list_del(&page->lru);
-}
-
-#define UNSHARED_PTRS_PER_PGD				\
-	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
-static void pgd_ctor(void *p)
-{
-	pgd_t *pgd = p;
-	unsigned long flags;
-
-	/* Clear usermode parts of PGD */
-	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-
-	spin_lock_irqsave(&pgd_lock, flags);
-
-	/* If the pgd points to a shared pagetable level (either the
-	   ptes in non-PAE, or shared PMD in PAE), then just copy the
-	   references from swapper_pg_dir. */
-	if (PAGETABLE_LEVELS == 2 ||
-	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
-		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
-				swapper_pg_dir + USER_PTRS_PER_PGD,
-				KERNEL_PGD_PTRS);
-		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
-					__pa(swapper_pg_dir) >> PAGE_SHIFT,
-					USER_PTRS_PER_PGD,
-					KERNEL_PGD_PTRS);
-	}
-
-	/* list required to sync kernel mapping updates */
-	if (!SHARED_KERNEL_PMD)
-		pgd_list_add(pgd);
-
-	spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-static void pgd_dtor(void *pgd)
-{
-	unsigned long flags; /* can be called from interrupt context */
-
-	if (SHARED_KERNEL_PMD)
-		return;
-
-	spin_lock_irqsave(&pgd_lock, flags);
-	pgd_list_del(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-#ifdef CONFIG_X86_PAE
-/*
- * Mop up any pmd pages which may still be attached to the pgd.
- * Normally they will be freed by munmap/exit_mmap, but any pmd we
- * preallocate which never got a corresponding vma will need to be
- * freed manually.
- */
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
-{
-	int i;
-
-	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
-		pgd_t pgd = pgdp[i];
-
-		if (pgd_val(pgd) != 0) {
-			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
-
-			pgdp[i] = native_make_pgd(0);
-
-			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
-			pmd_free(mm, pmd);
-		}
-	}
-}
-
-/*
- * In PAE mode, we need to do a cr3 reload (=tlb flush) when
- * updating the top-level pagetable entries to guarantee the
- * processor notices the update.  Since this is expensive, and
- * all 4 top-level entries are used almost immediately in a
- * new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
- */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-{
-	pud_t *pud;
-	unsigned long addr;
-	int i;
-
-	pud = pud_offset(pgd, 0);
- 	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
-	     i++, pud++, addr += PUD_SIZE) {
-		pmd_t *pmd = pmd_alloc_one(mm, addr);
-
-		if (!pmd) {
-			pgd_mop_up_pmds(mm, pgd);
-			return 0;
-		}
-
-		if (i >= USER_PTRS_PER_PGD)
-			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
-			       sizeof(pmd_t) * PTRS_PER_PMD);
-
-		pud_populate(mm, pud, pmd);
-	}
-
-	return 1;
-}
-#else  /* !CONFIG_X86_PAE */
-/* No need to prepopulate any pagetable entries in non-PAE modes. */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-{
-	return 1;
-}
-
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
-{
-}
-#endif	/* CONFIG_X86_PAE */
-
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-
-	/* so that alloc_pd can use it */
-	mm->pgd = pgd;
-	if (pgd)
-		pgd_ctor(pgd);
-
-	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
-		pgd_dtor(pgd);
-		free_page((unsigned long)pgd);
-		pgd = NULL;
-	}
-
-	return pgd;
-}
-
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	pgd_mop_up_pmds(mm, pgd);
-	pgd_dtor(pgd);
-	free_page((unsigned long)pgd);
-}
-
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
-{
-	pgtable_page_dtor(pte);
-	paravirt_release_pt(page_to_pfn(pte));
-	tlb_remove_page(tlb, pte);
-}
-
-#ifdef CONFIG_X86_PAE
-
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
-{
-	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-	tlb_remove_page(tlb, virt_to_page(pmd));
-}
-
-#endif
-
 int pmd_bad(pmd_t pmd)
 {
 	WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 4b1620a1529e..1d3aa6b87181 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -1,2 +1,10 @@
-	.section ".vdso","a"
+#include <linux/init.h>
+
+__INITDATA
+
+	.globl vdso_start, vdso_end
+vdso_start:
 	.incbin "arch/x86/vdso/vdso.so"
+vdso_end:
+
+__FINIT
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 4d5f2649bee4..2e641be2737e 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@ config XEN
 	bool "Xen guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER)
+	depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 343df246bd3e..3d8df981d5fd 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y		:= enlighten.o setup.o features.o multicalls.o mmu.o \
-			events.o time.o manage.o xen-asm.o
+obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
+			time.o manage.o xen-asm.o grant-table.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0388220cf97..c8a56e457d61 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -155,7 +155,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	if (*ax == 1)
 		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
 			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-			    (1 << X86_FEATURE_SEP)  |  /* disable SEP */
+			    (1 << X86_FEATURE_MCE)  |  /* disable MCE */
+			    (1 << X86_FEATURE_MCA)  |  /* disable MCA */
 			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
@@ -531,26 +532,37 @@ static void xen_apic_write(unsigned long reg, u32 val)
 static void xen_flush_tlb(void)
 {
 	struct mmuext_op *op;
-	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+	struct multicall_space mcs;
+
+	preempt_disable();
+
+	mcs = xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
 	op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
 }
 
 static void xen_flush_tlb_single(unsigned long addr)
 {
 	struct mmuext_op *op;
-	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+	struct multicall_space mcs;
+
+	preempt_disable();
 
+	mcs = xen_mc_entry(sizeof(*op));
 	op = mcs.args;
 	op->cmd = MMUEXT_INVLPG_LOCAL;
 	op->arg1.linear_addr = addr & PAGE_MASK;
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
 }
 
 static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
@@ -655,15 +667,17 @@ static void xen_write_cr3(unsigned long cr3)
 
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
-static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
+static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
 {
+#ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
+#endif
 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
-/* Early release_pt assumes that all pts are pinned, since there's
+/* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static void xen_release_pt_init(u32 pfn)
+static void xen_release_pte_init(u32 pfn)
 {
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
@@ -697,12 +711,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
 	}
 }
 
-static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PTE);
 }
 
-static void xen_alloc_pd(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
@@ -722,12 +736,12 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
 	}
 }
 
-static void xen_release_pt(u32 pfn)
+static void xen_release_pte(u32 pfn)
 {
 	xen_release_ptpage(pfn, PT_PTE);
 }
 
-static void xen_release_pd(u32 pfn)
+static void xen_release_pmd(u32 pfn)
 {
 	xen_release_ptpage(pfn, PT_PMD);
 }
@@ -849,10 +863,10 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 {
 	/* This will work as long as patching hasn't happened yet
 	   (which it hasn't) */
-	pv_mmu_ops.alloc_pt = xen_alloc_pt;
-	pv_mmu_ops.alloc_pd = xen_alloc_pd;
-	pv_mmu_ops.release_pt = xen_release_pt;
-	pv_mmu_ops.release_pd = xen_release_pd;
+	pv_mmu_ops.alloc_pte = xen_alloc_pte;
+	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+	pv_mmu_ops.release_pte = xen_release_pte;
+	pv_mmu_ops.release_pmd = xen_release_pmd;
 	pv_mmu_ops.set_pte = xen_set_pte;
 
 	setup_shared_info();
@@ -994,7 +1008,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 
 	.iret = xen_iret,
-	.irq_enable_syscall_ret = NULL,  /* never called */
+	.irq_enable_syscall_ret = xen_sysexit,
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
@@ -1059,11 +1073,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.alloc_pt = xen_alloc_pt_init,
-	.release_pt = xen_release_pt_init,
-	.alloc_pd = xen_alloc_pt_init,
-	.alloc_pd_clone = paravirt_nop,
-	.release_pd = xen_release_pt_init,
+	.alloc_pte = xen_alloc_pte_init,
+	.release_pte = xen_release_pte_init,
+	.alloc_pmd = xen_alloc_pte_init,
+	.alloc_pmd_clone = paravirt_nop,
+	.release_pmd = xen_release_pte_init,
 
 #ifdef CONFIG_HIGHPTE
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
deleted file mode 100644
index dcf613e17581..000000000000
--- a/arch/x86/xen/events.c
+++ /dev/null
@@ -1,591 +0,0 @@
-/*
- * Xen event channels
- *
- * Xen models interrupts with abstract event channels.  Because each
- * domain gets 1024 event channels, but NR_IRQ is not that large, we
- * must dynamically map irqs<->event channels.  The event channels
- * interface with the rest of the kernel by defining a xen interrupt
- * chip.  When an event is recieved, it is mapped to an irq and sent
- * through the normal interrupt processing path.
- *
- * There are four kinds of events which can be mapped to an event
- * channel:
- *
- * 1. Inter-domain notifications.  This includes all the virtual
- *    device events, since they're driven by front-ends in another domain
- *    (typically dom0).
- * 2. VIRQs, typically used for timers.  These are per-cpu events.
- * 3. IPIs.
- * 4. Hardware interrupts. Not supported at present.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-
-#include <linux/linkage.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <asm/ptrace.h>
-#include <asm/irq.h>
-#include <asm/sync_bitops.h>
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-
-#include <xen/events.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/event_channel.h>
-
-#include "xen-ops.h"
-
-/*
- * This lock protects updates to the following mapping and reference-count
- * arrays. The lock does not need to be acquired to read the mapping tables.
- */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
-
-/* IRQ <-> VIRQ mapping. */
-static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
-
-/* IRQ <-> IPI mapping */
-static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
-
-/* Packed IRQ information: binding type, sub-type index, and event channel. */
-struct packed_irq
-{
-	unsigned short evtchn;
-	unsigned char index;
-	unsigned char type;
-};
-
-static struct packed_irq irq_info[NR_IRQS];
-
-/* Binding types. */
-enum {
-	IRQT_UNBOUND,
-	IRQT_PIRQ,
-	IRQT_VIRQ,
-	IRQT_IPI,
-	IRQT_EVTCHN
-};
-
-/* Convenient shorthand for packed representation of an unbound IRQ. */
-#define IRQ_UNBOUND	mk_irq_info(IRQT_UNBOUND, 0, 0)
-
-static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
-	[0 ... NR_EVENT_CHANNELS-1] = -1
-};
-static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
-static u8 cpu_evtchn[NR_EVENT_CHANNELS];
-
-/* Reference counts for bindings to IRQs. */
-static int irq_bindcount[NR_IRQS];
-
-/* Xen will never allocate port zero for any purpose. */
-#define VALID_EVTCHN(chn)	((chn) != 0)
-
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void force_evtchn_callback(void)
-{
-	(void)HYPERVISOR_xen_version(0, NULL);
-}
-EXPORT_SYMBOL_GPL(force_evtchn_callback);
-
-static struct irq_chip xen_dynamic_chip;
-
-/* Constructor for packed IRQ information. */
-static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
-{
-	return (struct packed_irq) { evtchn, index, type };
-}
-
-/*
- * Accessors for packed IRQ information.
- */
-static inline unsigned int evtchn_from_irq(int irq)
-{
-	return irq_info[irq].evtchn;
-}
-
-static inline unsigned int index_from_irq(int irq)
-{
-	return irq_info[irq].index;
-}
-
-static inline unsigned int type_from_irq(int irq)
-{
-	return irq_info[irq].type;
-}
-
-static inline unsigned long active_evtchns(unsigned int cpu,
-					   struct shared_info *sh,
-					   unsigned int idx)
-{
-	return (sh->evtchn_pending[idx] &
-		cpu_evtchn_mask[cpu][idx] &
-		~sh->evtchn_mask[idx]);
-}
-
-static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
-{
-	int irq = evtchn_to_irq[chn];
-
-	BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
-	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
-#endif
-
-	__clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
-	__set_bit(chn, cpu_evtchn_mask[cpu]);
-
-	cpu_evtchn[chn] = cpu;
-}
-
-static void init_evtchn_cpu_bindings(void)
-{
-#ifdef CONFIG_SMP
-	int i;
-	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < NR_IRQS; i++)
-		irq_desc[i].affinity = cpumask_of_cpu(0);
-#endif
-
-	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
-	memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
-}
-
-static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
-{
-	return cpu_evtchn[evtchn];
-}
-
-static inline void clear_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_clear_bit(port, &s->evtchn_pending[0]);
-}
-
-static inline void set_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_set_bit(port, &s->evtchn_pending[0]);
-}
-
-
-/**
- * notify_remote_via_irq - send event to remote end of event channel via irq
- * @irq: irq of event channel to send event to
- *
- * Unlike notify_remote_via_evtchn(), this is safe to use across
- * save/restore. Notifications on a broken connection are silently
- * dropped.
- */
-void notify_remote_via_irq(int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		notify_remote_via_evtchn(evtchn);
-}
-EXPORT_SYMBOL_GPL(notify_remote_via_irq);
-
-static void mask_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_set_bit(port, &s->evtchn_mask[0]);
-}
-
-static void unmask_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	unsigned int cpu = get_cpu();
-
-	BUG_ON(!irqs_disabled());
-
-	/* Slow path (hypercall) if this is a non-local port. */
-	if (unlikely(cpu != cpu_from_evtchn(port))) {
-		struct evtchn_unmask unmask = { .port = port };
-		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
-	} else {
-		struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
-
-		sync_clear_bit(port, &s->evtchn_mask[0]);
-
-		/*
-		 * The following is basically the equivalent of
-		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
-		 * the interrupt edge' if the channel is masked.
-		 */
-		if (sync_test_bit(port, &s->evtchn_pending[0]) &&
-		    !sync_test_and_set_bit(port / BITS_PER_LONG,
-					   &vcpu_info->evtchn_pending_sel))
-			vcpu_info->evtchn_upcall_pending = 1;
-	}
-
-	put_cpu();
-}
-
-static int find_unbound_irq(void)
-{
-	int irq;
-
-	/* Only allocate from dynirq range */
-	for (irq = 0; irq < NR_IRQS; irq++)
-		if (irq_bindcount[irq] == 0)
-			break;
-
-	if (irq == NR_IRQS)
-		panic("No available IRQ to bind to: increase NR_IRQS!\n");
-
-	return irq;
-}
-
-int bind_evtchn_to_irq(unsigned int evtchn)
-{
-	int irq;
-
-	spin_lock(&irq_mapping_update_lock);
-
-	irq = evtchn_to_irq[evtchn];
-
-	if (irq == -1) {
-		irq = find_unbound_irq();
-
-		dynamic_irq_init(irq);
-		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-					      handle_level_irq, "event");
-
-		evtchn_to_irq[evtchn] = irq;
-		irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
-	}
-
-	irq_bindcount[irq]++;
-
-	spin_unlock(&irq_mapping_update_lock);
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
-
-static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
-{
-	struct evtchn_bind_ipi bind_ipi;
-	int evtchn, irq;
-
-	spin_lock(&irq_mapping_update_lock);
-
-	irq = per_cpu(ipi_to_irq, cpu)[ipi];
-	if (irq == -1) {
-		irq = find_unbound_irq();
-		if (irq < 0)
-			goto out;
-
-		dynamic_irq_init(irq);
-		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-					      handle_level_irq, "ipi");
-
-		bind_ipi.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
-						&bind_ipi) != 0)
-			BUG();
-		evtchn = bind_ipi.port;
-
-		evtchn_to_irq[evtchn] = irq;
-		irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
-
-		per_cpu(ipi_to_irq, cpu)[ipi] = irq;
-
-		bind_evtchn_to_cpu(evtchn, cpu);
-	}
-
-	irq_bindcount[irq]++;
-
- out:
-	spin_unlock(&irq_mapping_update_lock);
-	return irq;
-}
-
-
-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
-{
-	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
-
-	spin_lock(&irq_mapping_update_lock);
-
-	irq = per_cpu(virq_to_irq, cpu)[virq];
-
-	if (irq == -1) {
-		bind_virq.virq = virq;
-		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
-
-		irq = find_unbound_irq();
-
-		dynamic_irq_init(irq);
-		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-					      handle_level_irq, "virq");
-
-		evtchn_to_irq[evtchn] = irq;
-		irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
-
-		per_cpu(virq_to_irq, cpu)[virq] = irq;
-
-		bind_evtchn_to_cpu(evtchn, cpu);
-	}
-
-	irq_bindcount[irq]++;
-
-	spin_unlock(&irq_mapping_update_lock);
-
-	return irq;
-}
-
-static void unbind_from_irq(unsigned int irq)
-{
-	struct evtchn_close close;
-	int evtchn = evtchn_from_irq(irq);
-
-	spin_lock(&irq_mapping_update_lock);
-
-	if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
-		close.port = evtchn;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-			BUG();
-
-		switch (type_from_irq(irq)) {
-		case IRQT_VIRQ:
-			per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
-				[index_from_irq(irq)] = -1;
-			break;
-		default:
-			break;
-		}
-
-		/* Closed ports are implicitly re-bound to VCPU0. */
-		bind_evtchn_to_cpu(evtchn, 0);
-
-		evtchn_to_irq[evtchn] = -1;
-		irq_info[irq] = IRQ_UNBOUND;
-
-		dynamic_irq_init(irq);
-	}
-
-	spin_unlock(&irq_mapping_update_lock);
-}
-
-int bind_evtchn_to_irqhandler(unsigned int evtchn,
-			      irq_handler_t handler,
-			      unsigned long irqflags,
-			      const char *devname, void *dev_id)
-{
-	unsigned int irq;
-	int retval;
-
-	irq = bind_evtchn_to_irq(evtchn);
-	retval = request_irq(irq, handler, irqflags, devname, dev_id);
-	if (retval != 0) {
-		unbind_from_irq(irq);
-		return retval;
-	}
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
-
-int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
-			    irq_handler_t handler,
-			    unsigned long irqflags, const char *devname, void *dev_id)
-{
-	unsigned int irq;
-	int retval;
-
-	irq = bind_virq_to_irq(virq, cpu);
-	retval = request_irq(irq, handler, irqflags, devname, dev_id);
-	if (retval != 0) {
-		unbind_from_irq(irq);
-		return retval;
-	}
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
-
-int bind_ipi_to_irqhandler(enum ipi_vector ipi,
-			   unsigned int cpu,
-			   irq_handler_t handler,
-			   unsigned long irqflags,
-			   const char *devname,
-			   void *dev_id)
-{
-	int irq, retval;
-
-	irq = bind_ipi_to_irq(ipi, cpu);
-	if (irq < 0)
-		return irq;
-
-	retval = request_irq(irq, handler, irqflags, devname, dev_id);
-	if (retval != 0) {
-		unbind_from_irq(irq);
-		return retval;
-	}
-
-	return irq;
-}
-
-void unbind_from_irqhandler(unsigned int irq, void *dev_id)
-{
-	free_irq(irq, dev_id);
-	unbind_from_irq(irq);
-}
-EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
-
-void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
-{
-	int irq = per_cpu(ipi_to_irq, cpu)[vector];
-	BUG_ON(irq < 0);
-	notify_remote_via_irq(irq);
-}
-
-
-/*
- * Search the CPUs pending events bitmasks.  For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
- *
- * Xen uses a two-level bitmap to speed searching.  The first level is
- * a bitset of words which contain pending event bits.  The second
- * level is a bitset of pending events themselves.
- */
-void xen_evtchn_do_upcall(struct pt_regs *regs)
-{
-	int cpu = get_cpu();
-	struct shared_info *s = HYPERVISOR_shared_info;
-	struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
-	unsigned long pending_words;
-
-	vcpu_info->evtchn_upcall_pending = 0;
-
-	/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
-	pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
-	while (pending_words != 0) {
-		unsigned long pending_bits;
-		int word_idx = __ffs(pending_words);
-		pending_words &= ~(1UL << word_idx);
-
-		while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
-			int bit_idx = __ffs(pending_bits);
-			int port = (word_idx * BITS_PER_LONG) + bit_idx;
-			int irq = evtchn_to_irq[port];
-
-			if (irq != -1) {
-				regs->orig_ax = ~irq;
-				do_IRQ(regs);
-			}
-		}
-	}
-
-	put_cpu();
-}
-
-/* Rebind an evtchn so that it gets delivered to a specific cpu */
-static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
-{
-	struct evtchn_bind_vcpu bind_vcpu;
-	int evtchn = evtchn_from_irq(irq);
-
-	if (!VALID_EVTCHN(evtchn))
-		return;
-
-	/* Send future instances of this interrupt to other vcpu. */
-	bind_vcpu.port = evtchn;
-	bind_vcpu.vcpu = tcpu;
-
-	/*
-	 * If this fails, it usually just indicates that we're dealing with a
-	 * virq or IPI channel, which don't actually need to be rebound. Ignore
-	 * it, but don't do the xenlinux-level rebind in that case.
-	 */
-	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
-		bind_evtchn_to_cpu(evtchn, tcpu);
-}
-
-
-static void set_affinity_irq(unsigned irq, cpumask_t dest)
-{
-	unsigned tcpu = first_cpu(dest);
-	rebind_irq_to_cpu(irq, tcpu);
-}
-
-static void enable_dynirq(unsigned int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		unmask_evtchn(evtchn);
-}
-
-static void disable_dynirq(unsigned int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		mask_evtchn(evtchn);
-}
-
-static void ack_dynirq(unsigned int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	move_native_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		clear_evtchn(evtchn);
-}
-
-static int retrigger_dynirq(unsigned int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-	int ret = 0;
-
-	if (VALID_EVTCHN(evtchn)) {
-		set_evtchn(evtchn);
-		ret = 1;
-	}
-
-	return ret;
-}
-
-static struct irq_chip xen_dynamic_chip __read_mostly = {
-	.name		= "xen-dyn",
-	.mask		= disable_dynirq,
-	.unmask		= enable_dynirq,
-	.ack		= ack_dynirq,
-	.set_affinity	= set_affinity_irq,
-	.retrigger	= retrigger_dynirq,
-};
-
-void __init xen_init_IRQ(void)
-{
-	int i;
-
-	init_evtchn_cpu_bindings();
-
-	/* No event channels are 'live' right now. */
-	for (i = 0; i < NR_EVENT_CHANNELS; i++)
-		mask_evtchn(i);
-
-	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for (i = 0; i < NR_IRQS; i++)
-		irq_bindcount[i] = 0;
-
-	irq_ctx_init(smp_processor_id());
-}
diff --git a/arch/x86/xen/features.c b/arch/x86/xen/features.c
deleted file mode 100644
index 0707714e40d6..000000000000
--- a/arch/x86/xen/features.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/******************************************************************************
- * features.c
- *
- * Xen feature flags.
- *
- * Copyright (c) 2006, Ian Campbell, XenSource Inc.
- */
-#include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/module.h>
-#include <asm/xen/hypervisor.h>
-#include <xen/features.h>
-
-u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
-EXPORT_SYMBOL_GPL(xen_features);
-
-void xen_setup_features(void)
-{
-	struct xen_feature_info fi;
-	int i, j;
-
-	for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
-		fi.submap_idx = i;
-		if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
-			break;
-		for (j = 0; j < 32; j++)
-			xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
-	}
-}
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
new file mode 100644
index 000000000000..49ba9b5224d1
--- /dev/null
+++ b/arch/x86/xen/grant-table.c
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * grant_table.c
+ * x86 specific part
+ *
+ * Granting foreign access to our memory reservation.
+ *
+ * Copyright (c) 2005-2006, Christopher Clark
+ * Copyright (c) 2004-2005, K A Fraser
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan. Split out x86 specific part.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <xen/interface/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+
+#include <asm/pgtable.h>
+
+static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+		      unsigned long addr, void *data)
+{
+	unsigned long **frames = (unsigned long **)data;
+
+	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+	(*frames)++;
+	return 0;
+}
+
+static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+			unsigned long addr, void *data)
+{
+
+	set_pte_at(&init_mm, addr, pte, __pte(0));
+	return 0;
+}
+
+int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   struct grant_entry **__shared)
+{
+	int rc;
+	struct grant_entry *shared = *__shared;
+
+	if (shared == NULL) {
+		struct vm_struct *area =
+			xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+		BUG_ON(area == NULL);
+		shared = area->addr;
+		*__shared = shared;
+	}
+
+	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+				 PAGE_SIZE * nr_gframes,
+				 map_pte_fn, &frames);
+	return rc;
+}
+
+void arch_gnttab_unmap_shared(struct grant_entry *shared,
+			      unsigned long nr_gframes)
+{
+	apply_to_page_range(&init_mm, (unsigned long)shared,
+			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+}
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2a054ef2a3da..126766d43aea 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -156,6 +156,10 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
+	/* updates to init_mm may be done without lock */
+	if (mm == &init_mm)
+		preempt_disable();
+
 	if (mm == current->mm || mm == &init_mm) {
 		if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
 			struct multicall_space mcs;
@@ -163,14 +167,61 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 
 			MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
-			return;
+			goto out;
 		} else
 			if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
-				return;
+				goto out;
 	}
 	xen_set_pte(ptep, pteval);
+
+out:
+	if (mm == &init_mm)
+		preempt_enable();
+}
+
+pteval_t xen_pte_val(pte_t pte)
+{
+	pteval_t ret = pte.pte;
+
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+
+	return ret;
+}
+
+pgdval_t xen_pgd_val(pgd_t pgd)
+{
+	pgdval_t ret = pgd.pgd;
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+	return ret;
+}
+
+pte_t xen_make_pte(pteval_t pte)
+{
+	if (pte & _PAGE_PRESENT) {
+		pte = phys_to_machine(XPADDR(pte)).maddr;
+		pte &= ~(_PAGE_PCD | _PAGE_PWT);
+	}
+
+	return (pte_t){ .pte = pte };
 }
 
+pgd_t xen_make_pgd(pgdval_t pgd)
+{
+	if (pgd & _PAGE_PRESENT)
+		pgd = phys_to_machine(XPADDR(pgd)).maddr;
+
+	return (pgd_t){ pgd };
+}
+
+pmdval_t xen_pmd_val(pmd_t pmd)
+{
+	pmdval_t ret = native_pmd_val(pmd);
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+	return ret;
+}
 #ifdef CONFIG_X86_PAE
 void xen_set_pud(pud_t *ptr, pud_t val)
 {
@@ -214,100 +265,18 @@ void xen_pmd_clear(pmd_t *pmdp)
 	xen_set_pmd(pmdp, __pmd(0));
 }
 
-unsigned long long xen_pte_val(pte_t pte)
+pmd_t xen_make_pmd(pmdval_t pmd)
 {
-	unsigned long long ret = 0;
-
-	if (pte.pte_low) {
-		ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	}
-
-	return ret;
-}
-
-unsigned long long xen_pmd_val(pmd_t pmd)
-{
-	unsigned long long ret = pmd.pmd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	return ret;
-}
-
-unsigned long long xen_pgd_val(pgd_t pgd)
-{
-	unsigned long long ret = pgd.pgd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	return ret;
-}
-
-pte_t xen_make_pte(unsigned long long pte)
-{
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
-	}
-
-	return (pte_t){ .pte = pte };
-}
-
-pmd_t xen_make_pmd(unsigned long long pmd)
-{
-	if (pmd & 1)
+	if (pmd & _PAGE_PRESENT)
 		pmd = phys_to_machine(XPADDR(pmd)).maddr;
 
-	return (pmd_t){ pmd };
-}
-
-pgd_t xen_make_pgd(unsigned long long pgd)
-{
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-	return (pgd_t){ pgd };
+	return native_make_pmd(pmd);
 }
 #else  /* !PAE */
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
 }
-
-unsigned long xen_pte_val(pte_t pte)
-{
-	unsigned long ret = pte.pte_low;
-
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr;
-
-	return ret;
-}
-
-unsigned long xen_pgd_val(pgd_t pgd)
-{
-	unsigned long ret = pgd.pgd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	return ret;
-}
-
-pte_t xen_make_pte(unsigned long pte)
-{
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
-	}
-
-	return (pte_t){ pte };
-}
-
-pgd_t xen_make_pgd(unsigned long pgd)
-{
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-	return (pgd_t){ pgd };
-}
 #endif	/* CONFIG_X86_PAE */
 
 /*
@@ -418,7 +387,7 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
 
 static int pin_page(struct page *page, enum pt_level level)
 {
-	unsigned pgfl = test_and_set_bit(PG_pinned, &page->flags);
+	unsigned pgfl = TestSetPagePinned(page);
 	int flush;
 
 	if (pgfl)
@@ -499,7 +468,7 @@ void __init xen_mark_init_mm_pinned(void)
 
 static int unpin_page(struct page *page, enum pt_level level)
 {
-	unsigned pgfl = test_and_clear_bit(PG_pinned, &page->flags);
+	unsigned pgfl = TestClearPagePinned(page);
 
 	if (pgfl && !PageHighMem(page)) {
 		void *pt = lowmem_page_address(page);
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2341492bf7a0..82517e4a752a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
 
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
+void xen_enable_sysenter(void)
+{
+	int cpu = smp_processor_id();
+	extern void xen_sysenter_target(void);
+	/* Mask events on entry, even though they get enabled immediately */
+	static struct callback_register sysenter = {
+		.type = CALLBACKTYPE_sysenter,
+		.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+		.flags = CALLBACKF_mask_events,
+	};
+
+	if (!boot_cpu_has(X86_FEATURE_SEP) ||
+	    HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
+	}
+}
+
 void __init xen_arch_setup(void)
 {
 	struct physdev_set_iopl set_iopl;
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
 	HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
 				 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
 
+	xen_enable_sysenter();
+
 	set_iopl.iopl = 1;
 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 	if (rc != 0)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e340ff92f6b6..94e69000f982 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,8 +36,9 @@
 #include "mmu.h"
 
 static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, resched_irq) = -1;
+static DEFINE_PER_CPU(int, callfunc_irq) = -1;
+static DEFINE_PER_CPU(int, debug_irq) = -1;
 
 /*
  * Structure and data for smp_call_function(). This is designed to minimise
@@ -72,6 +73,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 	int cpu = smp_processor_id();
 
 	cpu_init();
+	xen_enable_sysenter();
 
 	preempt_disable();
 	per_cpu(cpu_state, cpu) = CPU_ONLINE;
@@ -88,9 +90,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 static int xen_smp_intr_init(unsigned int cpu)
 {
 	int rc;
-	const char *resched_name, *callfunc_name;
-
-	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
+	const char *resched_name, *callfunc_name, *debug_name;
 
 	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -114,6 +114,14 @@ static int xen_smp_intr_init(unsigned int cpu)
 		goto fail;
 	per_cpu(callfunc_irq, cpu) = rc;
 
+	debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+	rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
+				     IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
+				     debug_name, NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(debug_irq, cpu) = rc;
+
 	return 0;
 
  fail:
@@ -121,6 +129,8 @@ static int xen_smp_intr_init(unsigned int cpu)
 		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
 	if (per_cpu(callfunc_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+	if (per_cpu(debug_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
 	return rc;
 }
 
@@ -183,7 +193,7 @@ void __init xen_smp_prepare_cpus(unsigned int max_cpus)
 
 	/* Restrict the possible_map according to max_cpus. */
 	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
-		for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
+		for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--)
 			continue;
 		cpu_clear(cpu, cpu_possible_map);
 	}
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index fe161ed4b01e..2497a30f41de 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -108,6 +108,20 @@ ENDPATCH(xen_restore_fl_direct)
 	RELOC(xen_restore_fl_direct, 2b+1)
 
 /*
+	We can't use sysexit directly, because we're not running in ring0.
+	But we can easily fake it up using iret.  Assuming xen_sysexit
+	is jumped to with a standard stack frame, we can just strip it
+	back to a standard iret frame and use iret.
+ */
+ENTRY(xen_sysexit)
+	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
+	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
+	lea PT_EIP(%esp), %esp
+
+	jmp xen_iret
+ENDPROC(xen_sysexit)
+
+/*
 	This is run where a normal iret would be run, with the same stack setup:
 	      8: eflags
 	      4: cs
@@ -184,8 +198,12 @@ iret_restore_end:
 	   region is OK. */
 	je xen_hypervisor_callback
 
-	iret
+1:	iret
 xen_iret_end_crit:
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
 
 hyper_iret:
 	/* put this out of line since its very rarely used */
@@ -219,9 +237,7 @@ hyper_iret:
 	 ds		}  SAVE_ALL state
 	 eax		}
 	  :		:
-	 ebx		}
-	----------------
-	 return addr	 <- esp
+	 ebx		}<- esp
 	----------------
 
    In order to deliver the nested exception properly, we need to shift
@@ -236,10 +252,8 @@ hyper_iret:
    it's usermode state which we eventually need to restore.
  */
 ENTRY(xen_iret_crit_fixup)
-	/* offsets +4 for return address */
-
 	/*
-	   Paranoia: Make sure we're really coming from userspace.
+	   Paranoia: Make sure we're really coming from kernel space.
 	   One could imagine a case where userspace jumps into the
 	   critical range address, but just before the CPU delivers a GP,
 	   it decides to deliver an interrupt instead.  Unlikely?
@@ -248,32 +262,32 @@ ENTRY(xen_iret_crit_fixup)
 	   jump instruction itself, not the destination, but some virtual
 	   environments get this wrong.
 	 */
-	movl PT_CS+4(%esp), %ecx
+	movl PT_CS(%esp), %ecx
 	andl $SEGMENT_RPL_MASK, %ecx
 	cmpl $USER_RPL, %ecx
 	je 2f
 
-	lea PT_ORIG_EAX+4(%esp), %esi
-	lea PT_EFLAGS+4(%esp), %edi
+	lea PT_ORIG_EAX(%esp), %esi
+	lea PT_EFLAGS(%esp), %edi
 
 	/* If eip is before iret_restore_end then stack
 	   hasn't been restored yet. */
 	cmp $iret_restore_end, %eax
 	jae 1f
 
-	movl 0+4(%edi),%eax		/* copy EAX */
-	movl %eax, PT_EAX+4(%esp)
+	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
+	movl %eax, PT_EAX(%esp)
 
 	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
 
 	/* set up the copy */
 1:	std
-	mov $(PT_EIP+4) / 4, %ecx	/* copy ret+saved regs up to orig_eax */
+	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
 	rep movsl
 	cld
 
 	lea 4(%edi),%esp		/* point esp to new frame */
-2:	ret
+2:	jmp xen_do_upcall
 
 
 /*
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 956a491ea998..f1063ae08037 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -2,6 +2,8 @@
 #define XEN_OPS_H
 
 #include <linux/init.h>
+#include <linux/irqreturn.h>
+#include <xen/xen-ops.h>
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -9,7 +11,6 @@ extern const char xen_failsafe_callback[];
 
 void xen_copy_trap_info(struct trap_info *traps);
 
-DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DECLARE_PER_CPU(unsigned long, xen_cr3);
 DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
@@ -19,6 +20,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
+void xen_enable_sysenter(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
@@ -28,6 +30,8 @@ unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
 
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
+
 bool xen_vcpu_stolen(int vcpu);
 
 void xen_mark_init_mm_pinned(void);
@@ -64,4 +68,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
 void xen_iret(void);
+void xen_sysexit(void);
+
 #endif /* XEN_OPS_H */