From e3239ff92a17976ac5d26fa0fe40ef3a9daf2523 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 14:06:41 +1000 Subject: memblock: Rename memblock_region to memblock_type and memblock_property to memblock_region Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/mem.c | 26 +++++++++++++------------- arch/powerpc/platforms/embedded6xx/wii.c | 2 +- 3 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 09dffe6efa46..b1a3784744db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -660,7 +660,7 @@ static void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < memblock.memory.cnt; i++) { - base = (unsigned long)__va(memblock.memory.region[i].base); + base = (unsigned long)__va(memblock.memory.regions[i].base); size = memblock.memory.region[i].size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 1a84a8d00005..a33f5c186fb7 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -86,10 +86,10 @@ int page_is_ram(unsigned long pfn) for (i=0; i < memblock.memory.cnt; i++) { unsigned long base; - base = memblock.memory.region[i].base; + base = memblock.memory.regions[i].base; if ((paddr >= base) && - (paddr < (base + memblock.memory.region[i].size))) { + (paddr < (base + memblock.memory.regions[i].size))) { return 1; } } @@ -149,7 +149,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct memblock_property res; + struct memblock_region res; unsigned long pfn, len; u64 end; int ret = -1; @@ -206,7 +206,7 @@ void __init do_init_bootmem(void) /* Add active regions with valid PFNs */ for (i = 0; i < memblock.memory.cnt; i++) { unsigned long start_pfn, end_pfn; - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; + start_pfn = memblock.memory.regions[i].base >> PAGE_SHIFT; end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); add_active_range(0, start_pfn, end_pfn); } @@ -219,16 +219,16 @@ void __init do_init_bootmem(void) /* reserve the sections we're already using */ for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long addr = memblock.reserved.region[i].base + + unsigned long addr = memblock.reserved.regions[i].base + memblock_size_bytes(&memblock.reserved, i) - 1; if (addr < lowmem_end_addr) - reserve_bootmem(memblock.reserved.region[i].base, + reserve_bootmem(memblock.reserved.regions[i].base, memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); - else if (memblock.reserved.region[i].base < lowmem_end_addr) { + else if (memblock.reserved.regions[i].base < lowmem_end_addr) { unsigned long adjusted_size = lowmem_end_addr - - memblock.reserved.region[i].base; - reserve_bootmem(memblock.reserved.region[i].base, + memblock.reserved.regions[i].base; + reserve_bootmem(memblock.reserved.regions[i].base, adjusted_size, BOOTMEM_DEFAULT); } } @@ -237,7 +237,7 @@ void __init do_init_bootmem(void) /* reserve the sections we're already using */ for (i = 0; i < memblock.reserved.cnt; i++) - reserve_bootmem(memblock.reserved.region[i].base, + reserve_bootmem(memblock.reserved.regions[i].base, memblock_size_bytes(&memblock.reserved, i), BOOTMEM_DEFAULT); @@ -257,10 +257,10 @@ static int __init mark_nonram_nosave(void) for (i = 0; i < 
memblock.memory.cnt - 1; i++) { memblock_region_max_pfn = - (memblock.memory.region[i].base >> PAGE_SHIFT) + - (memblock.memory.region[i].size >> PAGE_SHIFT); + (memblock.memory.regions[i].base >> PAGE_SHIFT) + + (memblock.memory.regions[i].size >> PAGE_SHIFT); memblock_next_region_start_pfn = - memblock.memory.region[i+1].base >> PAGE_SHIFT; + memblock.memory.regions[i+1].base >> PAGE_SHIFT; if (memblock_region_max_pfn < memblock_next_region_start_pfn) register_nosave_region(memblock_region_max_pfn, diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 5cdcc7c8d973..8450c29e9b2f 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -65,7 +65,7 @@ static int __init page_aligned(unsigned long x) void __init wii_memory_fixups(void) { - struct memblock_property *p = memblock.memory.region; + struct memblock_region *p = memblock.memory.region; /* * This is part of a workaround to allow the use of two -- cgit v1.2.3 From 28be7072ce54b82642ebff6a80d474d4c6a6a7fd Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 4 Aug 2010 13:43:53 +1000 Subject: memblock/powerpc: Use new accessors Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 8 ++-- arch/powerpc/mm/mem.c | 92 +++++++++++++++-------------------------- arch/powerpc/mm/numa.c | 17 ++++---- 3 files changed, 46 insertions(+), 71 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b1a3784744db..4072b871497d 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -588,7 +588,7 @@ static void __init htab_initialize(void) unsigned long pteg_count; unsigned long prot; unsigned long base = 0, size = 0, limit; - int i; + struct memblock_region *reg; DBG(" -> htab_initialize()\n"); @@ -659,9 +659,9 @@ static void __init htab_initialize(void) */ /* create bolted the linear mapping in the hash table */ - for (i=0; i < memblock.memory.cnt; i++) { - base = (unsigned long)__va(memblock.memory.regions[i].base); - size = memblock.memory.region[i].size; + for_each_memblock(memory, reg) { + base = (unsigned long)__va(reg->base); + size = reg->size; DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", base, size, prot); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index a33f5c186fb7..52df5428ece4 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -82,18 +82,11 @@ int page_is_ram(unsigned long pfn) return pfn < max_pfn; #else unsigned long paddr = (pfn << PAGE_SHIFT); - int i; - for (i=0; i < memblock.memory.cnt; i++) { - unsigned long base; + struct memblock_region *reg; - base = memblock.memory.regions[i].base; - - if ((paddr >= base) && - (paddr < (base + memblock.memory.regions[i].size))) { + for_each_memblock(memory, reg) + if (paddr >= reg->base && paddr < (reg->base + reg->size)) return 1; - } - } - return 0; #endif } @@ -149,23 +142,19 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) { - struct memblock_region res; - unsigned long pfn, len; - u64 end; + struct memblock_region *reg; + unsigned long end_pfn = start_pfn + nr_pages; + unsigned long tstart, tend; int ret = -1; - res.base = (u64) start_pfn << PAGE_SHIFT; - res.size = (u64) nr_pages << PAGE_SHIFT; - - end = res.base + res.size - 1; - while ((res.base < end) && (memblock_find(&res) >= 0)) { - pfn = (unsigned long)(res.base >> 
PAGE_SHIFT); - len = (unsigned long)(res.size >> PAGE_SHIFT); - ret = (*func)(pfn, len, arg); + for_each_memblock(memory, reg) { + tstart = max(start_pfn, memblock_region_base_pfn(reg)); + tend = min(end_pfn, memblock_region_end_pfn(reg)); + if (tstart >= tend) + continue; + ret = (*func)(tstart, tend - tstart, arg); if (ret) break; - res.base += (res.size + 1); - res.size = (end - res.base + 1); } return ret; } @@ -179,9 +168,9 @@ EXPORT_SYMBOL_GPL(walk_system_ram_range); #ifndef CONFIG_NEED_MULTIPLE_NODES void __init do_init_bootmem(void) { - unsigned long i; unsigned long start, bootmap_pages; unsigned long total_pages; + struct memblock_region *reg; int boot_mapsize; max_low_pfn = max_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; @@ -204,10 +193,10 @@ void __init do_init_bootmem(void) boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); /* Add active regions with valid PFNs */ - for (i = 0; i < memblock.memory.cnt; i++) { + for_each_memblock(memory, reg) { unsigned long start_pfn, end_pfn; - start_pfn = memblock.memory.regions[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); + start_pfn = memblock_region_base_pfn(reg); + end_pfn = memblock_region_end_pfn(reg); add_active_range(0, start_pfn, end_pfn); } @@ -218,29 +207,21 @@ void __init do_init_bootmem(void) free_bootmem_with_active_regions(0, lowmem_end_addr >> PAGE_SHIFT); /* reserve the sections we're already using */ - for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long addr = memblock.reserved.regions[i].base + - memblock_size_bytes(&memblock.reserved, i) - 1; - if (addr < lowmem_end_addr) - reserve_bootmem(memblock.reserved.regions[i].base, - memblock_size_bytes(&memblock.reserved, i), - BOOTMEM_DEFAULT); - else if (memblock.reserved.regions[i].base < lowmem_end_addr) { - unsigned long adjusted_size = lowmem_end_addr - - memblock.reserved.regions[i].base; - reserve_bootmem(memblock.reserved.regions[i].base, - adjusted_size, BOOTMEM_DEFAULT); + for_each_memblock(reserved, reg) { + unsigned long top = reg->base + reg->size - 1; + if (top < lowmem_end_addr) + reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); + else if (reg->base < lowmem_end_addr) { + unsigned long trunc_size = lowmem_end_addr - reg->base; + reserve_bootmem(reg->base, trunc_size, BOOTMEM_DEFAULT); } } #else free_bootmem_with_active_regions(0, max_pfn); /* reserve the sections we're already using */ - for (i = 0; i < memblock.reserved.cnt; i++) - reserve_bootmem(memblock.reserved.regions[i].base, - memblock_size_bytes(&memblock.reserved, i), - BOOTMEM_DEFAULT); - + for_each_memblock(reserved, reg) + reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); #endif /* XXX need to clip this if using highmem? 
*/ sparse_memory_present_with_active_regions(0); @@ -251,22 +232,15 @@ void __init do_init_bootmem(void) /* mark pages that don't exist as nosave */ static int __init mark_nonram_nosave(void) { - unsigned long memblock_next_region_start_pfn, - memblock_region_max_pfn; - int i; - - for (i = 0; i < memblock.memory.cnt - 1; i++) { - memblock_region_max_pfn = - (memblock.memory.regions[i].base >> PAGE_SHIFT) + - (memblock.memory.regions[i].size >> PAGE_SHIFT); - memblock_next_region_start_pfn = - memblock.memory.regions[i+1].base >> PAGE_SHIFT; - - if (memblock_region_max_pfn < memblock_next_region_start_pfn) - register_nosave_region(memblock_region_max_pfn, - memblock_next_region_start_pfn); + struct memblock_region *reg, *prev = NULL; + + for_each_memblock(memory, reg) { + if (prev && + memblock_region_end_pfn(prev) < memblock_region_base_pfn(reg)) + register_nosave_region(memblock_region_end_pfn(prev), + memblock_region_base_pfn(reg)); + prev = reg; } - return 0; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index aa731af720c0..9ba9ba1a430d 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -746,16 +746,17 @@ static void __init setup_nonnuma(void) unsigned long top_of_ram = memblock_end_of_DRAM(); unsigned long total_ram = memblock_phys_mem_size(); unsigned long start_pfn, end_pfn; - unsigned int i, nid = 0; + unsigned int nid = 0; + struct memblock_region *reg; printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", top_of_ram, total_ram); printk(KERN_DEBUG "Memory hole size: %ldMB\n", (top_of_ram - total_ram) >> 20); - for (i = 0; i < memblock.memory.cnt; ++i) { - start_pfn = memblock.memory.region[i].base >> PAGE_SHIFT; - end_pfn = start_pfn + memblock_size_pages(&memblock.memory, i); + for_each_memblock(memory, reg) { + start_pfn = memblock_region_base_pfn(reg); + end_pfn = memblock_region_end_pfn(reg); fake_numa_create_new_node(end_pfn, &nid); add_active_range(nid, start_pfn, end_pfn); @@ -891,11 +892,11 @@ static struct notifier_block __cpuinitdata ppc64_numa_nb = { static void mark_reserved_regions_for_nid(int nid) { struct pglist_data *node = NODE_DATA(nid); - int i; + struct memblock_region *reg; - for (i = 0; i < memblock.reserved.cnt; i++) { - unsigned long physbase = memblock.reserved.region[i].base; - unsigned long size = memblock.reserved.region[i].size; + for_each_memblock(reserved, reg) { + unsigned long physbase = reg->base; + unsigned long size = reg->size; unsigned long start_pfn = physbase >> PAGE_SHIFT; unsigned long end_pfn = PFN_UP(physbase + size); struct node_active_region node_ar; -- cgit v1.2.3 From 27f574c223d2c09610058b3ec7a29582d63a3e06 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:00 -0700 Subject: memblock: Expose MEMBLOCK_ALLOC_ANYWHERE Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/hash_utils_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 4072b871497d..a542ff5ec8a9 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -625,7 +625,7 @@ static void __init htab_initialize(void) if (machine_is(cell)) limit = 0x80000000; else - limit = 0; + limit = MEMBLOCK_ALLOC_ANYWHERE; table = memblock_alloc_base(htab_size_bytes, htab_size_bytes, limit); -- cgit v1.2.3 From e63075a3c9377536d085bc013cd3fe6323162449 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:01 -0700 Subject: memblock: Introduce default 
allocation limit and use it to replace explicit ones This introduces memblock.current_limit which is used to limit allocations from memblock_alloc() or memblock_alloc_base(..., MEMBLOCK_ALLOC_ACCESSIBLE). The old MEMBLOCK_ALLOC_ANYWHERE changes value from 0 to ~(u64)0 and can still be used with memblock_alloc_base() to allocate really anywhere. It is -no-longer- cropped to MEMBLOCK_REAL_LIMIT which disappears. Note to archs: I'm leaving the default limit at MEMBLOCK_ALLOC_ANYWHERE. I strongly recommend that you ensure that you set an appropriate limit during boot in order to guarantee that a memblock_alloc() at any time results in something that is accessible with a simple __va(). The reason is that a subsequent patch will introduce the ability for the array to resize itself by reallocating itself. The MEMBLOCK core will honor the current limit when performing those allocations. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/memblock.h | 7 ------- arch/powerpc/kernel/prom.c | 20 +++++++++++++++++++- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/mm/40x_mmu.c | 5 +++-- arch/powerpc/mm/fsl_booke_mmu.c | 3 ++- arch/powerpc/mm/hash_utils_64.c | 3 ++- arch/powerpc/mm/init_32.c | 29 +++++++---------------------- arch/powerpc/mm/ppc_mmu_32.c | 3 +-- arch/powerpc/mm/tlb_nohash.c | 2 ++ 9 files changed, 37 insertions(+), 37 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/memblock.h b/arch/powerpc/include/asm/memblock.h index 3c29728b56b1..43efc345065e 100644 --- a/arch/powerpc/include/asm/memblock.h +++ b/arch/powerpc/include/asm/memblock.h @@ -5,11 +5,4 @@ #define MEMBLOCK_DBG(fmt...) udbg_printf(fmt) -#ifdef CONFIG_PPC32 -extern phys_addr_t lowmem_end_addr; -#define MEMBLOCK_REAL_LIMIT lowmem_end_addr -#else -#define MEMBLOCK_REAL_LIMIT 0 -#endif - #endif /* _ASM_POWERPC_MEMBLOCK_H */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fed9bf6187d1..3aec0b980f6a 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -98,7 +98,7 @@ static void __init move_device_tree(void) if ((memory_limit && (start + size) > memory_limit) || overlaps_crashkernel(start, size)) { - p = __va(memblock_alloc_base(size, PAGE_SIZE, memblock.rmo_size)); + p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = (struct boot_param_header *)p; DBG("Moved device tree to 0x%p\n", p); @@ -655,6 +655,21 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ +static void set_boot_memory_limit(void) +{ +#ifdef CONFIG_PPC32 + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(0x01000000); + /* 8xx can only access 8MB at the moment */ + else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) + memblock_set_current_limit(0x00800000); + else + memblock_set_current_limit(0x10000000); +#else + memblock_set_current_limit(memblock.rmo_size); +#endif +} void __init early_init_devtree(void *params) { @@ -683,6 +698,7 @@ void __init early_init_devtree(void *params) /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); + of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); @@ -718,6 +734,8 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); + set_boot_memory_limit(); + /* We may need to relocate the flat tree, do it now. * FIXME .. 
and the initrd too? */ move_device_tree(); diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index a10ffc85ada7..b7eb1ded3b5f 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -246,7 +246,7 @@ static void __init irqstack_early_init(void) unsigned int i; /* interrupt stacks must be in lowmem, we get that for free on ppc32 - * as the memblock is limited to lowmem by MEMBLOCK_REAL_LIMIT */ + * as the memblock is limited to lowmem by default */ for_each_possible_cpu(i) { softirq_ctx[i] = (struct thread_info *) __va(memblock_alloc(THREAD_SIZE, THREAD_SIZE)); diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 1dc2fa5ce1bd..58969b51f454 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -47,6 +48,7 @@ #include #include #include + #include "mmu_decl.h" extern int __map_without_ltlbs; @@ -139,8 +141,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - - __initial_memory_limit_addr = memstart_addr + mapped; + memblock_set_current_limit(memstart_addr + mapped); return mapped; } diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index cdc7526e9c93..e525f862d759 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -212,5 +213,5 @@ void __init adjust_total_lowmem(void) pr_cont("%lu Mb, residual: %dMb\n", tlbcam_sz(tlbcam_index - 1) >> 20, (unsigned int)((total_lowmem - __max_low_memory) >> 20)); - __initial_memory_limit_addr = memstart_addr + __max_low_memory; + memblock_set_current_limit(memstart_addr + __max_low_memory); } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index a542ff5ec8a9..b05890e23813 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -696,7 +696,8 @@ static void __init htab_initialize(void) #endif /* CONFIG_U3_DART */ BUG_ON(htab_bolt_mapping(base, base + size, __pa(base), prot, mmu_linear_psize, mmu_kernel_ssize)); - } + } + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); /* * If we have a memory_limit and we've allocated TCEs then we need to diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 6a6975dc2654..59b208b7ec6f 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -91,12 +91,6 @@ int __allow_ioremap_reserved; /* max amount of low RAM to map in */ unsigned long __max_low_memory = MAX_LOW_MEM; -/* - * address of the limit of what is accessible with initial MMU setup - - * 256MB usually, but only 16MB on 601. - */ -phys_addr_t __initial_memory_limit_addr = (phys_addr_t)0x10000000; - /* * Check for command-line options that affect what MMU_init will do. 
*/ @@ -126,13 +120,6 @@ void __init MMU_init(void) if (ppc_md.progress) ppc_md.progress("MMU:enter", 0x111); - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - __initial_memory_limit_addr = 0x01000000; - /* 8xx can only access 8MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - __initial_memory_limit_addr = 0x00800000; - /* parse args from command line */ MMU_setup(); @@ -190,20 +177,18 @@ void __init MMU_init(void) #ifdef CONFIG_BOOTX_TEXT btext_unmap(); #endif + + /* Shortly after that, the entire linear mapping will be available */ + memblock_set_current_limit(lowmem_end_addr); } /* This is only called until mem_init is done. */ void __init *early_get_page(void) { - void *p; - - if (init_bootmem_done) { - p = alloc_bootmem_pages(PAGE_SIZE); - } else { - p = __va(memblock_alloc_base(PAGE_SIZE, PAGE_SIZE, - __initial_memory_limit_addr)); - } - return p; + if (init_bootmem_done) + return alloc_bootmem_pages(PAGE_SIZE); + else + return __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE)); } /* Free up now-unused memory */ diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index f8a01829d64f..7d34e170e80f 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -223,8 +223,7 @@ void __init MMU_init_hw(void) * Find some memory for the hash table. */ if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); - Hash = __va(memblock_alloc_base(Hash_size, Hash_size, - __initial_memory_limit_addr)); + Hash = __va(memblock_alloc(Hash_size, Hash_size)); cacheable_memzero(Hash, Hash_size); _SDR1 = __pa(Hash) | SDR1_LOW_BITS; diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index d8695b02a968..7ba32e762990 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -432,6 +432,8 @@ static void __early_init_mmu(int boot_cpu) * the MMU configuration */ mb(); + + memblock_set_current_limit(linear_map_top); } void __init early_init_mmu(void) -- cgit v1.2.3 From cd3db0c4ca3d237e7ad20f7107216e575705d2b0 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 6 Jul 2010 15:39:02 -0700 Subject: memblock: Remove rmo_size, bury it in arch/powerpc where it belongs The RMA (RMO is a misnomer) is a concept specific to ppc64 (in fact server ppc64, though I hijack it on embedded ppc64 for similar purposes) and represents the area of memory that can be accessed in real mode (aka with MMU off), or on embedded, from the exception vectors (which are bolted in the TLB), which pretty much boils down to the same thing. We take that out of the generic MEMBLOCK data structure and move it into arch/powerpc where it belongs, renaming it to "RMA" while at it. 
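To make the shape of the change concrete, the essence of the new per-MMU-family hook is sketched below, distilled from the 64-bit hash MMU variant in the diff that follows (sketch only, kernel context assumed; each MMU family provides its own version):

	void setup_initial_memory_limit(phys_addr_t first_memblock_base,
					phys_addr_t first_memblock_size)
	{
		/* We don't currently support a first memory block that
		 * does not map physical address 0.
		 */
		BUG_ON(first_memblock_base != 0);

		/* Clamp the RMA to the first memory block, capped at 1G */
		ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000);

		/* Limit subsequent memblock allocations to the RMA */
		memblock_set_current_limit(ppc64_rma_size);
	}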
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mmu.h | 12 ++++++++++++ arch/powerpc/kernel/head_40x.S | 6 +----- arch/powerpc/kernel/paca.c | 2 +- arch/powerpc/kernel/prom.c | 29 ++++++++--------------------- arch/powerpc/kernel/rtas.c | 2 +- arch/powerpc/kernel/setup_64.c | 2 +- arch/powerpc/mm/40x_mmu.c | 14 +++++++++++++- arch/powerpc/mm/44x_mmu.c | 14 ++++++++++++++ arch/powerpc/mm/fsl_booke_mmu.c | 9 +++++++++ arch/powerpc/mm/hash_utils_64.c | 22 +++++++++++++++++++++- arch/powerpc/mm/init_32.c | 14 ++++++++++++++ arch/powerpc/mm/init_64.c | 1 + arch/powerpc/mm/ppc_mmu_32.c | 15 +++++++++++++++ arch/powerpc/mm/tlb_nohash.c | 14 ++++++++++++++ 14 files changed, 125 insertions(+), 31 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 7ebf42ed84a2..bb40a06d3b77 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -2,6 +2,8 @@ #define _ASM_POWERPC_MMU_H_ #ifdef __KERNEL__ +#include + #include #include @@ -82,6 +84,16 @@ extern unsigned int __start___mmu_ftr_fixup, __stop___mmu_ftr_fixup; extern void early_init_mmu(void); extern void early_init_mmu_secondary(void); +extern void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size); + +#ifdef CONFIG_PPC64 +/* This is our real memory area size on ppc64 server, on embedded, we + * make it match the size our of bolted TLB area + */ +extern u64 ppc64_rma_size; +#endif /* CONFIG_PPC64 */ + #endif /* !__ASSEMBLY__ */ /* The kernel use the constants below to index in the page sizes array. diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a90625f9b485..8278e8bad5a0 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -923,11 +923,7 @@ initial_mmu: mtspr SPRN_PID,r0 sync - /* Configure and load two entries into TLB slots 62 and 63. - * In case we are pinning TLBs, these are reserved in by the - * other TLB functions. If not reserving, then it doesn't - * matter where they are loaded. - */ + /* Configure and load one entry into TLB slots 63 */ clrrwi r4,r4,10 /* Mask off the real page number */ ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 139a773853f4..b9ffd7deeed7 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -117,7 +117,7 @@ void __init allocate_pacas(void) * the first segment. On iSeries they must be within the area mapped * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. 
*/ - limit = min(0x10000000ULL, memblock.rmo_size); + limit = min(0x10000000ULL, ppc64_rma_size); if (firmware_has_feature(FW_FEATURE_ISERIES)) limit = min(limit, HvPagesToMap * HVPAGESIZE); diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 3aec0b980f6a..c3c6a8857544 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -66,6 +66,7 @@ int __initdata iommu_is_off; int __initdata iommu_force_on; unsigned long tce_alloc_start, tce_alloc_end; +u64 ppc64_rma_size; #endif static int __init early_parse_mem(char *p) @@ -492,7 +493,7 @@ static int __init early_init_dt_scan_memory_ppc(unsigned long node, void __init early_init_dt_add_memory_arch(u64 base, u64 size) { -#if defined(CONFIG_PPC64) +#ifdef CONFIG_PPC64 if (iommu_is_off) { if (base >= 0x80000000ul) return; @@ -501,9 +502,13 @@ void __init early_init_dt_add_memory_arch(u64 base, u64 size) } #endif - memblock_add(base, size); - + /* First MEMBLOCK added, do some special initializations */ + if (memstart_addr == ~(phys_addr_t)0) + setup_initial_memory_limit(base, size); memstart_addr = min((u64)memstart_addr, base); + + /* Add the chunk to the MEMBLOCK list */ + memblock_add(base, size); } u64 __init early_init_dt_alloc_memory_arch(u64 size, u64 align) @@ -655,22 +660,6 @@ static void __init phyp_dump_reserve_mem(void) static inline void __init phyp_dump_reserve_mem(void) {} #endif /* CONFIG_PHYP_DUMP && CONFIG_PPC_RTAS */ -static void set_boot_memory_limit(void) -{ -#ifdef CONFIG_PPC32 - /* 601 can only access 16MB at the moment */ - if (PVR_VER(mfspr(SPRN_PVR)) == 1) - memblock_set_current_limit(0x01000000); - /* 8xx can only access 8MB at the moment */ - else if (PVR_VER(mfspr(SPRN_PVR)) == 0x50) - memblock_set_current_limit(0x00800000); - else - memblock_set_current_limit(0x10000000); -#else - memblock_set_current_limit(memblock.rmo_size); -#endif -} - void __init early_init_devtree(void *params) { phys_addr_t limit; @@ -734,8 +723,6 @@ void __init early_init_devtree(void *params) DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); - set_boot_memory_limit(); - /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ move_device_tree(); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index d0516dbee762..1662777be5dd 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -934,7 +934,7 @@ void __init rtas_initialize(void) */ #ifdef CONFIG_PPC64 if (machine_is(pseries) && firmware_has_feature(FW_FEATURE_LPAR)) { - rtas_region = min(memblock.rmo_size, RTAS_INSTANTIATE_MAX); + rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX); ibm_suspend_me_token = rtas_token("ibm,suspend-me"); } #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d135f93cb0f6..4360944b60f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -487,7 +487,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. 
*/ - limit = min(slb0_limit(), memblock.rmo_size); + limit = min(slb0_limit(), ppc64_rma_size); for_each_possible_cpu(i) { unsigned long sp; diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c index 58969b51f454..5810967511d4 100644 --- a/arch/powerpc/mm/40x_mmu.c +++ b/arch/powerpc/mm/40x_mmu.c @@ -141,7 +141,19 @@ unsigned long __init mmu_mapin_ram(unsigned long top) * coverage with normal-sized pages (or other reasons) do not * attempt to allocate outside the allowed range. */ - memblock_set_current_limit(memstart_addr + mapped); + memblock_set_current_limit(mapped); return mapped; } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 40x can only access 16MB at the moment (see head_40x.S) */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index d8c6efb32bc6..024acab588fd 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -24,6 +24,8 @@ */ #include +#include + #include #include #include @@ -213,6 +215,18 @@ unsigned long __init mmu_mapin_ram(unsigned long top) return total_lowmem; } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 44x has a 256M TLB entry pinned at boot */ + memblock_set_current_limit(min_t(u64, first_memblock_size, PPC_PIN_SIZE)); +} + #ifdef CONFIG_SMP void __cpuinit mmu_init_secondary(int cpu) { diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index e525f862d759..0be8fe24c54e 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -215,3 +215,12 @@ void __init adjust_total_lowmem(void) memblock_set_current_limit(memstart_addr + __max_low_memory); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + phys_addr_t limit = first_memblock_base + first_memblock_size; + + /* 64M mapped initially according to head_fsl_booke.S */ + memblock_set_current_limit(min_t(u64, limit, 0x04000000)); +} diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index b05890e23813..83f534d862db 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -649,7 +649,7 @@ static void __init htab_initialize(void) #ifdef CONFIG_DEBUG_PAGEALLOC linear_map_hash_count = memblock_end_of_DRAM() >> PAGE_SHIFT; linear_map_hash_slots = __va(memblock_alloc_base(linear_map_hash_count, - 1, memblock.rmo_size)); + 1, ppc64_rma_size)); memset(linear_map_hash_slots, 0, linear_map_hash_count); #endif /* CONFIG_DEBUG_PAGEALLOC */ @@ -1248,3 +1248,23 @@ void kernel_map_pages(struct page *page, int numpages, int enable) local_irq_restore(flags); } #endif /* CONFIG_DEBUG_PAGEALLOC */ + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* On LPAR systems, the first entry is our RMA region, + * non-LPAR 64-bit hash MMU systems don't have a limitation + * on real mode access, but using the first entry works well + * enough. 
We also clamp it to 1G to avoid some funky things + * such as RTAS bugs etc... + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_rma_size); +} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 59b208b7ec6f..742da43b4ab6 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -237,3 +237,17 @@ void free_initrd_mem(unsigned long start, unsigned long end) } #endif + +#ifdef CONFIG_8xx /* No 8xx specific .c file to put that in ... */ +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 8xx can only access 8MB at the moment */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x00800000)); +} +#endif /* CONFIG_8xx */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 71f1415e2472..9e081ffbf0f2 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -328,3 +328,4 @@ int __meminit vmemmap_populate(struct page *start_page, return 0; } #endif /* CONFIG_SPARSEMEM_VMEMMAP */ + diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 7d34e170e80f..11571e118831 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -271,3 +271,18 @@ void __init MMU_init_hw(void) if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); } + +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* We don't currently support the first MEMBLOCK not mapping 0 + * physical on those processors + */ + BUG_ON(first_memblock_base != 0); + + /* 601 can only access 16MB at the moment */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x01000000)); + else /* Anything else has 256M mapped */ + memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000)); +} diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 7ba32e762990..a086ed562606 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -446,4 +446,18 @@ void __cpuinit early_init_mmu_secondary(void) __early_init_mmu(0); } +void setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) +{ + /* On Embedded 64-bit, we adjust the RMA size to match + * the bolted TLB entry. We know for now that only 1G + * entries are supported though that may eventually + * change. We crop it to the size of the first MEMBLOCK to + * avoid going over total available memory just in case... + */ + ppc64_rma_size = min_t(u64, first_memblock_size, 0x40000000); + + /* Finally limit subsequent allocations */ + memblock_set_current_limit(ppc64_memblock_base + ppc64_rma_size); +} #endif /* CONFIG_PPC64 */ -- cgit v1.2.3 From 4734b594c6ca1be796d30c82d93fdf5160f45124 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Jul 2010 14:31:29 +1000 Subject: memblock: Remove memblock_type.size and add memblock.memory_size instead Right now, both the "memory" and "reserved" memblock_type structures have a "size" member. It represents the calculated memory size in the former case and is unused in the latter. 
This moves it out to the main memblock structure instead. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 52df5428ece4..f661f6c527da 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -301,7 +301,7 @@ void __init mem_init(void) swiotlb_init(1); #endif - num_physpages = memblock.memory.size >> PAGE_SHIFT; + num_physpages = memblock_phys_mem_size() >> PAGE_SHIFT; high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); #ifdef CONFIG_NEED_MULTIPLE_NODES -- cgit v1.2.3 From 70791ce9ba68a5921c9905ef05d23f62a90bc10c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 29 Jun 2010 19:34:05 +0200 Subject: perf: Generalize callchain_store() callchain_store() is the same on every arch, inline it in perf_event.h and rename it to perf_callchain_store() to avoid any collision. This removes repetitive code. Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/powerpc/kernel/perf_callchain.c | 40 +++++++++++++----------------------- 1 file changed, 14 insertions(+), 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9dad298e..a286c2e5a3ea 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) { - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? 
@@ -69,8 +57,8 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +77,7 @@ next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +99,7 @@ ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -246,8 +234,8 @@ next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +264,14 @@ read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -447,8 +435,8 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,14 +458,14 @@ read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } -- cgit v1.2.3 From 56962b4449af34070bb1994621ef4f0265eed4d8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Jun 2010 23:03:51 +0200 Subject: perf: Generalize some arch callchain code - Most archs use one callchain buffer per cpu, except x86, which needs to deal with NMIs. Provide a default perf_callchain_buffer() implementation that x86 overrides. - Centralize all the kernel/user regs handling and invoke new arch handlers from there: perf_callchain_user() / perf_callchain_kernel() That avoids all the user_mode(), current->mm checks and so... - Invert some parameters in perf_callchain_*() helpers: entry to the left, regs to the right, following the traditional (dst, src). 
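For illustration, the default perf_callchain_buffer() mentioned above amounts to little more than the per-cpu entry that the diff below deletes from the powerpc file (a sketch; the actual generic implementation lives outside arch/powerpc and is not part of this diff):

	/* One callchain scratch entry per cpu; x86 overrides this
	 * because an NMI can arrive inside the PMU handler and needs
	 * its own buffer.
	 */
	static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain);

	struct perf_callchain_entry *perf_callchain_buffer(void)
	{
		return &__get_cpu_var(cpu_perf_callchain);
	}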
Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/powerpc/kernel/perf_callchain.c | 49 +++++++++++------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index a286c2e5a3ea..f7a85ede8407 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -46,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -221,8 +221,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -303,8 +303,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -423,8 +423,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -471,32 +471,11 @@ static void perf_callchain_user_32(struct pt_regs *regs, } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } -- cgit v1.2.3 From f72c1a931e311bb7780fee19e41a89ac42cab50e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 1 Jul 2010 02:31:21 +0200 Subject: perf: Factorize callchain context handling Store the kernel and user contexts from the generic layer instead of the archs; this gathers some repetitive code. 
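Combined with the dispatch moved out of the arch by the previous patch, the generic layer ends up looking roughly like this (a sketch reconstructed from the powerpc code being removed; the real generic implementation is outside arch/powerpc and may differ in detail):

	struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
	{
		struct perf_callchain_entry *entry = perf_callchain_buffer();

		entry->nr = 0;

		if (!user_mode(regs)) {
			perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
			perf_callchain_kernel(entry, regs);
			regs = current->mm ? task_pt_regs(current) : NULL;
		}

		if (regs) {
			perf_callchain_store(entry, PERF_CONTEXT_USER);
			perf_callchain_user(entry, regs);
		}

		return entry;
	}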
Signed-off-by: Frederic Weisbecker Acked-by: Paul Mackerras Tested-by: Will Deacon Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: David Miller Cc: Paul Mundt Cc: Borislav Petkov --- arch/powerpc/kernel/perf_callchain.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index f7a85ede8407..d05ae4204bbf 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -57,7 +57,6 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, PERF_CONTEXT_KERNEL); perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) @@ -234,7 +233,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry *entry, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, next_ip); for (;;) { @@ -435,7 +433,6 @@ static void perf_callchain_user_32(struct perf_callchain_entry *entry, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - perf_callchain_store(entry, PERF_CONTEXT_USER); perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { -- cgit v1.2.3 From 59482fe5959675f180469ae95e4fcd0a15920ced Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Tue, 31 Aug 2010 17:43:51 +0900 Subject: powerpc/512x: fix clk_get() return value clk_get() should return an ERR_PTR value on error, not NULL. Signed-off-by: Akinobu Mita Signed-off-by: Grant Likely --- arch/powerpc/platforms/512x/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c index 5b243bd3eb3b..3dc2a8d262b8 100644 --- a/arch/powerpc/platforms/512x/clock.c +++ b/arch/powerpc/platforms/512x/clock.c @@ -57,7 +57,7 @@ static struct clk *mpc5121_clk_get(struct device *dev, const char *id) int id_match = 0; if (dev == NULL || id == NULL) - return NULL; + return clk; mutex_lock(&clocks_mutex); list_for_each_entry(p, &clocks, node) { -- cgit v1.2.3 From 9b83ecb0a3cf1bf7ecf84359ddcfb9dd49646bf2 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 2 Aug 2010 20:08:34 +0000 Subject: powerpc: Optimise 64bit csum_partial The main loop of csum_partial runs very slowly on recent POWER CPUs. After some analysis on both POWER6 and POWER7 I came up with the routine below. First we get the source aligned to a double word, ignoring any odd alignment to keep things simple. Then we do 64 bytes at a time, with an entry and exit limb of a further 64 bytes. On both POWER6 and POWER7 this should be as fast as we can go since we are limited by the latency of the adde instructions. To test this I forced checksumming on over loopback and ran socklib (a simple TCP benchmark). On a POWER6 575 throughput improved by 11% with this patch. 
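In C terms the approach is roughly the following portable model (illustration only, ignoring the sub-doubleword head and tail handling; the real routine is the assembly below):

	#include <stdint.h>
	#include <stddef.h>

	/* Accumulate 64-bit words with end-around carry (what adde
	 * does), eight doublewords per unrolled iteration, then fold
	 * the 64-bit accumulator down to 32 bits.
	 */
	static uint32_t csum_model(const uint64_t *p, size_t ndw, uint32_t sum)
	{
		uint64_t acc = sum, t;

		while (ndw >= 8) {
			for (int i = 0; i < 8; i++) {
				t = acc + p[i];
				acc = t + (t < acc);	/* add the carry back in */
			}
			p += 8;
			ndw -= 8;
		}
		while (ndw--) {
			t = acc + *p++;
			acc = t + (t < acc);
		}
		acc = (acc & 0xffffffff) + (acc >> 32);	/* fold the two halves */
		acc = (acc & 0xffffffff) + (acc >> 32);
		return (uint32_t)acc;
	}

The unrolling does not reduce the number of adde instructions; the point of the scheduling in the assembly is to hide the load latency so the loop is limited only by the two-cycle adde dependency chain.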
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/checksum_64.S | 193 ++++++++++++++++++++++++++++++++--------- 1 file changed, 153 insertions(+), 40 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index ef96c6c58efc..404d5a6e3387 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -65,55 +65,168 @@ _GLOBAL(csum_tcpudp_magic) srwi r3,r3,16 blr +#define STACKFRAMESIZE 256 +#define STK_REG(i) (112 + ((i)-14)*8) + /* * Computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit). * - * This code assumes at least halfword alignment, though the length - * can be any number of bytes. The sum is accumulated in r5. - * * csum_partial(r3=buff, r4=len, r5=sum) */ _GLOBAL(csum_partial) - subi r3,r3,8 /* we'll offset by 8 for the loads */ - srdi. r6,r4,3 /* divide by 8 for doubleword count */ - addic r5,r5,0 /* clear carry */ - beq 3f /* if we're doing < 8 bytes */ - andi. r0,r3,2 /* aligned on a word boundary already? */ - beq+ 1f - lhz r6,8(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r4,r4,2 - addc r5,r5,r6 - srdi. r6,r4,3 /* recompute number of doublewords */ - beq 3f /* any left? */ -1: mtctr r6 -2: ldu r6,8(r3) /* main sum loop */ - adde r5,r5,r6 - bdnz 2b - andi. r4,r4,7 /* compute bytes left to sum after doublewords */ -3: cmpwi 0,r4,4 /* is at least a full word left? */ - blt 4f - lwz r6,8(r3) /* sum this word */ + addic r0,r5,0 /* clear carry */ + + srdi. r6,r4,3 /* less than 8 bytes? */ + beq .Lcsum_tail_word + + /* + * If only halfword aligned, align to a double word. Since odd + * aligned addresses should be rare and they would require more + * work to calculate the correct checksum, we ignore that case + * and take the potential slowdown of unaligned loads. + */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + beq .Lcsum_aligned + + li r7,4 + sub r6,r7,r6 + mtctr r6 + +1: + lhz r6,0(r3) /* align to doubleword */ + subi r4,r4,2 + addi r3,r3,2 + adde r0,r0,r6 + bdnz 1b + +.Lcsum_aligned: + /* + * We unroll the loop such that each iteration is 64 bytes with an + * entry and exit limb of 64 bytes, meaning a minimum size of + * 128 bytes. + */ + srdi. r6,r4,7 + beq .Lcsum_tail_doublewords /* len < 128 */ + + srdi r6,r4,6 + subi r6,r6,1 + mtctr r6 + + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(r14)(r1) + std r15,STK_REG(r15)(r1) + std r16,STK_REG(r16)(r1) + + ld r6,0(r3) + ld r9,8(r3) + + ld r10,16(r3) + ld r11,24(r3) + + /* + * On POWER6 and POWER7 back to back addes take 2 cycles because of + * the XER dependency. This means the fastest this loop can go is + * 16 cycles per iteration. The scheduling of the loop below has + * been shown to hit this on both POWER6 and POWER7. + */ + .align 5 +2: + adde r0,r0,r6 + ld r12,32(r3) + ld r14,40(r3) + + adde r0,r0,r9 + ld r15,48(r3) + ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 + + adde r0,r0,r11 + + adde r0,r0,r12 + + adde r0,r0,r14 + + adde r0,r0,r15 + ld r6,0(r3) + ld r9,8(r3) + + adde r0,r0,r16 + ld r10,16(r3) + ld r11,24(r3) + bdnz 2b + + + adde r0,r0,r6 + ld r12,32(r3) + ld r14,40(r3) + + adde r0,r0,r9 + ld r15,48(r3) + ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 + adde r0,r0,r11 + adde r0,r0,r12 + adde r0,r0,r14 + adde r0,r0,r15 + adde r0,r0,r16 + + ld r14,STK_REG(r14)(r1) + ld r15,STK_REG(r15)(r1) + ld r16,STK_REG(r16)(r1) + addi r1,r1,STACKFRAMESIZE + + andi. r4,r4,63 + +.Lcsum_tail_doublewords: /* Up to 127 bytes to go */ + srdi. 
r6,r4,3 + beq .Lcsum_tail_word + + mtctr r6 +3: + ld r6,0(r3) + addi r3,r3,8 + adde r0,r0,r6 + bdnz 3b + + andi. r4,r4,7 + +.Lcsum_tail_word: /* Up to 7 bytes to go */ + srdi. r6,r4,2 + beq .Lcsum_tail_halfword + + lwz r6,0(r3) addi r3,r3,4 + adde r0,r0,r6 subi r4,r4,4 - adde r5,r5,r6 -4: cmpwi 0,r4,2 /* is at least a halfword left? */ - blt+ 5f - lhz r6,8(r3) /* sum this halfword */ - addi r3,r3,2 - subi r4,r4,2 - adde r5,r5,r6 -5: cmpwi 0,r4,1 /* is at least a byte left? */ - bne+ 6f - lbz r6,8(r3) /* sum this byte */ - slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ - adde r5,r5,r6 -6: addze r5,r5 /* add in final carry */ - rldicl r4,r5,32,0 /* fold two 32-bit halves together */ - add r3,r4,r5 - srdi r3,r3,32 - blr + +.Lcsum_tail_halfword: /* Up to 3 bytes to go */ + srdi. r6,r4,1 + beq .Lcsum_tail_byte + + lhz r6,0(r3) + addi r3,r3,2 + adde r0,r0,r6 + subi r4,r4,2 + +.Lcsum_tail_byte: /* Up to 1 byte to go */ + andi. r6,r4,1 + beq .Lcsum_finish + + lbz r6,0(r3) + sldi r9,r6,8 /* Pad the byte out to 16 bits */ + adde r0,r0,r9 + +.Lcsum_finish: + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32 bit halves together */ + add r3,r4,r0 + srdi r3,r3,32 + blr /* * Computes the checksum of a memory block at src, length len, -- cgit v1.2.3 From fdd374b62ca4df144c0138359dcffa83df7a0ea8 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 2 Aug 2010 20:09:52 +0000 Subject: powerpc: Optimise 64bit csum_partial_copy_generic and add csum_and_copy_from_user We use the same core loop as the new csum_partial, adding in the stores and exception handling code. To keep things simple we do all the exception fixup in csum_and_copy_from_user. This wrapper function is modelled on the generic checksum code and is careful to always calculate a complete checksum even if we only copied part of the data to userspace. To test this I forced checksumming on over loopback and ran socklib (a simple TCP benchmark). On a POWER6 575 throughput improved by 19% with this patch. If I forced both the sender and receiver onto the same cpu (with the hope of shifting the benchmark from being cache bandwidth limited to cpu limited), adding this patch improved performance by 55% Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/checksum.h | 7 + arch/powerpc/lib/Makefile | 3 +- arch/powerpc/lib/checksum_64.S | 289 ++++++++++++++++++++++---------- arch/powerpc/lib/checksum_wrappers_64.c | 65 +++++++ 4 files changed, 276 insertions(+), 88 deletions(-) create mode 100644 arch/powerpc/lib/checksum_wrappers_64.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 7cdf358337cf..9ea58c0e7cfb 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -52,12 +52,19 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); extern __wsum csum_partial_copy_generic(const void *src, void *dst, int len, __wsum sum, int *src_err, int *dst_err); + +#ifdef __powerpc64__ +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr); +#else /* * the same as csum_partial, but copies from src to dst while it * checksums. 
*/ #define csum_partial_copy_from_user(src, dst, len, sum, errp) \ csum_partial_copy_generic((__force const void *)(src), (dst), (len), (sum), (errp), NULL) +#endif #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 5bb89c828070..ad4a36848f25 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -17,7 +17,8 @@ obj-$(CONFIG_PPC32) += div64.o copy_32.o obj-$(CONFIG_HAS_IOMEM) += devres.o obj-$(CONFIG_PPC64) += copypage_64.o copyuser_64.o \ - memcpy_64.o usercopy_64.o mem_64.o string.o + memcpy_64.o usercopy_64.o mem_64.o string.o \ + checksum_wrappers_64.o obj-$(CONFIG_XMON) += sstep.o ldstfp.o obj-$(CONFIG_KPROBES) += sstep.o ldstfp.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += sstep.o ldstfp.o diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 404d5a6e3387..18245af38aea 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -228,115 +228,230 @@ _GLOBAL(csum_partial) srdi r3,r3,32 blr + + .macro source +100: + .section __ex_table,"a" + .align 3 + .llong 100b,.Lsrc_error + .previous + .endm + + .macro dest +200: + .section __ex_table,"a" + .align 3 + .llong 200b,.Ldest_error + .previous + .endm + /* * Computes the checksum of a memory block at src, length len, * and adds in "sum" (32-bit), while copying the block to dst. * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. - * - * This code needs to be reworked to take advantage of 64 bit sum+copy. - * However, due to tokenring halfword alignment problems this will be very - * tricky. For now we'll leave it until we instrument it somehow. + * to *src_err or *dst_err respectively. The caller must take any action + * required in this case (zeroing memory, recalculating partial checksum etc). * * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) */ _GLOBAL(csum_partial_copy_generic) - addic r0,r6,0 - subi r3,r3,4 - subi r4,r4,4 - srwi. r6,r5,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r9,r4,2 /* Align dst to longword boundary */ - beq+ 1f -81: lhz r6,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 + addic r0,r6,0 /* clear carry */ + + srdi. r6,r5,3 /* less than 8 bytes? */ + beq .Lcopy_tail_word + + /* + * If only halfword aligned, align to a double word. Since odd + * aligned addresses should be rare and they would require more + * work to calculate the correct checksum, we ignore that case + * and take the potential slowdown of unaligned loads. + * + * If the source and destination are relatively unaligned we only + * align the source. This keeps things simple. + */ + rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */ + beq .Lcopy_aligned + + li r7,4 + sub r6,r7,r6 + mtctr r6 + +1: +source; lhz r6,0(r3) /* align to doubleword */ subi r5,r5,2 -91: sth r6,4(r4) - addi r4,r4,2 - addc r0,r0,r6 - srwi. r6,r5,2 /* # words to do */ - beq 3f -1: mtctr r6 -82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ -92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ - adde r0,r0,r6 - bdnz 82b - andi. 
r5,r5,3 -3: cmpwi 0,r5,2 - blt+ 4f -83: lhz r6,4(r3) addi r3,r3,2 - subi r5,r5,2 -93: sth r6,4(r4) + adde r0,r0,r6 +dest; sth r6,0(r4) addi r4,r4,2 + bdnz 1b + +.Lcopy_aligned: + /* + * We unroll the loop such that each iteration is 64 bytes with an + * entry and exit limb of 64 bytes, meaning a minimum size of + * 128 bytes. + */ + srdi. r6,r5,7 + beq .Lcopy_tail_doublewords /* len < 128 */ + + srdi r6,r5,6 + subi r6,r6,1 + mtctr r6 + + stdu r1,-STACKFRAMESIZE(r1) + std r14,STK_REG(r14)(r1) + std r15,STK_REG(r15)(r1) + std r16,STK_REG(r16)(r1) + +source; ld r6,0(r3) +source; ld r9,8(r3) + +source; ld r10,16(r3) +source; ld r11,24(r3) + + /* + * On POWER6 and POWER7 back to back addes take 2 cycles because of + * the XER dependency. This means the fastest this loop can go is + * 16 cycles per iteration. The scheduling of the loop below has + * been shown to hit this on both POWER6 and POWER7. + */ + .align 5 +2: + adde r0,r0,r6 +source; ld r12,32(r3) +source; ld r14,40(r3) + + adde r0,r0,r9 +source; ld r15,48(r3) +source; ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 +dest; std r6,0(r4) +dest; std r9,8(r4) + + adde r0,r0,r11 +dest; std r10,16(r4) +dest; std r11,24(r4) + + adde r0,r0,r12 +dest; std r12,32(r4) +dest; std r14,40(r4) + + adde r0,r0,r14 +dest; std r15,48(r4) +dest; std r16,56(r4) + addi r4,r4,64 + + adde r0,r0,r15 +source; ld r6,0(r3) +source; ld r9,8(r3) + + adde r0,r0,r16 +source; ld r10,16(r3) +source; ld r11,24(r3) + bdnz 2b + + adde r0,r0,r6 -4: cmpwi 0,r5,1 - bne+ 5f -84: lbz r6,4(r3) -94: stb r6,4(r4) - slwi r6,r6,8 /* Upper byte of word */ +source; ld r12,32(r3) +source; ld r14,40(r3) + + adde r0,r0,r9 +source; ld r15,48(r3) +source; ld r16,56(r3) + addi r3,r3,64 + + adde r0,r0,r10 +dest; std r6,0(r4) +dest; std r9,8(r4) + + adde r0,r0,r11 +dest; std r10,16(r4) +dest; std r11,24(r4) + + adde r0,r0,r12 +dest; std r12,32(r4) +dest; std r14,40(r4) + + adde r0,r0,r14 +dest; std r15,48(r4) +dest; std r16,56(r4) + addi r4,r4,64 + + adde r0,r0,r15 + adde r0,r0,r16 + + ld r14,STK_REG(r14)(r1) + ld r15,STK_REG(r15)(r1) + ld r16,STK_REG(r16)(r1) + addi r1,r1,STACKFRAMESIZE + + andi. r5,r5,63 + +.Lcopy_tail_doublewords: /* Up to 127 bytes to go */ + srdi. r6,r5,3 + beq .Lcopy_tail_word + + mtctr r6 +3: +source; ld r6,0(r3) + addi r3,r3,8 adde r0,r0,r6 -5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ - rldicl r4,r3,32,0 /* fold 64 bit value */ - add r3,r4,r3 - srdi r3,r3,32 - blr +dest; std r6,0(r4) + addi r4,r4,8 + bdnz 3b -/* These shouldn't go in the fixup section, since that would - cause the ex_table addresses to get out of order. */ + andi. r5,r5,7 - .globl src_error_1 -src_error_1: - li r6,0 - subi r5,r5,2 -95: sth r6,4(r4) +.Lcopy_tail_word: /* Up to 7 bytes to go */ + srdi. r6,r5,2 + beq .Lcopy_tail_halfword + +source; lwz r6,0(r3) + addi r3,r3,4 + adde r0,r0,r6 +dest; stw r6,0(r4) + addi r4,r4,4 + subi r5,r5,4 + +.Lcopy_tail_halfword: /* Up to 3 bytes to go */ + srdi. r6,r5,1 + beq .Lcopy_tail_byte + +source; lhz r6,0(r3) + addi r3,r3,2 + adde r0,r0,r6 +dest; sth r6,0(r4) addi r4,r4,2 - srwi. r6,r5,2 - beq 3f - mtctr r6 - .globl src_error_2 -src_error_2: - li r6,0 -96: stwu r6,4(r4) - bdnz 96b -3: andi. r5,r5,3 - beq src_error - .globl src_error_3 -src_error_3: - li r6,0 - mtctr r5 - addi r4,r4,3 -97: stbu r6,1(r4) - bdnz 97b - .globl src_error -src_error: + subi r5,r5,2 + +.Lcopy_tail_byte: /* Up to 1 byte to go */ + andi. 
r6,r5,1 + beq .Lcopy_finish + +source; lbz r6,0(r3) + sldi r9,r6,8 /* Pad the byte out to 16 bits */ + adde r0,r0,r9 +dest; stb r6,0(r4) + +.Lcopy_finish: + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32 bit halves together */ + add r3,r4,r0 + srdi r3,r3,32 + blr + +.Lsrc_error: cmpdi 0,r7,0 - beq 1f + beqlr li r6,-EFAULT stw r6,0(r7) -1: addze r3,r0 blr - .globl dst_error -dst_error: +.Ldest_error: cmpdi 0,r8,0 - beq 1f + beqlr li r6,-EFAULT stw r6,0(r8) -1: addze r3,r0 blr - -.section __ex_table,"a" - .align 3 - .llong 81b,src_error_1 - .llong 91b,dst_error - .llong 82b,src_error_2 - .llong 92b,dst_error - .llong 83b,src_error_3 - .llong 93b,dst_error - .llong 84b,src_error_3 - .llong 94b,dst_error - .llong 95b,dst_error - .llong 96b,dst_error - .llong 97b,dst_error diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c new file mode 100644 index 000000000000..614cff1a8e0e --- /dev/null +++ b/arch/powerpc/lib/checksum_wrappers_64.c @@ -0,0 +1,65 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2010 + * + * Author: Anton Blanchard + */ +#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/types.h> +#include <asm/checksum.h> +#include <asm/uaccess.h> + +__wsum csum_and_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_READ, src, len))) { + *err_ptr = -EFAULT; + csum = (__force unsigned int)sum; + goto out; + } + + csum = csum_partial_copy_generic((void __force *)src, dst, + len, sum, err_ptr, NULL); + + if (unlikely(*err_ptr)) { + int missing = __copy_from_user(dst, src, len); + + if (missing) { + memset(dst + len - missing, 0, missing); + *err_ptr = -EFAULT; + } else { + *err_ptr = 0; + } + + csum = csum_partial(dst, len, sum); + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_from_user); -- cgit v1.2.3 From 8c77391475bc3284a380fc46aaf0bcf26bde3ae6 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 2 Aug 2010 20:11:36 +0000 Subject: powerpc: Add 64bit csum_and_copy_to_user This adds the equivalent of csum_and_copy_from_user for the receive side so we can copy and checksum in one pass. It is modelled on the generic checksum routine.
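For illustration, a receive-path caller would use the new wrapper roughly as in the sketch below; deliver_csummed() and its parameter names are invented for this example (only csum_and_copy_to_user() and csum_fold() come from the kernel), so treat it as a sketch rather than code from the patch:

/*
 * Hedged sketch (kernel context, needs <net/checksum.h> and
 * <linux/uaccess.h>): copy kernel data to userspace and checksum it
 * in one pass, returning -EFAULT if the user buffer faults.
 */
static int deliver_csummed(const void *kbuf, void __user *ubuf, int len,
			   __wsum partial, __sum16 *csum_out)
{
	int err = 0;
	__wsum csum;

	csum = csum_and_copy_to_user(kbuf, ubuf, len, partial, &err);
	if (err)
		return -EFAULT;		/* wrapper stored -EFAULT via err_ptr */

	*csum_out = csum_fold(csum);	/* fold the 32-bit partial sum to 16 bits */
	return 0;
}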
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/checksum.h | 3 +++ arch/powerpc/lib/checksum_wrappers_64.c | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 9ea58c0e7cfb..ce0c28495f9a 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -57,6 +57,9 @@ extern __wsum csum_partial_copy_generic(const void *src, void *dst, #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr); +#define HAVE_CSUM_COPY_USER +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, + int len, __wsum sum, int *err_ptr); #else /* * the same as csum_partial, but copies from src to dst while it diff --git a/arch/powerpc/lib/checksum_wrappers_64.c b/arch/powerpc/lib/checksum_wrappers_64.c index 614cff1a8e0e..769b817fbb32 100644 --- a/arch/powerpc/lib/checksum_wrappers_64.c +++ b/arch/powerpc/lib/checksum_wrappers_64.c @@ -63,3 +63,40 @@ out: return (__force __wsum)csum; } EXPORT_SYMBOL(csum_and_copy_from_user); + +__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, + __wsum sum, int *err_ptr) +{ + unsigned int csum; + + might_sleep(); + + *err_ptr = 0; + + if (!len) { + csum = 0; + goto out; + } + + if (unlikely((len < 0) || !access_ok(VERIFY_WRITE, dst, len))) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + goto out; + } + + csum = csum_partial_copy_generic(src, (void __force *)dst, + len, sum, NULL, err_ptr); + + if (unlikely(*err_ptr)) { + csum = csum_partial(src, len, sum); + + if (copy_to_user(dst, src, len)) { + *err_ptr = -EFAULT; + csum = -1; /* invalid checksum */ + } + } + +out: + return (__force __wsum)csum; +} +EXPORT_SYMBOL(csum_and_copy_to_user); -- cgit v1.2.3 From f89451fbd2b9f28f5ff156154989599ec062354b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 11 Aug 2010 01:40:27 +0000 Subject: powerpc: Feature nop out reservation clear when stcx checks address The POWER architecture does not require stcx to check that it is operating on the same address as the larx. This means it is possible for an exception handler to execute a larx, get a reservation, decide not to do the stcx and then return back with an active reservation. If the interrupted code was in the middle of a larx/stcx sequence the stcx could incorrectly succeed. All recent POWER CPUs check the address before letting the stcx succeed so we can create a CPU feature and nop it out. As Ben suggested, we can only do this in our syscall path because there is a remote possibility some kernel code gets interrupted by an exception that ends up operating on the same cacheline. Thanks to Paul Mackerras and Derek Williams for the idea. To test this I used a very simple null syscall (actually getppid) testcase at http://ozlabs.org/~anton/junkcode/null_syscall.c I tested against 2.6.35-git10 with the following changes against the pseries_defconfig: CONFIG_VIRT_CPU_ACCOUNTING=n CONFIG_AUDIT=n CONFIG_PPC_4K_PAGES=n CONFIG_PPC_64K_PAGES=y CONFIG_FORCE_MAX_ZONEORDER=9 CONFIG_PPC_SUBPAGE_PROT=n CONFIG_FUNCTION_TRACER=n CONFIG_FUNCTION_GRAPH_TRACER=n CONFIG_IRQSOFF_TRACER=n CONFIG_STACK_TRACER=n to remove the overhead of virtual CPU accounting, syscall auditing and the ftrace mcount tracers. 64kB pages were enabled to minimise TLB misses.
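For context, a testcase along those lines amounts to timing a cheap syscall in a tight loop. The sketch below is illustrative only; the real null_syscall.c at the URL above differs in detail:

/* Time a near-null syscall (getppid) to measure syscall entry/exit cost. */
#include <stdio.h>
#include <sys/time.h>
#include <unistd.h>

#define ITERATIONS 10000000UL

int main(void)
{
	struct timeval start, end;
	unsigned long i;
	double ns;

	gettimeofday(&start, NULL);
	for (i = 0; i < ITERATIONS; i++)
		getppid();	/* does almost no work in the kernel */
	gettimeofday(&end, NULL);

	ns = (end.tv_sec - start.tv_sec) * 1e9 +
	     (end.tv_usec - start.tv_usec) * 1e3;
	printf("%.1f ns per syscall\n", ns / ITERATIONS);
	return 0;
}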
POWER6: +8.2% POWER7: +7.0% Another suggestion was to use a larx to something in the L1 instead of a stcx. This was almost as fast as removing the larx on POWER6, but only 3.5% faster on POWER7. We can use this to speed up the reservation clear in our exception exit code. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cputable.h | 14 +++++++++----- arch/powerpc/kernel/entry_64.S | 22 ++++++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 3a40a992e594..f3a1fdd9cf08 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -198,6 +198,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_CP_USE_DCBTZ LONG_ASM_CONST(0x0040000000000000) #define CPU_FTR_UNALIGNED_LD_STD LONG_ASM_CONST(0x0080000000000000) #define CPU_FTR_ASYM_SMT LONG_ASM_CONST(0x0100000000000000) +#define CPU_FTR_STCX_CHECKS_ADDRESS LONG_ASM_CONST(0x0200000000000000) #ifndef __ASSEMBLY__ @@ -392,28 +393,31 @@ extern const char *powerpc_base_platform; CPU_FTR_MMCRA | CPU_FTR_CTRL) #define CPU_FTRS_POWER4 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ - CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ) + CPU_FTR_MMCRA | CPU_FTR_CP_USE_DCBTZ | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_PPC970 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA | \ - CPU_FTR_CP_USE_DCBTZ) + CPU_FTR_CP_USE_DCBTZ | CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER5 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ - CPU_FTR_PURR) + CPU_FTR_PURR | CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD) + CPU_FTR_DSCR | CPU_FTR_UNALIGNED_LD_STD | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_POWER7 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_MMCRA | CPU_FTR_SMT | \ CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \ CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \ - CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT) + CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT | \ + CPU_FTR_STCX_CHECKS_ADDRESS) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 42e9d908914a..4d5fa12ca6e8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -202,7 +202,9 @@ syscall_exit: bge- syscall_error syscall_error_cont: ld r7,_NIP(r1) +BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ +END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) andi. 
r6,r8,MSR_PR ld r4,_LINK(r1) /* @@ -419,6 +421,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) sync #endif /* CONFIG_SMP */ + /* + * If we optimise away the clear of the reservation in system + * calls because we know the CPU tracks the address of the + * reservation, then we need to clear it here to cover the + * case that the kernel context switch path has no larx + * instructions. + */ +BEGIN_FTR_SECTION + ldarx r6,0,r1 +END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ std r6,PACACURRENT(r13) /* Set new 'current' */ @@ -576,7 +589,16 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) andi. r0,r3,MSR_RI beq- unrecov_restore + /* + * Clear the reservation. If we know the CPU tracks the address of + * the reservation then we can potentially save some cycles and use + * a larx. On POWER6 and POWER7 this is significantly faster. + */ +BEGIN_FTR_SECTION stdcx. r0,0,r1 /* to clear the reservation */ +FTR_SECTION_ELSE + ldarx r4,0,r1 +ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) /* * Clear RI before restoring r13. If we are returning to -- cgit v1.2.3 From 28b549905b239357db7c249e261857c1716db05a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Tue, 24 Aug 2010 13:15:28 +0000 Subject: powerpc: Check end of stack canary at oops time Add a check for the stack canary when we oops, similar to x86. This should make it clear that we overran our stack: Unable to handle kernel paging request for data at address 0x24652f63700ac689 Faulting instruction address: 0xc000000000063d24 Thread overran stack, or stack corrupted Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/fault.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1bd712c33ce2..54f4fb994e99 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -385,6 +386,7 @@ do_sigbus: void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) { const struct exception_table_entry *entry; + unsigned long *stackend; /* Are we prepared to handle this fault? */ if ((entry = search_exception_tables(regs->nip)) != NULL) { @@ -413,5 +415,9 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", regs->nip); + stackend = end_of_stack(current); + if (current != &init_task && *stackend != STACK_END_MAGIC) + printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); + die("Kernel access of bad area", regs, sig); } -- cgit v1.2.3 From e1f0ece113fe028593b6869fe191a991322c5d85 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 10 Aug 2010 20:02:05 +0000 Subject: powerpc: Move arch_sd_sibling_asym_packing() to smp.c Simple cleanup by moving arch_sd_sibling_asym_packing from process.c to smp.c to save an #ifdef CONFIG_SMP No functionality change. 
Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 11 ----------- arch/powerpc/kernel/smp.c | 9 +++++++++ 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b1c648a36b03..37bc8ff16cac 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1298,14 +1298,3 @@ unsigned long randomize_et_dyn(unsigned long base) return ret; } - -#ifdef CONFIG_SMP -int arch_sd_sibling_asym_packing(void) -{ - if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { - printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); - return SD_ASYM_PACKING; - } - return 0; -} -#endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 0008bc58e826..9019f0f1bb5e 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -580,6 +580,15 @@ void __init smp_cpus_done(unsigned int max_cpus) dump_numa_cpu_topology(); } +int arch_sd_sibling_asym_packing(void) +{ + if (cpu_has_feature(CPU_FTR_ASYM_SMT)) { + printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n"); + return SD_ASYM_PACKING; + } + return 0; +} + #ifdef CONFIG_HOTPLUG_CPU int __cpu_disable(void) { -- cgit v1.2.3 From 8154c5d22d91cd16bd9985b0638c8957e4688d0e Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Aug 2010 20:18:15 +0000 Subject: powerpc: Abstract indexing of lppaca structs Currently we have the lppaca structs as a simple array of NR_CPUS entries, taking up space in the data section of the kernel image. In future we would like to allocate them dynamically, so this abstracts out the accesses to the array, making it easier to change how we locate the lppaca for a given cpu in future. Specifically, lppaca[cpu] changes to lppaca_of(cpu). Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/lppaca.h | 2 ++ arch/powerpc/kernel/lparcfg.c | 14 +++++++------- arch/powerpc/lib/locks.c | 4 ++-- arch/powerpc/platforms/iseries/dt.c | 4 ++-- arch/powerpc/platforms/iseries/smp.c | 2 +- arch/powerpc/platforms/pseries/dtl.c | 8 ++++---- arch/powerpc/platforms/pseries/lpar.c | 4 ++-- 7 files changed, 20 insertions(+), 18 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 14b592dfb4e8..6b73554433a0 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -153,6 +153,8 @@ struct lppaca { extern struct lppaca lppaca[]; +#define lppaca_of(cpu) (lppaca[cpu]) + /* * SLB shadow buffer structure as defined in the PAPR. The save_area * contains adjacent ESID and VSID pairs for each shadowed SLB. 
The diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 50362b6ef6e9..8d9e3b9cda64 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -56,7 +56,7 @@ static unsigned long get_purr(void) for_each_possible_cpu(cpu) { if (firmware_has_feature(FW_FEATURE_ISERIES)) - sum_purr += lppaca[cpu].emulated_time_base; + sum_purr += lppaca_of(cpu).emulated_time_base; else { struct cpu_usage *cu; @@ -263,7 +263,7 @@ static void parse_ppp_data(struct seq_file *m) ppp_data.active_system_procs); /* pool related entries are apropriate for shared configs */ - if (lppaca[0].shared_proc) { + if (lppaca_of(0).shared_proc) { unsigned long pool_idle_time, pool_procs; seq_printf(m, "pool=%d\n", ppp_data.pool_num); @@ -460,8 +460,8 @@ static void pseries_cmo_data(struct seq_file *m) return; for_each_possible_cpu(cpu) { - cmo_faults += lppaca[cpu].cmo_faults; - cmo_fault_time += lppaca[cpu].cmo_fault_time; + cmo_faults += lppaca_of(cpu).cmo_faults; + cmo_fault_time += lppaca_of(cpu).cmo_fault_time; } seq_printf(m, "cmo_faults=%lu\n", cmo_faults); @@ -479,8 +479,8 @@ static void splpar_dispatch_data(struct seq_file *m) unsigned long dispatch_dispersions = 0; for_each_possible_cpu(cpu) { - dispatches += lppaca[cpu].yield_count; - dispatch_dispersions += lppaca[cpu].dispersion_count; + dispatches += lppaca_of(cpu).yield_count; + dispatch_dispersions += lppaca_of(cpu).dispersion_count; } seq_printf(m, "dispatches=%lu\n", dispatches); @@ -545,7 +545,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) seq_printf(m, "partition_potential_processors=%d\n", partition_potential_processors); - seq_printf(m, "shared_processor_mode=%d\n", lppaca[0].shared_proc); + seq_printf(m, "shared_processor_mode=%d\n", lppaca_of(0).shared_proc); seq_printf(m, "slb_size=%d\n", mmu_slb_size); diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c index 58e14fba11b1..9b8182e82166 100644 --- a/arch/powerpc/lib/locks.c +++ b/arch/powerpc/lib/locks.c @@ -34,7 +34,7 @@ void __spin_yield(arch_spinlock_t *lock) return; holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca[holder_cpu].yield_count; + yield_count = lppaca_of(holder_cpu).yield_count; if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); @@ -65,7 +65,7 @@ void __rw_yield(arch_rwlock_t *rw) return; /* no write lock at present */ holder_cpu = lock_value & 0xffff; BUG_ON(holder_cpu >= NR_CPUS); - yield_count = lppaca[holder_cpu].yield_count; + yield_count = lppaca_of(holder_cpu).yield_count; if ((yield_count & 1) == 0) return; /* virtual cpu is currently running */ rmb(); diff --git a/arch/powerpc/platforms/iseries/dt.c b/arch/powerpc/platforms/iseries/dt.c index 7f45a51fe793..fdb7384c0c4f 100644 --- a/arch/powerpc/platforms/iseries/dt.c +++ b/arch/powerpc/platforms/iseries/dt.c @@ -243,7 +243,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt) pft_size[1] = __ilog2(HvCallHpt_getHptPages() * HW_PAGE_SIZE); for (i = 0; i < NR_CPUS; i++) { - if (lppaca[i].dyn_proc_status >= 2) + if (lppaca_of(i).dyn_proc_status >= 2) continue; snprintf(p, 32 - (p - buf), "@%d", i); @@ -251,7 +251,7 @@ static void __init dt_cpus(struct iseries_flat_dt *dt) dt_prop_str(dt, "device_type", device_type_cpu); - index = lppaca[i].dyn_hv_phys_proc_index; + index = lppaca_of(i).dyn_hv_phys_proc_index; d = &xIoHriProcessorVpd[index]; dt_prop_u32(dt, "i-cache-size", d->xInstCacheSize * 1024); diff --git a/arch/powerpc/platforms/iseries/smp.c 
b/arch/powerpc/platforms/iseries/smp.c index 6590850045af..6c6029914dbc 100644 --- a/arch/powerpc/platforms/iseries/smp.c +++ b/arch/powerpc/platforms/iseries/smp.c @@ -91,7 +91,7 @@ static void smp_iSeries_kick_cpu(int nr) BUG_ON((nr < 0) || (nr >= NR_CPUS)); /* Verify that our partition has a processor nr */ - if (lppaca[nr].dyn_proc_status >= 2) + if (lppaca_of(nr).dyn_proc_status >= 2) return; /* The processor is currently spinning, waiting diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index a00addb55945..adfd5441b612 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -107,14 +107,14 @@ static int dtl_enable(struct dtl *dtl) } /* set our initial buffer indices */ - dtl->last_idx = lppaca[dtl->cpu].dtl_idx = 0; + dtl->last_idx = lppaca_of(dtl->cpu).dtl_idx = 0; /* ensure that our updates to the lppaca fields have occurred before * we actually enable the logging */ smp_wmb(); /* enable event logging */ - lppaca[dtl->cpu].dtl_enable_mask = dtl_event_mask; + lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask; return 0; } @@ -123,7 +123,7 @@ static void dtl_disable(struct dtl *dtl) { int hwcpu = get_hard_smp_processor_id(dtl->cpu); - lppaca[dtl->cpu].dtl_enable_mask = 0x0; + lppaca_of(dtl->cpu).dtl_enable_mask = 0x0; unregister_dtl(hwcpu, __pa(dtl->buf)); @@ -171,7 +171,7 @@ static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, /* actual number of entries read */ n_read = 0; - cur_idx = lppaca[dtl->cpu].dtl_idx; + cur_idx = lppaca_of(dtl->cpu).dtl_idx; last_idx = dtl->last_idx; if (cur_idx - last_idx > dtl->buf_entries) { diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index cf79b46d8f88..a17fe4a9059f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -250,9 +250,9 @@ void vpa_init(int cpu) long ret; if (cpu_has_feature(CPU_FTR_ALTIVEC)) - lppaca[cpu].vmxregs_in_use = 1; + lppaca_of(cpu).vmxregs_in_use = 1; - addr = __pa(&lppaca[cpu]); + addr = __pa(&lppaca_of(cpu)); ret = register_vpa(hwcpu, addr); if (ret) { -- cgit v1.2.3 From 93c22703efa72c7527dbd586d1951c1f4a85fd70 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Aug 2010 20:18:48 +0000 Subject: powerpc: Dynamically allocate most lppaca structs This arranges for the lppaca structs for most cpus to be dynamically allocated in the same manner as the paca structs. If we don't include support for legacy iSeries, only the first lppaca is statically allocated; the rest are dynamically allocated. If we include legacy iSeries support, then we statically allocate the first 64 lppaca structs, since the iSeries hypervisor requires that the lppaca structs be present in the data section of the kernel image, but legacy iSeries supports at most 64 cpus. With CONFIG_NR_CPUS, the kernel image size for a typical pSeries config went from: text data bss dec hex filename 9524478 4734564 8469944 22728986 15ad11a ../test-1024/vmlinux to: text data bss dec hex filename 9524482 3751508 8469944 21745934 14bd10e ../test-1024/vmlinux a reduction of 983052 bytes overall. 
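That is, the data section shrinks by 4734564 - 3751508 = 983056 bytes while the text section grows by 4 bytes (bss is unchanged), which accounts for the net 983052-byte reduction.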
Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/lppaca.h | 2 +- arch/powerpc/kernel/paca.c | 70 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 69 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 6b73554433a0..6d02624b622c 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -153,7 +153,7 @@ struct lppaca { extern struct lppaca lppaca[]; -#define lppaca_of(cpu) (lppaca[cpu]) +#define lppaca_of(cpu) (*paca[cpu].lppaca_ptr) /* * SLB shadow buffer structure as defined in the PAPR. The save_area diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d0a26f1770fe..1e068a46e6c3 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -26,6 +26,20 @@ extern unsigned long __toc_start; #ifdef CONFIG_PPC_BOOK3S +/* + * We only have to have statically allocated lppaca structs on + * legacy iSeries, which supports at most 64 cpus. + */ +#ifdef CONFIG_PPC_ISERIES +#if NR_CPUS < 64 +#define NR_LPPACAS NR_CPUS +#else +#define NR_LPPACAS 64 +#endif +#else /* not iSeries */ +#define NR_LPPACAS 1 +#endif + /* * The structure which the hypervisor knows about - this structure * should not cross a page boundary. The vpa_init/register_vpa call @@ -36,7 +50,7 @@ extern unsigned long __toc_start; * will suffice to ensure that it doesn't cross a page boundary. */ struct lppaca lppaca[] = { - [0 ... (NR_CPUS-1)] = { + [0 ... (NR_LPPACAS-1)] = { .desc = 0xd397d781, /* "LpPa" */ .size = sizeof(struct lppaca), .dyn_proc_status = 2, @@ -49,6 +63,54 @@ struct lppaca lppaca[] = { }, }; +static struct lppaca *extra_lppacas; +static long __initdata lppaca_size; + +static void allocate_lppacas(int nr_cpus, unsigned long limit) +{ + if (nr_cpus <= NR_LPPACAS) + return; + + lppaca_size = PAGE_ALIGN(sizeof(struct lppaca) * + (nr_cpus - NR_LPPACAS)); + extra_lppacas = __va(memblock_alloc_base(lppaca_size, + PAGE_SIZE, limit)); +} + +static struct lppaca *new_lppaca(int cpu) +{ + struct lppaca *lp; + + if (cpu < NR_LPPACAS) + return &lppaca[cpu]; + + lp = extra_lppacas + (cpu - NR_LPPACAS); + *lp = lppaca[0]; + + return lp; +} + +static void free_lppacas(void) +{ + long new_size = 0, nr; + + if (!lppaca_size) + return; + nr = num_possible_cpus() - NR_LPPACAS; + if (nr > 0) + new_size = PAGE_ALIGN(nr * sizeof(struct lppaca)); + if (new_size >= lppaca_size) + return; + + memblock_free(__pa(extra_lppacas) + new_size, lppaca_size - new_size); + lppaca_size = new_size; +} + +#else + +static inline void allocate_lppacas(int, unsigned long) { } +static inline void free_lppacas(void) { } + #endif /* CONFIG_PPC_BOOK3S */ #ifdef CONFIG_PPC_STD_MMU_64 @@ -88,7 +150,7 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = new_lppaca(cpu); #else new_paca->kernel_pgd = swapper_pg_dir; #endif @@ -144,6 +206,8 @@ void __init allocate_pacas(void) printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", paca_size, nr_cpus, paca); + allocate_lppacas(nr_cpus, limit); + /* Can't use for_each_*_cpu, as they aren't functional yet */ for (cpu = 0; cpu < nr_cpus; cpu++) initialise_paca(&paca[cpu], cpu); @@ -164,4 +228,6 @@ void __init free_unused_pacas(void) paca_size - new_size); paca_size = new_size; + + free_lppacas(); } -- cgit v1.2.3 From 
cf9efce0ce3136fa076f53e53154e98455229514 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 26 Aug 2010 19:56:43 +0000 Subject: powerpc: Account time using timebase rather than PURR Currently, when CONFIG_VIRT_CPU_ACCOUNTING is enabled, we use the PURR register for measuring the user and system time used by processes, as well as other related times such as hardirq and softirq times. This turns out to be quite confusing for users because it means that a program will often be measured as taking less time when run on a multi-threaded processor (SMT2 or SMT4 mode) than it does when run on a single-threaded processor (ST mode), even though the program takes longer to finish. The discrepancy is accounted for as stolen time, which is also confusing, particularly when there are no other partitions running. This changes the accounting to use the timebase instead, meaning that the reported user and system times are the actual number of real-time seconds that the program was executing on the processor thread, regardless of which SMT mode the processor is in. Thus a program will generally show greater user and system times when run on a multi-threaded processor than on a single-threaded processor. On pSeries systems on POWER5 or later processors, we measure the stolen time (time when this partition wasn't running) using the hypervisor dispatch trace log. We check for new entries in the log on every entry from user mode and on every transition from kernel process context to soft or hard IRQ context (i.e. when account_system_vtime() gets called). So that we can correctly distinguish time stolen from user time and time stolen from system time, without having to check the log on every exit to user mode, we store separate timestamps for exit to user mode and entry from user mode. On systems that have a SPURR (POWER6 and POWER7), we read the SPURR in account_system_vtime() (as before), and then apportion the SPURR ticks since the last time we read it between scaled user time and scaled system time according to the relative proportions of user time and system time over the same interval. This avoids having to read the SPURR on every kernel entry and exit. On systems that have PURR but not SPURR (i.e., POWER5), we do the same using the PURR rather than the SPURR. This disables the DTL user interface in /sys/kernel/debug/powerpc/dtl for now since it conflicts with the use of the dispatch trace log by the time accounting code.
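The apportioning rule described above is compact enough to state directly; the sketch below mirrors the logic the patch adds to account_system_vtime() (variable names follow the diff, but the helper itself is illustrative):

/*
 * Split the SPURR ticks accumulated since the last read (deltascaled)
 * between system and user time in the same ratio as the timebase
 * deltas over the interval (delta = system TB ticks, udelta = user
 * TB ticks).
 */
static inline void apportion_spurr(u64 deltascaled, u64 delta, u64 udelta,
				   u64 *sys_scaled, u64 *user_scaled)
{
	if (udelta) {
		*sys_scaled = deltascaled * delta / (delta + udelta);
		*user_scaled = deltascaled - *sys_scaled;
	} else {
		/* no user time since the last read: all ticks are system time */
		*sys_scaled = deltascaled;
		*user_scaled = 0;
	}
}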
Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 3 +- arch/powerpc/include/asm/lppaca.h | 19 +++ arch/powerpc/include/asm/paca.h | 10 +- arch/powerpc/include/asm/ppc_asm.h | 50 +++--- arch/powerpc/include/asm/time.h | 5 - arch/powerpc/kernel/asm-offsets.c | 8 +- arch/powerpc/kernel/entry_64.S | 18 +++ arch/powerpc/kernel/process.c | 1 - arch/powerpc/kernel/smp.c | 5 - arch/powerpc/kernel/time.c | 268 +++++++++++++++---------------- arch/powerpc/platforms/pseries/dtl.c | 24 +-- arch/powerpc/platforms/pseries/lpar.c | 21 +++ arch/powerpc/platforms/pseries/setup.c | 52 ++++++ 13 files changed, 290 insertions(+), 194 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 57c400071995..7778d6f0c878 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -137,7 +137,8 @@ li r10,0; \ ld r11,exception_marker@toc(r2); \ std r10,RESULT(r1); /* clear regs->result */ \ - std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ \ + ACCOUNT_STOLEN_TIME /* * Exception vectors. diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 6d02624b622c..cfb85ec85750 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -172,6 +172,25 @@ struct slb_shadow { extern struct slb_shadow slb_shadow[]; +/* + * Layout of entries in the hypervisor's dispatch trace log buffer. + */ +struct dtl_entry { + u8 dispatch_reason; + u8 preempt_reason; + u16 processor_id; + u32 enqueue_to_dispatch_time; + u32 ready_to_enqueue_time; + u32 waiting_to_ready_time; + u64 timebase; + u64 fault_addr; + u64 srr0; + u64 srr1; +}; + +#define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ +#define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) + #endif /* CONFIG_PPC_BOOK3S */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7faf..6af6c1613409 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -85,6 +85,8 @@ struct paca_struct { u8 kexec_state; /* set when kexec down has irqs off */ #ifdef CONFIG_PPC_STD_MMU_64 struct slb_shadow *slb_shadow_ptr; + struct dtl_entry *dispatch_log; + struct dtl_entry *dispatch_log_end; /* * Now, starting in cacheline 2, the exception save areas @@ -134,8 +136,14 @@ struct paca_struct { /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ u64 system_time; /* accumulated system TB ticks */ - u64 startpurr; /* PURR/TB value snapshot */ + u64 user_time_scaled; /* accumulated usermode SPURR ticks */ + u64 starttime; /* TB value snapshot */ + u64 starttime_user; /* TB value on exit to usermode */ u64 startspurr; /* SPURR value snapshot */ + u64 utime_sspurr; /* ->user_time when ->startspurr set */ + u64 stolen_time; /* TB ticks taken by hypervisor */ + u64 dtl_ridx; /* read index in dispatch log */ + struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ #ifdef CONFIG_KVM_BOOK3S_HANDLER /* We use this to store guest state in */ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 498fe09263d3..98210067c1cc 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -9,6 +9,7 @@ #include #include #include +#include #ifndef 
__ASSEMBLY__ #error __FILE__ should only be used in assembler files @@ -26,17 +27,13 @@ #ifndef CONFIG_VIRT_CPU_ACCOUNTING #define ACCOUNT_CPU_USER_ENTRY(ra, rb) #define ACCOUNT_CPU_USER_EXIT(ra, rb) +#define ACCOUNT_STOLEN_TIME #else #define ACCOUNT_CPU_USER_ENTRY(ra, rb) \ beq 2f; /* if from kernel mode */ \ -BEGIN_FTR_SECTION; \ - mfspr ra,SPRN_PURR; /* get processor util. reg */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ -BEGIN_FTR_SECTION; \ - MFTB(ra); /* or get TB if no PURR */ \ -END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ - std ra,PACA_STARTPURR(r13); \ + MFTB(ra); /* get timebase */ \ + ld rb,PACA_STARTTIME_USER(r13); \ + std ra,PACA_STARTTIME(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_USER_TIME(r13); \ add ra,ra,rb; /* add on to user time */ \ @@ -44,19 +41,34 @@ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ 2: #define ACCOUNT_CPU_USER_EXIT(ra, rb) \ -BEGIN_FTR_SECTION; \ - mfspr ra,SPRN_PURR; /* get processor util. reg */ \ -END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ -BEGIN_FTR_SECTION; \ - MFTB(ra); /* or get TB if no PURR */ \ -END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ - std ra,PACA_STARTPURR(r13); \ + MFTB(ra); /* get timebase */ \ + ld rb,PACA_STARTTIME(r13); \ + std ra,PACA_STARTTIME_USER(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_SYSTEM_TIME(r13); \ - add ra,ra,rb; /* add on to user time */ \ - std ra,PACA_SYSTEM_TIME(r13); -#endif + add ra,ra,rb; /* add on to system time */ \ + std ra,PACA_SYSTEM_TIME(r13) + +#ifdef CONFIG_PPC_SPLPAR +#define ACCOUNT_STOLEN_TIME \ +BEGIN_FW_FTR_SECTION; \ + beq 33f; \ + /* from user - see if there are any DTL entries to process */ \ + ld r10,PACALPPACAPTR(r13); /* get ptr to VPA */ \ + ld r11,PACA_DTL_RIDX(r13); /* get log read index */ \ + ld r10,LPPACA_DTLIDX(r10); /* get log write index */ \ + cmpd cr1,r11,r10; \ + beq+ cr1,33f; \ + bl .accumulate_stolen_time; \ +33: \ +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) + +#else /* CONFIG_PPC_SPLPAR */ +#define ACCOUNT_STOLEN_TIME + +#endif /* CONFIG_PPC_SPLPAR */ + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ /* * Macros for storing registers into and loading registers from diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index dc779dfcf258..fe6f7c2c9c68 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -34,7 +34,6 @@ extern void to_tm(int tim, struct rtc_time * tm); extern void GregorianDay(struct rtc_time *tm); extern void generic_calibrate_decr(void); -extern void snapshot_timebase(void); extern void set_dec_cpu6(unsigned int val); @@ -212,12 +211,8 @@ struct cpu_usage { DECLARE_PER_CPU(struct cpu_usage, cpu_usage_array); #if defined(CONFIG_VIRT_CPU_ACCOUNTING) -extern void calculate_steal_time(void); -extern void snapshot_timebases(void); #define account_process_vtime(tsk) account_process_tick(tsk, 0) #else -#define calculate_steal_time() do { } while (0) -#define snapshot_timebases() do { } while (0) #define account_process_vtime(tsk) do { } while (0) #endif diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1c0607ddccc0..c63494090854 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -181,17 +181,19 @@ int main(void) offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].vsid)); DEFINE(SLBSHADOW_STACKESID, offsetof(struct slb_shadow, save_area[SLB_NUM_BOLTED - 1].esid)); + DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); DEFINE(LPPACASRR0, 
offsetof(struct lppaca, saved_srr0)); DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); - DEFINE(SLBSHADOW_SAVEAREA, offsetof(struct slb_shadow, save_area)); + DEFINE(LPPACA_DTLIDX, offsetof(struct lppaca, dtl_idx)); + DEFINE(PACA_DTL_RIDX, offsetof(struct paca_struct, dtl_ridx)); #endif /* CONFIG_PPC_STD_MMU_64 */ DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); DEFINE(PACAKEXECSTATE, offsetof(struct paca_struct, kexec_state)); - DEFINE(PACA_STARTPURR, offsetof(struct paca_struct, startpurr)); - DEFINE(PACA_STARTSPURR, offsetof(struct paca_struct, startspurr)); + DEFINE(PACA_STARTTIME, offsetof(struct paca_struct, starttime)); + DEFINE(PACA_STARTTIME_USER, offsetof(struct paca_struct, starttime_user)); DEFINE(PACA_USER_TIME, offsetof(struct paca_struct, user_time)); DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 4d5fa12ca6e8..d82878c4daa6 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -97,6 +97,24 @@ system_call_common: addi r9,r1,STACK_FRAME_OVERHEAD ld r11,exception_marker@toc(r2) std r11,-16(r9) /* "regshere" marker */ +#if defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR) +BEGIN_FW_FTR_SECTION + beq 33f + /* if from user, see if there are any DTL entries to process */ + ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */ + ld r11,PACA_DTL_RIDX(r13) /* get log read index */ + ld r10,LPPACA_DTLIDX(r10) /* get log write index */ + cmpd cr1,r11,r10 + beq+ cr1,33f + bl .accumulate_stolen_time + REST_GPR(0,r1) + REST_4GPRS(3,r1) + REST_2GPRS(7,r1) + addi r9,r1,STACK_FRAME_OVERHEAD +33: +END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING && CONFIG_PPC_SPLPAR */ + #ifdef CONFIG_TRACE_IRQFLAGS bl .trace_hardirqs_on REST_GPR(0,r1) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 37bc8ff16cac..84906d3fc860 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -517,7 +517,6 @@ struct task_struct *__switch_to(struct task_struct *prev, account_system_vtime(current); account_process_vtime(current); - calculate_steal_time(); /* * We can't take a PMU exception inside _switch() since there is a diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 9019f0f1bb5e..68034bbf2e4f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -508,9 +508,6 @@ int __devinit start_secondary(void *unused) if (smp_ops->take_timebase) smp_ops->take_timebase(); - if (system_state > SYSTEM_BOOTING) - snapshot_timebase(); - secondary_cpu_time_init(); ipi_call_lock(); @@ -575,8 +572,6 @@ void __init smp_cpus_done(unsigned int max_cpus) free_cpumask_var(old_mask); - snapshot_timebases(); - dump_numa_cpu_topology(); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5d..fca20643c368 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -164,8 +164,6 @@ unsigned long ppc_proc_freq; EXPORT_SYMBOL(ppc_proc_freq); unsigned long ppc_tb_freq; -static DEFINE_PER_CPU(u64, last_jiffy); - #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* * Factors for converting from cputime_t (timebase ticks) to @@ -200,62 +198,151 @@ static void 
calc_cputime_factors(void) } /* - * Read the PURR on systems that have it, otherwise the timebase. + * Read the SPURR on systems that have it, otherwise the PURR, + * or if that doesn't exist return the timebase value passed in. */ -static u64 read_purr(void) +static u64 read_spurr(u64 tb) { + if (cpu_has_feature(CPU_FTR_SPURR)) + return mfspr(SPRN_SPURR); if (cpu_has_feature(CPU_FTR_PURR)) return mfspr(SPRN_PURR); - return mftb(); + return tb; } +#ifdef CONFIG_PPC_SPLPAR + /* - * Read the SPURR on systems that have it, otherwise the purr + * Scan the dispatch trace log and count up the stolen time. + * Should be called with interrupts disabled. */ -static u64 read_spurr(u64 purr) +static u64 scan_dispatch_log(u64 stop_tb) { - /* - * cpus without PURR won't have a SPURR - * We already know the former when we use this, so tell gcc - */ - if (cpu_has_feature(CPU_FTR_PURR) && cpu_has_feature(CPU_FTR_SPURR)) - return mfspr(SPRN_SPURR); - return purr; + unsigned long i = local_paca->dtl_ridx; + struct dtl_entry *dtl = local_paca->dtl_curr; + struct dtl_entry *dtl_end = local_paca->dispatch_log_end; + struct lppaca *vpa = local_paca->lppaca_ptr; + u64 tb_delta; + u64 stolen = 0; + u64 dtb; + + if (i == vpa->dtl_idx) + return 0; + while (i < vpa->dtl_idx) { + dtb = dtl->timebase; + tb_delta = dtl->enqueue_to_dispatch_time + + dtl->ready_to_enqueue_time; + barrier(); + if (i + N_DISPATCH_LOG < vpa->dtl_idx) { + /* buffer has overflowed */ + i = vpa->dtl_idx - N_DISPATCH_LOG; + dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG); + continue; + } + if (dtb > stop_tb) + break; + stolen += tb_delta; + ++i; + ++dtl; + if (dtl == dtl_end) + dtl = local_paca->dispatch_log; + } + local_paca->dtl_ridx = i; + local_paca->dtl_curr = dtl; + return stolen; } +/* + * Accumulate stolen time by scanning the dispatch trace log. + * Called on entry from user mode. + */ +void accumulate_stolen_time(void) +{ + u64 sst, ust; + + sst = scan_dispatch_log(get_paca()->starttime_user); + ust = scan_dispatch_log(get_paca()->starttime); + get_paca()->system_time -= sst; + get_paca()->user_time -= ust; + get_paca()->stolen_time += ust + sst; +} + +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + u64 stolen = 0; + + if (get_paca()->dtl_ridx != get_paca()->lppaca_ptr->dtl_idx) { + stolen = scan_dispatch_log(stop_tb); + get_paca()->system_time -= stolen; + } + + stolen += get_paca()->stolen_time; + get_paca()->stolen_time = 0; + return stolen; +} + +#else /* CONFIG_PPC_SPLPAR */ +static inline u64 calculate_stolen_time(u64 stop_tb) +{ + return 0; +} + +#endif /* CONFIG_PPC_SPLPAR */ + /* * Account time for a transition between system, hard irq * or soft irq state. */ void account_system_vtime(struct task_struct *tsk) { - u64 now, nowscaled, delta, deltascaled, sys_time; + u64 now, nowscaled, delta, deltascaled; unsigned long flags; + u64 stolen, udelta, sys_scaled, user_scaled; local_irq_save(flags); - now = read_purr(); + now = mftb(); nowscaled = read_spurr(now); - delta = now - get_paca()->startpurr; + get_paca()->system_time += now - get_paca()->starttime; + get_paca()->starttime = now; deltascaled = nowscaled - get_paca()->startspurr; - get_paca()->startpurr = now; get_paca()->startspurr = nowscaled; - if (!in_interrupt()) { - /* deltascaled includes both user and system time. 
- * Hence scale it based on the purr ratio to estimate - * the system time */ - sys_time = get_paca()->system_time; - if (get_paca()->user_time) - deltascaled = deltascaled * sys_time / - (sys_time + get_paca()->user_time); - delta += sys_time; - get_paca()->system_time = 0; + + stolen = calculate_stolen_time(now); + + delta = get_paca()->system_time; + get_paca()->system_time = 0; + udelta = get_paca()->user_time - get_paca()->utime_sspurr; + get_paca()->utime_sspurr = get_paca()->user_time; + + /* + * Because we don't read the SPURR on every kernel entry/exit, + * deltascaled includes both user and system SPURR ticks. + * Apportion these ticks to system SPURR ticks and user + * SPURR ticks in the same ratio as the system time (delta) + * and user time (udelta) values obtained from the timebase + * over the same interval. The system ticks get accounted here; + * the user ticks get saved up in paca->user_time_scaled to be + * used by account_process_tick. + */ + sys_scaled = delta; + user_scaled = udelta; + if (deltascaled != delta + udelta) { + if (udelta) { + sys_scaled = deltascaled * delta / (delta + udelta); + user_scaled = deltascaled - sys_scaled; + } else { + sys_scaled = deltascaled; + } + } + get_paca()->user_time_scaled += user_scaled; + + if (in_irq() || idle_task(smp_processor_id()) != tsk) { + account_system_time(tsk, 0, delta, sys_scaled); + if (stolen) + account_steal_time(stolen); + } else { + account_idle_time(delta + stolen); } - if (in_irq() || idle_task(smp_processor_id()) != tsk) - account_system_time(tsk, 0, delta, deltascaled); - else - account_idle_time(delta); - __get_cpu_var(cputime_last_delta) = delta; - __get_cpu_var(cputime_scaled_last_delta) = deltascaled; local_irq_restore(flags); } EXPORT_SYMBOL_GPL(account_system_vtime); @@ -265,125 +352,26 @@ EXPORT_SYMBOL_GPL(account_system_vtime); * by the exception entry and exit code to the generic process * user and system time records. * Must be called with interrupts disabled. + * Assumes that account_system_vtime() has been called recently + * (i.e. since the last entry from usermode) so that + * get_paca()->user_time_scaled is up to date. */ void account_process_tick(struct task_struct *tsk, int user_tick) { cputime_t utime, utimescaled; utime = get_paca()->user_time; + utimescaled = get_paca()->user_time_scaled; get_paca()->user_time = 0; - utimescaled = cputime_to_scaled(utime); + get_paca()->user_time_scaled = 0; + get_paca()->utime_sspurr = 0; account_user_time(tsk, utime, utimescaled); } -/* - * Stuff for accounting stolen time. - */ -struct cpu_purr_data { - int initialized; /* thread is running */ - u64 tb; /* last TB value read */ - u64 purr; /* last PURR value read */ - u64 spurr; /* last SPURR value read */ -}; - -/* - * Each entry in the cpu_purr_data array is manipulated only by its - * "owner" cpu -- usually in the timer interrupt but also occasionally - * in process context for cpu online. As long as cpus do not touch - * each others' cpu_purr_data, disabling local interrupts is - * sufficient to serialize accesses. - */ -static DEFINE_PER_CPU(struct cpu_purr_data, cpu_purr_data); - -static void snapshot_tb_and_purr(void *data) -{ - unsigned long flags; - struct cpu_purr_data *p = &__get_cpu_var(cpu_purr_data); - - local_irq_save(flags); - p->tb = get_tb_or_rtc(); - p->purr = mfspr(SPRN_PURR); - wmb(); - p->initialized = 1; - local_irq_restore(flags); -} - -/* - * Called during boot when all cpus have come up. 
- */ -void snapshot_timebases(void) -{ - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - on_each_cpu(snapshot_tb_and_purr, NULL, 1); -} - -/* - * Must be called with interrupts disabled. - */ -void calculate_steal_time(void) -{ - u64 tb, purr; - s64 stolen; - struct cpu_purr_data *pme; - - pme = &__get_cpu_var(cpu_purr_data); - if (!pme->initialized) - return; /* !CPU_FTR_PURR or early in early boot */ - tb = mftb(); - purr = mfspr(SPRN_PURR); - stolen = (tb - pme->tb) - (purr - pme->purr); - if (stolen > 0) { - if (idle_task(smp_processor_id()) != current) - account_steal_time(stolen); - else - account_idle_time(stolen); - } - pme->tb = tb; - pme->purr = purr; -} - -#ifdef CONFIG_PPC_SPLPAR -/* - * Must be called before the cpu is added to the online map when - * a cpu is being brought up at runtime. - */ -static void snapshot_purr(void) -{ - struct cpu_purr_data *pme; - unsigned long flags; - - if (!cpu_has_feature(CPU_FTR_PURR)) - return; - local_irq_save(flags); - pme = &__get_cpu_var(cpu_purr_data); - pme->tb = mftb(); - pme->purr = mfspr(SPRN_PURR); - pme->initialized = 1; - local_irq_restore(flags); -} - -#endif /* CONFIG_PPC_SPLPAR */ - #else /* ! CONFIG_VIRT_CPU_ACCOUNTING */ #define calc_cputime_factors() -#define calculate_steal_time() do { } while (0) #endif -#if !(defined(CONFIG_VIRT_CPU_ACCOUNTING) && defined(CONFIG_PPC_SPLPAR)) -#define snapshot_purr() do { } while (0) -#endif - -/* - * Called when a cpu comes up after the system has finished booting, - * i.e. as a result of a hotplug cpu action. - */ -void snapshot_timebase(void) -{ - __get_cpu_var(last_jiffy) = get_tb_or_rtc(); - snapshot_purr(); -} - void __delay(unsigned long loops) { unsigned long start; @@ -585,8 +573,6 @@ void timer_interrupt(struct pt_regs * regs) old_regs = set_irq_regs(regs); irq_enter(); - calculate_steal_time(); - if (test_perf_event_pending()) { clear_perf_event_pending(); perf_event_do_pending(); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index adfd5441b612..0357655db49d 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -27,27 +27,10 @@ #include #include #include +#include #include "plpar_wrappers.h" -/* - * Layout of entries in the hypervisor's DTL buffer. Although we don't - * actually access the internals of an entry (we only need to know the size), - * we might as well define it here for reference. 
- */ -struct dtl_entry { - u8 dispatch_reason; - u8 preempt_reason; - u16 processor_id; - u32 enqueue_to_dispatch_time; - u32 ready_to_enqueue_time; - u32 waiting_to_ready_time; - u64 timebase; - u64 fault_addr; - u64 srr0; - u64 srr1; -}; - struct dtl { struct dtl_entry *buf; struct dentry *file; @@ -237,6 +220,11 @@ static int dtl_init(void) struct dentry *event_mask_file, *buf_entries_file; int rc, i; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING + /* disable this for now */ + return -ENODEV; +#endif + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return -ENODEV; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index a17fe4a9059f..f129040d974c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -248,6 +248,8 @@ void vpa_init(int cpu) int hwcpu = get_hard_smp_processor_id(cpu); unsigned long addr; long ret; + struct paca_struct *pp; + struct dtl_entry *dtl; if (cpu_has_feature(CPU_FTR_ALTIVEC)) lppaca_of(cpu).vmxregs_in_use = 1; @@ -274,6 +276,25 @@ void vpa_init(int cpu) "registration for cpu %d (hw %d) of area %lx " "returns %ld\n", cpu, hwcpu, addr, ret); } + + /* + * Register dispatch trace log, if one has been allocated. + */ + pp = &paca[cpu]; + dtl = pp->dispatch_log; + if (dtl) { + pp->dtl_ridx = 0; + pp->dtl_curr = dtl; + lppaca_of(cpu).dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + ret = register_dtl(hwcpu, __pa(dtl)); + if (ret) + pr_warn("DTL registration failed for cpu %d (%ld)\n", + cpu, ret); + lppaca_of(cpu).dtl_enable_mask = 2; + } } static long pSeries_lpar_hpte_insert(unsigned long hpte_group, diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index a6d19e3a505e..d345bfd56bbe 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -273,6 +273,58 @@ static struct notifier_block pci_dn_reconfig_nb = { .notifier_call = pci_dn_reconfig_notifier, }; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +/* + * Allocate space for the dispatch trace log for all possible cpus + * and register the buffers with the hypervisor. This is used for + * computing time stolen by the hypervisor. 
+ */ +static int alloc_dispatch_logs(void) +{ + int cpu, ret; + struct paca_struct *pp; + struct dtl_entry *dtl; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + for_each_possible_cpu(cpu) { + pp = &paca[cpu]; + dtl = kmalloc_node(DISPATCH_LOG_BYTES, GFP_KERNEL, + cpu_to_node(cpu)); + if (!dtl) { + pr_warn("Failed to allocate dispatch trace log for cpu %d\n", + cpu); + pr_warn("Stolen time statistics will be unreliable\n"); + break; + } + + pp->dtl_ridx = 0; + pp->dispatch_log = dtl; + pp->dispatch_log_end = dtl + N_DISPATCH_LOG; + pp->dtl_curr = dtl; + } + + /* Register the DTL for the current (boot) cpu */ + dtl = get_paca()->dispatch_log; + get_paca()->dtl_ridx = 0; + get_paca()->dtl_curr = dtl; + get_paca()->lppaca_ptr->dtl_idx = 0; + + /* hypervisor reads buffer length from this field */ + dtl->enqueue_to_dispatch_time = DISPATCH_LOG_BYTES; + ret = register_dtl(hard_smp_processor_id(), __pa(dtl)); + if (ret) + pr_warn("DTL registration failed for boot cpu %d (%d)\n", + smp_processor_id(), ret); + get_paca()->lppaca_ptr->dtl_enable_mask = 2; + + return 0; +} + +early_initcall(alloc_dispatch_logs); +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ + static void __init pSeries_setup_arch(void) { /* Discover PIC type and setup ppc_md accordingly */ -- cgit v1.2.3 From 872e439a45ed4a4bd499bc55cb0dffa74027f749 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 31 Aug 2010 01:59:53 +0000 Subject: powerpc/pseries: Re-enable dispatch trace log userspace interface Since the cpu accounting code uses the hypervisor dispatch trace log now when CONFIG_VIRT_CPU_ACCOUNTING = y, the previous commit disabled access to it via files in the /sys/kernel/debug/powerpc/dtl/ directory in that case. This restores those files. To do this, we now have a hook that the cpu accounting code will call as it processes each entry from the hypervisor dispatch trace log. The code in dtl.c now uses that to fill up its ring buffer, rather than having the hypervisor fill the ring buffer directly. This also fixes dtl_file_read() to handle overflow conditions a bit better and adds a spinlock to ensure that race conditions (multiple processes opening or reading the file concurrently) are handled correctly. Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/lppaca.h | 8 ++ arch/powerpc/kernel/time.c | 6 +- arch/powerpc/platforms/pseries/dtl.c | 206 ++++++++++++++++++++++++++++------- 3 files changed, 179 insertions(+), 41 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index cfb85ec85750..7f5e0fefebb0 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -191,6 +191,14 @@ struct dtl_entry { #define DISPATCH_LOG_BYTES 4096 /* bytes per cpu */ #define N_DISPATCH_LOG (DISPATCH_LOG_BYTES / sizeof(struct dtl_entry)) +/* + * When CONFIG_VIRT_CPU_ACCOUNTING = y, the cpu accounting code controls + * reading from the dispatch trace log. If other code wants to consume + * DTL entries, it can set this pointer to a function that will get + * called once for each DTL entry that gets processed. 
+ */ +extern void (*dtl_consumer)(struct dtl_entry *entry, u64 index); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fca20643c368..bcb738b9ff8c 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -183,6 +183,8 @@ DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta); cputime_t cputime_one_jiffy; +void (*dtl_consumer)(struct dtl_entry *, u64); + static void calc_cputime_factors(void) { struct div_result res; @@ -218,7 +220,7 @@ static u64 read_spurr(u64 tb) */ static u64 scan_dispatch_log(u64 stop_tb) { - unsigned long i = local_paca->dtl_ridx; + u64 i = local_paca->dtl_ridx; struct dtl_entry *dtl = local_paca->dtl_curr; struct dtl_entry *dtl_end = local_paca->dispatch_log_end; struct lppaca *vpa = local_paca->lppaca_ptr; @@ -229,6 +231,8 @@ static u64 scan_dispatch_log(u64 stop_tb) if (i == vpa->dtl_idx) return 0; while (i < vpa->dtl_idx) { + if (dtl_consumer) + dtl_consumer(dtl, i); dtb = dtl->timebase; tb_delta = dtl->enqueue_to_dispatch_time + dtl->ready_to_enqueue_time; diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 0357655db49d..c371bc06434b 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -37,6 +38,7 @@ struct dtl { int cpu; int buf_entries; u64 last_idx; + spinlock_t lock; }; static DEFINE_PER_CPU(struct dtl, cpu_dtl); @@ -55,25 +57,97 @@ static u8 dtl_event_mask = 0x7; static int dtl_buf_entries = (16 * 85); -static int dtl_enable(struct dtl *dtl) +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +struct dtl_ring { + u64 write_index; + struct dtl_entry *write_ptr; + struct dtl_entry *buf; + struct dtl_entry *buf_end; + u8 saved_dtl_mask; +}; + +static DEFINE_PER_CPU(struct dtl_ring, dtl_rings); + +static atomic_t dtl_count; + +/* + * The cpu accounting code controls the DTL ring buffer, and we get + * given entries as they are processed. 
+ */ +static void consume_dtle(struct dtl_entry *dtle, u64 index) { - unsigned long addr; - int ret, hwcpu; + struct dtl_ring *dtlr = &__get_cpu_var(dtl_rings); + struct dtl_entry *wp = dtlr->write_ptr; + struct lppaca *vpa = local_paca->lppaca_ptr; - /* only allow one reader */ - if (dtl->buf) - return -EBUSY; + if (!wp) + return; - /* we need to store the original allocation size for use during read */ - dtl->buf_entries = dtl_buf_entries; + *wp = *dtle; + barrier(); - dtl->buf = kmalloc_node(dtl->buf_entries * sizeof(struct dtl_entry), - GFP_KERNEL, cpu_to_node(dtl->cpu)); - if (!dtl->buf) { - printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", - __func__, dtl->cpu); - return -ENOMEM; - } + /* check for hypervisor ring buffer overflow, ignore this entry if so */ + if (index + N_DISPATCH_LOG < vpa->dtl_idx) + return; + + ++wp; + if (wp == dtlr->buf_end) + wp = dtlr->buf; + dtlr->write_ptr = wp; + + /* incrementing write_index makes the new entry visible */ + smp_wmb(); + ++dtlr->write_index; +} + +static int dtl_start(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->buf = dtl->buf; + dtlr->buf_end = dtl->buf + dtl->buf_entries; + dtlr->write_index = 0; + + /* setting write_ptr enables logging into our buffer */ + smp_wmb(); + dtlr->write_ptr = dtl->buf; + + /* enable event logging */ + dtlr->saved_dtl_mask = lppaca_of(dtl->cpu).dtl_enable_mask; + lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask; + + dtl_consumer = consume_dtle; + atomic_inc(&dtl_count); + return 0; +} + +static void dtl_stop(struct dtl *dtl) +{ + struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu); + + dtlr->write_ptr = NULL; + smp_wmb(); + + dtlr->buf = NULL; + + /* restore dtl_enable_mask */ + lppaca_of(dtl->cpu).dtl_enable_mask = dtlr->saved_dtl_mask; + + if (atomic_dec_and_test(&dtl_count)) + dtl_consumer = NULL; +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return per_cpu(dtl_rings, dtl->cpu).write_index; +} + +#else /* CONFIG_VIRT_CPU_ACCOUNTING */ + +static int dtl_start(struct dtl *dtl) +{ + unsigned long addr; + int ret, hwcpu; /* Register our dtl buffer with the hypervisor. 
The HV expects the * buffer size to be passed in the second word of the buffer */ @@ -85,12 +159,11 @@ static int dtl_enable(struct dtl *dtl) if (ret) { printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) " "failed with %d\n", __func__, dtl->cpu, hwcpu, ret); - kfree(dtl->buf); return -EIO; } /* set our initial buffer indices */ - dtl->last_idx = lppaca_of(dtl->cpu).dtl_idx = 0; + lppaca_of(dtl->cpu).dtl_idx = 0; /* ensure that our updates to the lppaca fields have occurred before * we actually enable the logging */ @@ -102,17 +175,66 @@ static int dtl_enable(struct dtl *dtl) return 0; } -static void dtl_disable(struct dtl *dtl) +static void dtl_stop(struct dtl *dtl) { int hwcpu = get_hard_smp_processor_id(dtl->cpu); lppaca_of(dtl->cpu).dtl_enable_mask = 0x0; unregister_dtl(hwcpu, __pa(dtl->buf)); +} + +static u64 dtl_current_index(struct dtl *dtl) +{ + return lppaca_of(dtl->cpu).dtl_idx; +} +#endif /* CONFIG_VIRT_CPU_ACCOUNTING */ +static int dtl_enable(struct dtl *dtl) +{ + long int n_entries; + long int rc; + struct dtl_entry *buf = NULL; + + /* only allow one reader */ + if (dtl->buf) + return -EBUSY; + + n_entries = dtl_buf_entries; + buf = kmalloc_node(n_entries * sizeof(struct dtl_entry), + GFP_KERNEL, cpu_to_node(dtl->cpu)); + if (!buf) { + printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", + __func__, dtl->cpu); + return -ENOMEM; + } + + spin_lock(&dtl->lock); + rc = -EBUSY; + if (!dtl->buf) { + /* store the original allocation size for use during read */ + dtl->buf_entries = n_entries; + dtl->buf = buf; + dtl->last_idx = 0; + rc = dtl_start(dtl); + if (rc) + dtl->buf = NULL; + } + spin_unlock(&dtl->lock); + + if (rc) + kfree(buf); + return rc; +} + +static void dtl_disable(struct dtl *dtl) +{ + spin_lock(&dtl->lock); + dtl_stop(dtl); kfree(dtl->buf); dtl->buf = NULL; dtl->buf_entries = 0; + spin_unlock(&dtl->lock); } /* file interface */ @@ -140,8 +262,9 @@ static int dtl_file_release(struct inode *inode, struct file *filp) static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) { - int rc, cur_idx, last_idx, n_read, n_req, read_size; + long int rc, n_read, n_req, read_size; struct dtl *dtl; + u64 cur_idx, last_idx, i; if ((len % sizeof(struct dtl_entry)) != 0) return -EINVAL; @@ -154,41 +277,48 @@ static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len, /* actual number of entries read */ n_read = 0; - cur_idx = lppaca_of(dtl->cpu).dtl_idx; + spin_lock(&dtl->lock); + + cur_idx = dtl_current_index(dtl); last_idx = dtl->last_idx; - if (cur_idx - last_idx > dtl->buf_entries) { - pr_debug("%s: hv buffer overflow for cpu %d, samples lost\n", - __func__, dtl->cpu); - } + if (last_idx + dtl->buf_entries <= cur_idx) + last_idx = cur_idx - dtl->buf_entries + 1; + + if (last_idx + n_req > cur_idx) + n_req = cur_idx - last_idx; - cur_idx %= dtl->buf_entries; - last_idx %= dtl->buf_entries; + if (n_req > 0) + dtl->last_idx = last_idx + n_req; + + spin_unlock(&dtl->lock); + + if (n_req <= 0) + return 0; + + i = last_idx % dtl->buf_entries; /* read the tail of the buffer if we've wrapped */ - if (last_idx > cur_idx) { - read_size = min(n_req, dtl->buf_entries - last_idx); + if (i + n_req > dtl->buf_entries) { + read_size = dtl->buf_entries - i; - rc = copy_to_user(buf, &dtl->buf[last_idx], + rc = copy_to_user(buf, &dtl->buf[i], read_size * sizeof(struct dtl_entry)); if (rc) return -EFAULT; - last_idx = 0; + i = 0; n_req -= read_size; n_read += read_size; buf += read_size * sizeof(struct dtl_entry); } /* .. 
and now the head */ - read_size = min(n_req, cur_idx - last_idx); - rc = copy_to_user(buf, &dtl->buf[last_idx], - read_size * sizeof(struct dtl_entry)); + rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry)); if (rc) return -EFAULT; - n_read += read_size; - dtl->last_idx += n_read; + n_read += n_req; return n_read * sizeof(struct dtl_entry); } @@ -220,11 +350,6 @@ static int dtl_init(void) struct dentry *event_mask_file, *buf_entries_file; int rc, i; -#ifdef CONFIG_VIRT_CPU_ACCOUNTING - /* disable this for now */ - return -ENODEV; -#endif - if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return -ENODEV; @@ -251,6 +376,7 @@ static int dtl_init(void) /* set up the per-cpu log structures */ for_each_possible_cpu(i) { struct dtl *dtl = &per_cpu(cpu_dtl, i); + spin_lock_init(&dtl->lock); dtl->cpu = i; rc = dtl_setup_file(dtl); -- cgit v1.2.3 From 05d77ac90c0d260ae18decd70507dc4f5b71a2cb Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Sat, 21 Aug 2010 11:43:20 +0000 Subject: powerpc: Remove fpscr use from [kvm_]cvt_{fd,df} Neither lfs nor stfs touch the fpscr, so remove the restore/save of it around them. Signed-off-by: Andreas Schwab Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/kvm_fpu.h | 4 +-- arch/powerpc/include/asm/system.h | 4 +-- arch/powerpc/kernel/align.c | 4 +-- arch/powerpc/kernel/fpu.S | 10 -------- arch/powerpc/kvm/book3s_paired_singles.c | 44 +++++++++++++++----------------- arch/powerpc/kvm/fpu.S | 8 ------ 6 files changed, 26 insertions(+), 48 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h index c3d4f0518a67..92daae132492 100644 --- a/arch/powerpc/include/asm/kvm_fpu.h +++ b/arch/powerpc/include/asm/kvm_fpu.h @@ -82,7 +82,7 @@ FPD_THREE_IN(fmadd) FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) -extern void kvm_cvt_fd(u32 *from, u64 *to, u64 *fpscr); -extern void kvm_cvt_df(u64 *from, u32 *to, u64 *fpscr); +extern void kvm_cvt_fd(u32 *from, u64 *to); +extern void kvm_cvt_df(u64 *from, u32 *to); #endif diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 6c294acac848..0b3fe78be71b 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -154,8 +154,8 @@ extern void enable_kernel_spe(void); extern void giveup_spe(struct task_struct *); extern void load_up_spe(struct task_struct *); extern int fix_alignment(struct pt_regs *); -extern void cvt_fd(float *from, double *to, struct thread_struct *thread); -extern void cvt_df(double *from, float *to, struct thread_struct *thread); +extern void cvt_fd(float *from, double *to); +extern void cvt_df(double *from, float *to); #ifndef CONFIG_SMP extern void discard_lazy_cpu_state(void); diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index b876e989220b..8184ee97e484 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -889,7 +889,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_df(&data.dd, (float *)&data.v[4], ¤t->thread); + cvt_df(&data.dd, (float *)&data.v[4]); preempt_enable(); #else return 0; @@ -933,7 +933,7 @@ int fix_alignment(struct pt_regs *regs) #ifdef CONFIG_PPC_FPU preempt_disable(); enable_kernel_fp(); - cvt_fd((float *)&data.v[4], &data.dd, ¤t->thread); + cvt_fd((float *)&data.v[4], &data.dd); preempt_enable(); #else return 0; diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index fc8f5b14019c..e86c040ae585 100644 --- 
a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -163,24 +163,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) /* * These are used in the alignment trap handler when emulating * single-precision loads and stores. - * We restore and save the fpscr so the task gets the same result - * and exceptions as if the cpu had performed the load or store. */ _GLOBAL(cvt_fd) - lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ - MTFSF_L(0) lfs 0,0(r3) stfd 0,0(r4) - mffs 0 - stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ blr _GLOBAL(cvt_df) - lfd 0,THREAD_FPSCR(r5) /* load up fpscr value */ - MTFSF_L(0) lfd 0,0(r3) stfs 0,0(r4) - mffs 0 - stfd 0,THREAD_FPSCR(r5) /* save new fpscr value */ blr diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 474f2e24050a..35a701f3ece4 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -159,7 +159,7 @@ static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) { - kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt], &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rt], &vcpu->arch.qpr[rt]); } static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) @@ -204,7 +204,7 @@ static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, /* put in registers */ switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); + kvm_cvt_fd((u32*)tmp, &vcpu->arch.fpr[rs]); vcpu->arch.qpr[rs] = *((u32*)tmp); break; case FPU_LS_DOUBLE: @@ -230,7 +230,7 @@ static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (ls_type) { case FPU_LS_SINGLE: - kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rs], (u32*)tmp); val = *((u32*)tmp); len = sizeof(u32); break; @@ -296,7 +296,7 @@ static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_DONE; /* put in registers */ - kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs], &vcpu->arch.fpscr); + kvm_cvt_fd(&tmp[0], &vcpu->arch.fpr[rs]); vcpu->arch.qpr[rs] = tmp[1]; dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], @@ -314,7 +314,7 @@ static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u32 tmp[2]; int len = w ? 
sizeof(u32) : sizeof(u64); - kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0], &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[rs], &tmp[0]); tmp[1] = vcpu->arch.qpr[rs]; r = kvmppc_st(vcpu, &addr, len, tmp, true); @@ -516,9 +516,9 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); - kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); - kvm_cvt_df(&fpr[reg_in3], &ps0_in3, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in1], &ps0_in1); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2); + kvm_cvt_df(&fpr[reg_in3], &ps0_in3); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; @@ -529,7 +529,7 @@ static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, ps0_in1, ps0_in2, ps0_in3, ps0_out); if (!(scalar & SCALAR_NO_PS0)) - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); /* PS1 */ ps1_in1 = qpr[reg_in1]; @@ -566,12 +566,12 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in1], &ps0_in1, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in1], &ps0_in1); if (scalar & SCALAR_LOW) ps0_in2 = qpr[reg_in2]; else - kvm_cvt_df(&fpr[reg_in2], &ps0_in2, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in2], &ps0_in2); func(&vcpu->arch.fpscr, &ps0_out, &ps0_in1, &ps0_in2); @@ -579,7 +579,7 @@ static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", ps0_in1, ps0_in2, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); } /* PS1 */ @@ -615,13 +615,13 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, WARN_ON(rc); /* PS0 */ - kvm_cvt_df(&fpr[reg_in], &ps0_in, &vcpu->arch.fpscr); + kvm_cvt_df(&fpr[reg_in], &ps0_in); func(&vcpu->arch.fpscr, &ps0_out, &ps0_in); dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", ps0_in, ps0_out); - kvm_cvt_fd(&ps0_out, &fpr[reg_out], &vcpu->arch.fpscr); + kvm_cvt_fd(&ps0_out, &fpr[reg_out]); /* PS1 */ ps1_in = qpr[reg_in]; @@ -671,7 +671,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) #ifdef DEBUG for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[i], &f); dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); } @@ -796,8 +796,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ kvm_cvt_df(&vcpu->arch.fpr[ax_rb], - &vcpu->arch.qpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE01: WARN_ON(rcomp); @@ -808,19 +807,16 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) WARN_ON(rcomp); /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.fpr[ax_rd]); /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ kvm_cvt_df(&vcpu->arch.fpr[ax_rb], - &vcpu->arch.qpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.qpr[ax_rd]); break; case OP_4X_PS_MERGE11: WARN_ON(rcomp); /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ kvm_cvt_fd(&vcpu->arch.qpr[ax_ra], - &vcpu->arch.fpr[ax_rd], - &vcpu->arch.fpscr); + &vcpu->arch.fpr[ax_rd]); vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; break; } @@ -1255,7 +1251,7 @@ int kvmppc_emulate_paired_single(struct kvm_run 
*run, struct kvm_vcpu *vcpu) #ifdef DEBUG for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { u32 f; - kvm_cvt_df(&vcpu->arch.fpr[i], &f, &vcpu->arch.fpscr); + kvm_cvt_df(&vcpu->arch.fpr[i], &f); dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); } #endif diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S index cb34bbe16113..bf68d597549e 100644 --- a/arch/powerpc/kvm/fpu.S +++ b/arch/powerpc/kvm/fpu.S @@ -273,19 +273,11 @@ FPD_THREE_IN(fnmsub) FPD_THREE_IN(fnmadd) _GLOBAL(kvm_cvt_fd) - lfd 0,0(r5) /* load up fpscr value */ - MTFSF_L(0) lfs 0,0(r3) stfd 0,0(r4) - mffs 0 - stfd 0,0(r5) /* save new fpscr value */ blr _GLOBAL(kvm_cvt_df) - lfd 0,0(r5) /* load up fpscr value */ - MTFSF_L(0) lfd 0,0(r3) stfs 0,0(r4) - mffs 0 - stfd 0,0(r5) /* save new fpscr value */ blr -- cgit v1.2.3 From cab175f9fa2973f0deb1580fca3c966fe1d3981e Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Fri, 27 Aug 2010 03:49:11 +0000 Subject: powerpc: Use is_32bit_task() helper to test 32-bit binary This patch removes all explicit tests for the TIF_32BIT flag Signed-off-by: Denis Kirjanov Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/compat.h | 4 ++-- arch/powerpc/include/asm/elf.h | 2 +- arch/powerpc/include/asm/page_64.h | 4 ++-- arch/powerpc/include/asm/processor.h | 4 ++-- arch/powerpc/kernel/ptrace.c | 2 +- arch/powerpc/kernel/vdso.c | 6 +++--- arch/powerpc/oprofile/backtrace.c | 2 +- 7 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 396d21a80058..3369e2c83609 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -143,7 +143,7 @@ static inline void __user *compat_alloc_user_space(long len) * We cant access below the stack pointer in the 32bit ABI and * can access 288 bytes in the 64bit ABI */ - if (!(test_thread_flag(TIF_32BIT))) + if (!is_32bit_task()) usp -= 288; return (void __user *) (usp - len); @@ -213,7 +213,7 @@ struct compat_shmid64_ds { static inline int is_compat_task(void) { - return test_thread_flag(TIF_32BIT); + return is_32bit_task(); } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index c376eda15313..2b917c69ed15 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -250,7 +250,7 @@ do { \ * the 64bit ABI has never had these issues dont enable the workaround * even if we have an executable stack. */ -# define elf_read_implies_exec(ex, exec_stk) (test_thread_flag(TIF_32BIT) ? \ +# define elf_read_implies_exec(ex, exec_stk) (is_32bit_task() ? \ (exec_stk == EXSTACK_DEFAULT) : 0) #else # define SET_PERSONALITY(ex) \ diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 358ff14ea25e..932f88dcf6fa 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -163,7 +163,7 @@ do { \ #endif /* !CONFIG_HUGETLB_PAGE */ #define VM_DATA_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ + (is_32bit_task() ? \ VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) /* @@ -179,7 +179,7 @@ do { \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) #define VM_STACK_DEFAULT_FLAGS \ - (test_thread_flag(TIF_32BIT) ? \ + (is_32bit_task() ? 
\ VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) #include diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 19c05b0f74be..4c14187ba02d 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -118,7 +118,7 @@ extern struct task_struct *last_task_used_spe; #define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4)) #define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_USER64 / 4)) -#define TASK_UNMAPPED_BASE ((test_thread_flag(TIF_32BIT)) ? \ +#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \ TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 ) #endif @@ -128,7 +128,7 @@ extern struct task_struct *last_task_used_spe; #define STACK_TOP_USER64 TASK_SIZE_USER64 #define STACK_TOP_USER32 TASK_SIZE_USER32 -#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \ +#define STACK_TOP (is_32bit_task() ? \ STACK_TOP_USER32 : STACK_TOP_USER64) #define STACK_TOP_MAX STACK_TOP_USER64 diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 11f3cd9c832f..286d9783d93f 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1681,7 +1681,7 @@ long do_syscall_trace_enter(struct pt_regs *regs) if (unlikely(current->audit_context)) { #ifdef CONFIG_PPC64 - if (!test_thread_flag(TIF_32BIT)) + if (!is_32bit_task()) audit_syscall_entry(AUDIT_ARCH_PPC64, regs->gpr[0], regs->gpr[3], regs->gpr[4], diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 13002fe206e7..fd8728729abc 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -159,7 +159,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma) { int i; - if (!vma || test_thread_flag(TIF_32BIT)) { + if (!vma || is_32bit_task()) { printk("vDSO32 @ %016lx:\n", (unsigned long)vdso32_kbase); for (i=0; i Date: Mon, 30 Aug 2010 19:23:52 +0000 Subject: powerpc/dma: Add optional platform override of dma_set_mask() Some platforms may want to override dma_set_mask() to take into account some specific "features" such as the availability of a direct-map window in addition to an iommu. 
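As an illustration (not part of the patch): a platform with a direct-map window could hook the new override roughly as follows. The "foo" platform and its foo_has_direct_window() helper are invented for this sketch; dma_direct_ops, dma_iommu_ops and ppc_md.dma_set_mask are the real objects touched by the diff below.

static int foo_dma_set_mask(struct device *dev, u64 dma_mask)
{
	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
		return -EIO;
	/* Hypothetical check: can this device reach all of RAM directly? */
	if (foo_has_direct_window(dev) && dma_mask >= DMA_BIT_MASK(64))
		set_dma_ops(dev, &dma_direct_ops);
	else
		set_dma_ops(dev, &dma_iommu_ops);
	*dev->dma_mask = dma_mask;
	return 0;
}

static void __init foo_setup_arch(void)
{
	/* Install the per-platform override added to struct machdep_calls */
	ppc_md.dma_set_mask = foo_dma_set_mask;
}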
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/dma-mapping.h | 14 +------------- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/kernel/dma.c | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 8c9c6ad2004e..6d2416a85709 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -127,19 +127,7 @@ static inline int dma_supported(struct device *dev, u64 mask) return dma_ops->dma_supported(dev, mask); } -static inline int dma_set_mask(struct device *dev, u64 dma_mask) -{ - struct dma_map_ops *dma_ops = get_dma_ops(dev); - - if (unlikely(dma_ops == NULL)) - return -EIO; - if (dma_ops->set_dma_mask != NULL) - return dma_ops->set_dma_mask(dev, dma_mask); - if (!dev->dma_mask || !dma_supported(dev, dma_mask)) - return -EIO; - *dev->dma_mask = dma_mask; - return 0; -} +extern int dma_set_mask(struct device *dev, u64 dma_mask); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index adc8e6cdf339..d045b0145537 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -102,6 +102,9 @@ struct machdep_calls { void (*pci_dma_dev_setup)(struct pci_dev *dev); void (*pci_dma_bus_setup)(struct pci_bus *bus); + /* Platform set_dma_mask override */ + int (*dma_set_mask)(struct device *dev, u64 dma_mask); + int (*probe)(void); void (*setup_arch)(void); /* Optional, may be NULL */ void (*init_early)(void); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 84d6367ec003..f368c075c90b 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -12,6 +12,7 @@ #include #include #include +#include /* * Generic direct DMA implementation @@ -154,6 +155,23 @@ EXPORT_SYMBOL(dma_direct_ops); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) +int dma_set_mask(struct device *dev, u64 dma_mask) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + + if (ppc_md.dma_set_mask) + return ppc_md.dma_set_mask(dev, dma_mask); + if (unlikely(dma_ops == NULL)) + return -EIO; + if (dma_ops->set_dma_mask != NULL) + return dma_ops->set_dma_mask(dev, dma_mask); + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + *dev->dma_mask = dma_mask; + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + static int __init dma_init(void) { dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); -- cgit v1.2.3 From 8fb07c0444c37caa39a8df7c70a694c6211f2f57 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 30 Aug 2010 19:24:18 +0000 Subject: powerpc/dart_iommu: Support for 64-bit iommu bypass window on PCIe The PCI-Express bus off the U4/CPC945 bridge supports direct DMA to all of memory, bypassing the DART iommu, for 64-bit capable devices. This adds support for it on Bimini and Apple Quad G5's in order to improve DMA performance of cards using that slot (the x16 graphics slot). Tested with an Intel ixgbe 10GE card.
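For illustration only (not from the patch): the driver side needs no DART-specific knowledge. A 64-bit capable device simply requests a wide mask and the platform hook picks the bypass window when it can; foo_probe() is an invented example.

static int foo_probe(struct pci_dev *pdev)
{
	/* Routed through ppc_md.dma_set_mask to dart_dma_set_mask() below */
	if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) == 0)
		return 0;	/* platform chose bypass or iommu for us */
	/* Otherwise fall back to 32-bit DMA through the DART iommu */
	return dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
}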
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/sysdev/dart_iommu.c | 74 ++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 559db2b846a9..17cf15ec38be 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -70,6 +70,8 @@ static int iommu_table_dart_inited; static int dart_dirty; static int dart_is_u4; +#define DART_U4_BYPASS_BASE 0x8000000000ull + #define DBG(...) static inline void dart_tlb_invalidate_all(void) @@ -292,12 +294,20 @@ static void iommu_table_dart_setup(void) set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map); } -static void pci_dma_dev_setup_dart(struct pci_dev *dev) +static void dma_dev_setup_dart(struct device *dev) { /* We only have one iommu table on the mac for now, which makes * things simple. Setup all PCI devices to point to this table */ - set_iommu_table_base(&dev->dev, &iommu_table_dart); + if (get_dma_ops(dev) == &dma_direct_ops) + set_dma_offset(dev, DART_U4_BYPASS_BASE); + else + set_iommu_table_base(dev, &iommu_table_dart); +} + +static void pci_dma_dev_setup_dart(struct pci_dev *dev) +{ + dma_dev_setup_dart(&dev->dev); } static void pci_dma_bus_setup_dart(struct pci_bus *bus) @@ -315,6 +325,45 @@ static void pci_dma_bus_setup_dart(struct pci_bus *bus) PCI_DN(dn)->iommu_table = &iommu_table_dart; } +static bool dart_device_on_pcie(struct device *dev) +{ + struct device_node *np = of_node_get(dev->of_node); + + while(np) { + if (of_device_is_compatible(np, "U4-pcie") || + of_device_is_compatible(np, "u4-pcie")) { + of_node_put(np); + return true; + } + np = of_get_next_parent(np); + } + return false; +} + +static int dart_dma_set_mask(struct device *dev, u64 dma_mask) +{ + if (!dev->dma_mask || !dma_supported(dev, dma_mask)) + return -EIO; + + /* U4 supports a DART bypass, we use it for 64-bit capable + * devices to improve performances. However, that only works + * for devices connected to U4 own PCIe interface, not bridged + * through hypertransport. We need the device to support at + * least 40 bits of addresses. 
+ */ + if (dart_device_on_pcie(dev) && dma_mask >= DMA_BIT_MASK(40)) { + dev_info(dev, "Using 64-bit DMA iommu bypass\n"); + set_dma_ops(dev, &dma_direct_ops); + } else { + dev_info(dev, "Using 32-bit DMA via iommu\n"); + set_dma_ops(dev, &dma_iommu_ops); + } + dma_dev_setup_dart(dev); + + *dev->dma_mask = dma_mask; + return 0; +} + void __init iommu_init_early_dart(void) { struct device_node *dn; @@ -328,20 +377,25 @@ void __init iommu_init_early_dart(void) dart_is_u4 = 1; } + /* Initialize the DART HW */ + if (dart_init(dn) != 0) + goto bail; + /* Setup low level TCE operations for the core IOMMU code */ ppc_md.tce_build = dart_build; ppc_md.tce_free = dart_free; ppc_md.tce_flush = dart_flush; - /* Initialize the DART HW */ - if (dart_init(dn) == 0) { - ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart; - ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart; + /* Setup bypass if supported */ + if (dart_is_u4) + ppc_md.dma_set_mask = dart_dma_set_mask; - /* Setup pci_dma ops */ - set_pci_dma_ops(&dma_iommu_ops); - return; - } + ppc_md.pci_dma_dev_setup = pci_dma_dev_setup_dart; + ppc_md.pci_dma_bus_setup = pci_dma_bus_setup_dart; + + /* Setup pci_dma ops */ + set_pci_dma_ops(&dma_iommu_ops); + return; bail: /* If init failed, use direct iommu and null setup functions */ -- cgit v1.2.3 From a8e25c61546bdc30d936bc0f4fd3a7cb594490c5 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 28 Aug 2010 23:52:45 +0000 Subject: powerpc/maple: Add of_node_put to avoid memory leak Add a call to of_node_put in the error handling code following a call to of_find_node_by_path. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ local idexpression x; expression E,E1; statement S; @@ *x = (of_find_node_by_path |of_find_node_by_name |of_find_node_by_phandle |of_get_parent |of_get_next_parent |of_get_next_child |of_find_compatible_node |of_match_node )(...); ... if (x == NULL) S <... when != x = E *if (...) { ... when != of_node_put(x) when != if (...) { ... of_node_put(x); ... } ( return <+...x...+>; | * return ...; ) } ...> of_node_put(x); // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/maple/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index 3fff8d979b41..fe34c3d9bb74 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -358,6 +358,7 @@ static int __init maple_cpc925_edac_setup(void) model = (const unsigned char *)of_get_property(np, "model", NULL); if (!model) { printk(KERN_ERR "%s: Unabel to get model info\n", __func__); + of_node_put(np); return -ENODEV; } -- cgit v1.2.3 From 0373721b19217c85c8c9435b79f0bac88fec9f26 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 28 Aug 2010 23:52:43 +0000 Subject: powerpc/powermac/pfunc_core.c: Add of_node_put to avoid memory leak Add a call to of_node_put in the error handling code following a call to of_find_node_by_phandle. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ local idexpression x; expression E,E1; statement S; @@ *x = (of_find_node_by_path |of_find_node_by_name |of_find_node_by_phandle |of_get_parent |of_get_next_parent |of_get_next_child |of_find_compatible_node |of_match_node )(...); ... if (x == NULL) S <... when != x = E *if (...) { ... when != of_node_put(x) when != if (...) { ... of_node_put(x); ... 
} ( return <+...x...+>; | * return ...; ) } ...> of_node_put(x); // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powermac/pfunc_core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/powermac/pfunc_core.c b/arch/powerpc/platforms/powermac/pfunc_core.c index cec635942657..b0c3777528a1 100644 --- a/arch/powerpc/platforms/powermac/pfunc_core.c +++ b/arch/powerpc/platforms/powermac/pfunc_core.c @@ -837,8 +837,10 @@ struct pmf_function *__pmf_find_function(struct device_node *target, return NULL; find_it: dev = pmf_find_device(actor); - if (dev == NULL) - return NULL; + if (dev == NULL) { + result = NULL; + goto out; + } list_for_each_entry(func, &dev->functions, link) { if (name && strcmp(name, func->name)) @@ -850,8 +852,9 @@ struct pmf_function *__pmf_find_function(struct device_node *target, result = func; break; } - of_node_put(actor); pmf_put_device(dev); +out: + of_node_put(actor); return result; } -- cgit v1.2.3 From 182f30e4b9f6e26d565eb432d1140a487c64fa17 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 28 Aug 2010 23:52:46 +0000 Subject: powerpc/cell: Add of_node_put to avoid memory leak Add calls to of_node_put in the error handling code following calls to of_find_node_by_path and of_find_node_by_phandle. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ local idexpression x; expression E,E1; statement S; @@ *x = (of_find_node_by_path |of_find_node_by_name |of_find_node_by_phandle |of_get_parent |of_get_next_parent |of_get_next_child |of_find_compatible_node |of_match_node )(...); ... if (x == NULL) S <... when != x = E *if (...) { ... when != of_node_put(x) when != if (...) { ... of_node_put(x); ... } ( return <+...x...+>; | * return ...; ) } ...> of_node_put(x); // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/cell/ras.c | 4 +++- arch/powerpc/platforms/cell/spider-pic.c | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 1d3c4effea10..5ec1e47a0d77 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -173,8 +173,10 @@ static int __init cbe_ptcal_enable(void) return -ENODEV; size = of_get_property(np, "ibm,cbe-ptcal-size", NULL); - if (!size) + if (!size) { + of_node_put(np); return -ENODEV; + } pr_debug("%s: enabling PTCAL, size = 0x%x\n", __func__, *size); order = get_order(*size); diff --git a/arch/powerpc/platforms/cell/spider-pic.c b/arch/powerpc/platforms/cell/spider-pic.c index 5876e888e412..3f2e557344a3 100644 --- a/arch/powerpc/platforms/cell/spider-pic.c +++ b/arch/powerpc/platforms/cell/spider-pic.c @@ -258,8 +258,10 @@ static unsigned int __init spider_find_cascade_and_node(struct spider_pic *pic) return NO_IRQ; imap += intsize + 1; tmp = of_get_property(iic, "#interrupt-cells", NULL); - if (tmp == NULL) + if (tmp == NULL) { + of_node_put(iic); return NO_IRQ; + } intsize = *tmp; /* Assume unit is last entry of interrupt specifier */ unit = imap[intsize - 1]; -- cgit v1.2.3 From 7cf9bac559d233ff3a7fdbbdc4fc4c184abc5f59 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 31 Aug 2010 05:48:58 +0000 Subject: powerpc/chrp/nvram.c: Add of_node_put to avoid memory leak Add a call to of_node_put in the error handling code following a call to of_find_node_by_type. 
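(Illustrative sketch, not part of the patch: the leak pattern this series of fixes addresses. A successful of_find_*() call returns a node with its reference count raised, so every early exit must drop that reference; the function below is generic.)

static int __init foo_nvram_setup(void)
{
	struct device_node *np;
	const unsigned int *prop;

	np = of_find_node_by_type(NULL, "nvram");	/* takes a reference */
	if (np == NULL)
		return -ENODEV;

	prop = of_get_property(np, "#bytes", NULL);
	if (prop == NULL) {
		of_node_put(np);	/* the fix: drop the reference on error, too */
		return -ENODEV;
	}
	/* ... use *prop ... */
	of_node_put(np);
	return 0;
}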
The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ local idexpression x; expression E,E1,E2; statement S; @@ *x = (of_find_node_by_path |of_find_node_by_name |of_find_node_by_phandle |of_get_parent |of_get_next_parent |of_get_next_child |of_find_compatible_node |of_match_node |of_find_node_by_type |of_find_node_with_property |of_find_matching_node |of_parse_phandle )(...); ... if (x == NULL) S <... when != x = E *if (...) { ... when != of_node_put(x) when != if (...) { ... of_node_put(x); ... } ( return <+...x...+>; | * return ...; ) } ...> ( E2 = x; | of_node_put(x); ) // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/chrp/nvram.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/chrp/nvram.c b/arch/powerpc/platforms/chrp/nvram.c index ba3588f2d8e0..d3ceff04ffc7 100644 --- a/arch/powerpc/platforms/chrp/nvram.c +++ b/arch/powerpc/platforms/chrp/nvram.c @@ -74,8 +74,10 @@ void __init chrp_nvram_init(void) return; nbytes_p = of_get_property(nvram, "#bytes", &proplen); - if (nbytes_p == NULL || proplen != sizeof(unsigned int)) + if (nbytes_p == NULL || proplen != sizeof(unsigned int)) { + of_node_put(nvram); return; + } nvram_size = *nbytes_p; -- cgit v1.2.3 From 86250b9d12caa1a3dee12a7cf638b7dd70eaadb6 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Wed, 25 Aug 2010 18:50:28 +0000 Subject: powerpc: Wire up direct socket system calls This patch wires up the various socket system calls on PowerPC so that userspace can call them directly, rather than by going through the multiplexed socketcall system call. Signed-off-by: Ian Munsie Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/systbl.h | 19 +++++++++++++++++++ arch/powerpc/include/asm/unistd.h | 21 ++++++++++++++++++++- 2 files changed, 39 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 3d212669a130..aa0f1ebb4aaf 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -329,3 +329,22 @@ COMPAT_SYS(rt_tgsigqueueinfo) SYSCALL(fanotify_init) COMPAT_SYS(fanotify_mark) SYSCALL_SPU(prlimit64) +SYSCALL_SPU(socket) +SYSCALL_SPU(bind) +SYSCALL_SPU(connect) +SYSCALL_SPU(listen) +SYSCALL_SPU(accept) +SYSCALL_SPU(getsockname) +SYSCALL_SPU(getpeername) +SYSCALL_SPU(socketpair) +SYSCALL_SPU(send) +SYSCALL_SPU(sendto) +COMPAT_SYS_SPU(recv) +COMPAT_SYS_SPU(recvfrom) +SYSCALL_SPU(shutdown) +COMPAT_SYS_SPU(setsockopt) +COMPAT_SYS_SPU(getsockopt) +COMPAT_SYS_SPU(sendmsg) +COMPAT_SYS_SPU(recvmsg) +COMPAT_SYS_SPU(recvmmsg) +SYSCALL_SPU(accept4) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 597e6f9d094a..6151937657f6 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -348,10 +348,29 @@ #define __NR_fanotify_init 323 #define __NR_fanotify_mark 324 #define __NR_prlimit64 325 +#define __NR_socket 326 +#define __NR_bind 327 +#define __NR_connect 328 +#define __NR_listen 329 +#define __NR_accept 330 +#define __NR_getsockname 331 +#define __NR_getpeername 332 +#define __NR_socketpair 333 +#define __NR_send 334 +#define __NR_sendto 335 +#define __NR_recv 336 +#define __NR_recvfrom 337 +#define __NR_shutdown 338 +#define __NR_setsockopt 339 +#define __NR_getsockopt 340 +#define __NR_sendmsg 341 +#define __NR_recvmsg 342 +#define __NR_recvmmsg 343 +#define __NR_accept4 344 #ifdef 
__KERNEL__ -#define __NR_syscalls 326 +#define __NR_syscalls 345 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls -- cgit v1.2.3 From 0d35e1620d2882d74faed90d9ac457bf6c7a0886 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Tue, 31 Aug 2010 13:24:44 +0000 Subject: powerpc/mm: Assume first cpu is boot_cpuid not 0 arch/powerpc/mm/mmu_context_nohash.c assumes the boot cpu will always have smp_processor_id() == 0. This patch fixes that assumption Signed-off-by: Matthew McClintock Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/mmu_context_nohash.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index ddfd7ad4e1d6..5ce99848d91e 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -334,7 +334,7 @@ static int __cpuinit mmu_context_cpu_notify(struct notifier_block *self, /* We don't touch CPU 0 map, it's allocated at aboot and kept * around forever */ - if (cpu == 0) + if (cpu == boot_cpuid) return NOTIFY_OK; switch (action) { @@ -420,9 +420,11 @@ void __init mmu_context_init(void) */ context_map = alloc_bootmem(CTX_MAP_SIZE); context_mm = alloc_bootmem(sizeof(void *) * (last_context + 1)); +#ifndef CONFIG_SMP stale_map[0] = alloc_bootmem(CTX_MAP_SIZE); +#else + stale_map[boot_cpuid] = alloc_bootmem(CTX_MAP_SIZE); -#ifdef CONFIG_SMP register_cpu_notifier(&mmu_context_cpu_nb); #endif -- cgit v1.2.3 From 025c0186a0357b0bd92039a927a07860e8be4205 Mon Sep 17 00:00:00 2001 From: Sean MacLennan Date: Wed, 1 Sep 2010 07:21:21 +0000 Subject: powerpc: Fix incorrect .stabs entry for copy_32.S Signed-off-by: Sean MacLennan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/copy_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 74a7f4130b4c..55f19f9fd708 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -62,7 +62,7 @@ .text .stabs "arch/powerpc/lib/",N_SO,0,0,0f - .stabs "copy32.S",N_SO,0,0,0f + .stabs "copy_32.S",N_SO,0,0,0f 0: CACHELINE_BYTES = L1_CACHE_BYTES -- cgit v1.2.3 From cd64d1697cf079bb8a67766e36e88ced38498933 Mon Sep 17 00:00:00 2001 From: Sean MacLennan Date: Wed, 1 Sep 2010 07:21:21 +0000 Subject: powerpc: mtmsrd not defined Replace the BOOK3S_64 specific mtmsrd with the generic MTMSRD macro. Only enable ldstfp when CONFIG_PPC_FPU is set. 
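For reference, a simplified rendering of what the generic macro expands to (the real definition lives in asm/ppc_asm.h): mtmsrd only exists on 64-bit CPUs, so 32-bit builds have to use plain mtmsr.

#ifdef CONFIG_PPC64
#define MTMSRD(r)	mtmsrd	r
#else
#define MTMSRD(r)	mtmsr	r
#endif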
Signed-off-by: Sean MacLennan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/lib/ldstfp.S | 36 ++++++++++++++++++++---------------- arch/powerpc/lib/sstep.c | 8 ++++++++ 2 files changed, 28 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/lib/ldstfp.S b/arch/powerpc/lib/ldstfp.S index f6448636baf5..6a85380520b6 100644 --- a/arch/powerpc/lib/ldstfp.S +++ b/arch/powerpc/lib/ldstfp.S @@ -17,6 +17,8 @@ #include #include +#ifdef CONFIG_PPC_FPU + #define STKFRM (PPC_MIN_STKFRM + 16) .macro extab instr,handler @@ -81,7 +83,7 @@ _GLOBAL(do_lfs) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -93,7 +95,7 @@ _GLOBAL(do_lfs) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -108,7 +110,7 @@ _GLOBAL(do_lfd) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -120,7 +122,7 @@ _GLOBAL(do_lfd) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -135,7 +137,7 @@ _GLOBAL(do_stfs) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -147,7 +149,7 @@ _GLOBAL(do_stfs) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -162,7 +164,7 @@ _GLOBAL(do_stfd) mfmsr r6 ori r7,r6,MSR_FP cmpwi cr7,r3,0 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stfd fr0,STKFRM-16(r1) @@ -174,7 +176,7 @@ _GLOBAL(do_stfd) lfd fr0,STKFRM-16(r1) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -229,7 +231,7 @@ _GLOBAL(do_lvx) oris r7,r6,MSR_VEC@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stvx vr0,r1,r8 @@ -241,7 +243,7 @@ _GLOBAL(do_lvx) lvx vr0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -257,7 +259,7 @@ _GLOBAL(do_stvx) oris r7,r6,MSR_VEC@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f stvx vr0,r1,r8 @@ -269,7 +271,7 @@ _GLOBAL(do_stvx) lvx vr0,r1,r8 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -325,7 +327,7 @@ _GLOBAL(do_lxvd2x) oris r7,r6,MSR_VSX@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f STXVD2X(0,r1,r8) @@ -337,7 +339,7 @@ _GLOBAL(do_lxvd2x) LXVD2X(0,r1,r8) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -353,7 +355,7 @@ _GLOBAL(do_stxvd2x) oris r7,r6,MSR_VSX@h cmpwi cr7,r3,0 li r8,STKFRM-16 - mtmsrd r7 + MTMSRD(r7) isync beq cr7,1f STXVD2X(0,r1,r8) @@ -365,7 +367,7 @@ _GLOBAL(do_stxvd2x) LXVD2X(0,r1,r8) 4: PPC_LL r0,STKFRM+PPC_LR_STKOFF(r1) mtlr r0 - mtmsrd r6 + MTMSRD(r6) isync mr r3,r9 addi r1,r1,STKFRM @@ -373,3 +375,5 @@ _GLOBAL(do_stxvd2x) extab 2b,3b #endif /* CONFIG_VSX */ + +#endif /* CONFIG_PPC_FPU */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e0a9858d537e..ae5189ab0049 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -30,6 +30,7 @@ extern char system_call_common[]; #define XER_OV 0x40000000U #define XER_CA 0x20000000U +#ifdef CONFIG_PPC_FPU /* * Functions in ldstfp.S */ @@ -41,6 +42,7 @@ extern int do_lvx(int rn, unsigned long ea); extern int do_stvx(int rn, unsigned long ea); extern int 
do_lxvd2x(int rn, unsigned long ea); extern int do_stxvd2x(int rn, unsigned long ea); +#endif /* * Determine whether a conditional branch instruction would branch. @@ -290,6 +292,7 @@ static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb, return write_mem_unaligned(val, ea, nb, regs); } +#ifdef CONFIG_PPC_FPU /* * Check the address and alignment, and call func to do the actual * load or store. @@ -351,6 +354,7 @@ static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long), } return err; } +#endif #ifdef CONFIG_ALTIVEC /* For Altivec/VMX, no need to worry about alignment */ @@ -1393,6 +1397,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) regs->gpr[rd] = byterev_4(val); goto ldst_done; +#ifdef CONFIG_PPC_FPU case 535: /* lfsx */ case 567: /* lfsux */ if (!(regs->msr & MSR_FP)) @@ -1424,6 +1429,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) ea = xform_ea(instr, regs, u); err = do_fp_store(rd, do_stfd, ea, 8, regs); goto ldst_done; +#endif #ifdef __powerpc64__ case 660: /* stdbrx */ @@ -1534,6 +1540,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) } while (++rd < 32); goto instr_done; +#ifdef CONFIG_PPC_FPU case 48: /* lfs */ case 49: /* lfsu */ if (!(regs->msr & MSR_FP)) @@ -1565,6 +1572,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) ea = dform_ea(instr, regs); err = do_fp_store(rd, do_stfd, ea, 8, regs); goto ldst_done; +#endif #ifdef __powerpc64__ case 58: /* ld[u], lwa */ -- cgit v1.2.3 From 915b96191f01d53e30d74083dcf4aebfb5b7ce10 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 4 Sep 2010 09:13:17 +0200 Subject: powerpc/5200: efika.c: Add of_node_put to avoid memory leak This function is implemented as though the function of_get_next_child does not increment the reference count of its result, but actually it does. Thus the patch adds of_node_put in error handling code and drops a call to of_node_get. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ local idexpression x; expression E1; position p1,p2; @@ x@p1 = of_get_next_child(...); ...
when != x = E1 of_node_get@p2(x) @script:python@ p1 << r.p1; p2 << r.p2; @@ cocci.print_main("call",p1) cocci.print_secs("get",p2) // Signed-off-by: Julia Lawall Signed-off-by: Grant Likely --- arch/powerpc/platforms/52xx/efika.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 45c0cb9b67e6..18c104820198 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -99,7 +99,7 @@ static void __init efika_pcisetup(void) if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING EFIKA_PLATFORM_NAME ": Can't get bus-range for %s\n", pcictrl->full_name); - return; + goto out_put; } if (bus_range[1] == bus_range[0]) @@ -111,12 +111,12 @@ static void __init efika_pcisetup(void) printk(" controlled by %s\n", pcictrl->full_name); printk("\n"); - hose = pcibios_alloc_controller(of_node_get(pcictrl)); + hose = pcibios_alloc_controller(pcictrl); if (!hose) { printk(KERN_WARNING EFIKA_PLATFORM_NAME ": Can't allocate PCI controller structure for %s\n", pcictrl->full_name); - return; + goto out_put; } hose->first_busno = bus_range[0]; @@ -124,6 +124,9 @@ static void __init efika_pcisetup(void) hose->ops = &rtas_pci_ops; pci_process_bridge_OF_ranges(hose, pcictrl, 0); + return; +out_put: + of_node_put(pcictrl); } #else -- cgit v1.2.3 From fa32154e47a203688453e53c1369fcbc63b06a21 Mon Sep 17 00:00:00 2001 From: Eric Millbrandt Date: Fri, 3 Sep 2010 13:27:38 -0400 Subject: powerpc/5200: tighten up ac97 reset timing Tighten up the timing around the gpio reset functionality. Add a 200ns delay before remuxing the pins back to ac97 to comply with the ac97 spec. Signed-off-by: Eric Millbrandt Signed-off-by: Grant Likely --- arch/powerpc/platforms/52xx/mpc52xx_common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index 6e905314ad5d..41f3a7eda1de 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -325,12 +325,16 @@ int mpc5200_psc_ac97_gpio_reset(int psc_number) clrbits32(&simple_gpio->simple_dvo, sync | out); clrbits8(&wkup_gpio->wkup_dvo, reset); - /* wait at lease 1 us */ - udelay(2); + /* wait for 1 us */ + udelay(1); /* Deassert reset */ setbits8(&wkup_gpio->wkup_dvo, reset); + /* wait at least 200ns */ + /* 7 ~= (200ns * timebase) / ns2sec */ + __delay(7); + /* Restore pin-muxing */ out_be32(&simple_gpio->port_config, mux); -- cgit v1.2.3 From 51b0fe39549a04858001922919ab355dee9bdfcf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:57 +0200 Subject: perf: Deconstify struct pmu sed -ie 's/const struct pmu\>/struct pmu/g' `git grep -l "const struct pmu\>"` Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 8 ++++---- arch/powerpc/kernel/perf_event_fsl_emb.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30445e0..5f78681ad902 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -857,7
+857,7 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -870,7 +870,7 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); @@ -882,7 +882,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -1014,7 +1014,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; unsigned long flags; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba45471ae43..d7619b5e7a6e 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -428,7 +428,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +struct pmu *hw_perf_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; -- cgit v1.2.3 From b0a873ebbf87bf38bf70b5e39a7cadc96099fa13 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 13:35:08 +0200 Subject: perf: Register PMU implementations Simple registration interface for struct pmu; this provides the infrastructure for removing all the weak functions. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 46 +++++++++++++++++--------------- arch/powerpc/kernel/perf_event_fsl_emb.c | 37 ++++++++++++------------- 2 files changed, 43 insertions(+), 40 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 5f78681ad902..19131b2614b9 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -904,16 +904,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not.
@@ -1014,7 +1004,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1016,27 @@ struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1081,7 +1073,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1087,19 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1124,20 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .event_init = power_pmu_event_init, + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1342,6 +1343,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index d7619b5e7a6e..ea6a804e43fd 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -378,13 +378,6 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; - /* * Release the PMU if this is the last perf_event. 
*/ @@ -428,7 +421,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +434,14 @@ struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +449,12 @@ struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +466,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +477,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +490,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +516,17 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .event_init = fsl_emb_pmu_event_init, + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -651,5 +650,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } -- cgit v1.2.3 From 24cd7f54a0d47e1d5b3de29e2456bfbd2d8447b7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 11 Jun 2010 17:32:03 +0200 Subject: perf: Reduce perf_disable() usage Since the current perf_disable() usage is only an optimization, remove it for now. This eases the removal of the __weak hw_perf_enable() interface. 
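(Illustration, not from the commit: the resulting shape of the arch code. Paths that previously ran with the PMU already disabled by the core now do their own bracketing, as the fsl_emb hunks below show; program_counter() is an invented placeholder.)

static int foo_pmu_enable(struct perf_event *event)
{
	int ret;

	perf_disable();			/* no longer done by the caller */
	ret = program_counter(event);	/* hypothetical hardware update */
	perf_enable();
	return ret;
}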
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 3 +++ arch/powerpc/kernel/perf_event_fsl_emb.c | 8 ++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 19131b2614b9..c1408821dbc2 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -861,6 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_disable(); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -875,6 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); } /* @@ -901,6 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_enable(); return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index ea6a804e43fd..9bc84a7fd901 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -262,7 +262,7 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static int fsl_emb_pmu_enable(struct perf_event *event) { struct cpu_hw_events *cpuhw; @@ -271,6 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_disable(); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -310,15 +311,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_enable(); return ret; } -/* perf must be disabled, context locked on entry */ +/* context locked on entry */ static void fsl_emb_pmu_disable(struct perf_event *event) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_disable(); if (i < 0) goto out; @@ -346,6 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_enable(); put_cpu_var(cpu_hw_events); } -- cgit v1.2.3 From 33696fc0d141bbbcb12f75b69608ea83282e3117 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 14 Jun 2010 08:49:00 +0200 Subject: perf: Per PMU disable Changes perf_disable() into perf_pmu_disable(). 
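(Illustrative only: the renamed pair names the PMU whose counters must be quiesced, preparing the ground for several independent PMUs; reprogram() is an invented placeholder.)

static void foo_update_event(struct perf_event *event)
{
	unsigned long flags;

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);	/* was: perf_disable() */
	reprogram(event);		/* hypothetical counter rewrite */
	perf_pmu_enable(event->pmu);	/* was: perf_enable() */
	local_irq_restore(flags);
}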
Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 24 +++++++++++++----------- arch/powerpc/kernel/perf_event_fsl_emb.c | 18 ++++++++++-------- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index c1408821dbc2..deb84bbcb0e6 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -517,7 +517,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +565,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -735,7 +735,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -769,7 +769,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -784,7 +784,7 @@ static void power_pmu_disable(struct perf_event *event) unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,7 +821,7 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -837,7 +837,7 @@ static void power_pmu_unthrottle(struct perf_event *event) if (!event->hw.idx || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -848,7 +848,7 @@ static void power_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -861,7 +861,7 @@ void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); - perf_disable(); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -876,7 +876,7 @@ void power_pmu_cancel_txn(struct pmu *pmu) struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); } /* @@ -903,7 +903,7 @@ int power_pmu_commit_txn(struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; - perf_enable(); + perf_pmu_enable(pmu); return 0; } @@ -1131,6 +1131,8 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { + .pmu_enable = power_pmu_pmu_enable, + .pmu_disable = power_pmu_pmu_disable, .event_init = power_pmu_event_init, .enable = power_pmu_enable, .disable = power_pmu_disable, diff --git 
a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 9bc84a7fd901..84b1974c628f 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -177,7 +177,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +216,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -271,7 +271,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; - perf_disable(); + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -311,7 +311,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); - perf_enable(); + perf_pmu_enable(event->pmu); return ret; } @@ -321,7 +321,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) struct cpu_hw_events *cpuhw; int i = event->hw.idx; - perf_disable(); + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -349,7 +349,7 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: - perf_enable(); + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } @@ -367,7 +367,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) if (event->hw.idx < 0 || !event->hw.sample_period) return; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); fsl_emb_pmu_read(event); left = event->hw.sample_period; event->hw.last_period = left; @@ -378,7 +378,7 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local64_set(&event->hw.prev_count, val); local64_set(&event->hw.period_left, left); perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -524,6 +524,8 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_pmu_enable, + .pmu_disable = fsl_emb_pmu_pmu_disable, .event_init = fsl_emb_pmu_event_init, .enable = fsl_emb_pmu_enable, .disable = fsl_emb_pmu_disable, -- cgit v1.2.3 From a4eaf7f14675cb512d69f0c928055e73d0c6d252 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 16 Jun 2010 14:37:10 +0200 Subject: perf: Rework the PMU methods Replace pmu::{enable,disable,start,stop,unthrottle} with pmu::{add,del,start,stop}, all of which take a flags argument. The new interface extends the capability to stop a counter while keeping it scheduled on the PMU. We replace the throttled state with the generic stopped state. This also allows us to efficiently stop/start counters over certain code paths (like IRQ handlers). It also allows scheduling a counter without it starting, allowing for a generic frozen state (useful for rotating stopped counters). 
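A sketch of the resulting calling convention, using the flag and state names this series adds to include/linux/perf_event.h (most of them also appear in the hunks below):

	/* schedule an event onto the PMU and start counting immediately */
	pmu->add(event, PERF_EF_START);

	/* or schedule it on frozen and kick it later */
	pmu->add(event, 0);		/* leaves PERF_HES_STOPPED | PERF_HES_UPTODATE */
	pmu->start(event, PERF_EF_RELOAD);	/* reload the period and go */

	/* cheap stop/start, e.g. around an IRQ handler */
	pmu->stop(event, PERF_EF_UPDATE);	/* fold the count, mark STOPPED */
	pmu->start(event, PERF_EF_RELOAD);

	/* take it off the PMU entirely */
	pmu->del(event, 0);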
The stopped state is implemented in two different ways, depending on how the architecture implemented the throttled state: 1) We disable the counter: a) the pmu has per-counter enable bits, we flip that b) we program a NOP event, preserving the counter state 2) We store the counter state and ignore all read/overflow events Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 105 +++++++++++++++++++----------- arch/powerpc/kernel/perf_event_fsl_emb.c | 107 +++++++++++++++++++------------ 2 files changed, 134 insertions(+), 78 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index deb84bbcb0e6..9cb4924b6c07 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void power_pmu_pmu_disable(struct pmu *pmu) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ static void power_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void power_pmu_pmu_enable(struct pmu *pmu) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ static void power_pmu_pmu_enable(struct pmu *pmu) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -777,7 +785,7 @@ nocheck: /* * Remove a event from the PMU. */ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; @@ -826,27 +834,53 @@ static void power_pmu_disable(struct perf_event *event) } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. 
*/ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (!event->hw.idx || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -1131,13 +1165,14 @@ static int power_pmu_event_init(struct perf_event *event) } struct pmu power_pmu = { - .pmu_enable = power_pmu_pmu_enable, - .pmu_disable = power_pmu_pmu_disable, + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, .event_init = power_pmu_event_init, - .enable = power_pmu_enable, - .disable = power_pmu_disable, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, .start_txn = power_pmu_start_txn, .cancel_txn = power_pmu_cancel_txn, .commit_txn = power_pmu_commit_txn, @@ -1155,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1177,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1189,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). 
- */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 84b1974c628f..7ecca59ddf77 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ static void fsl_emb_pmu_pmu_disable(struct pmu *pmu) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -static void fsl_emb_pmu_pmu_enable(struct pmu *pmu) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -263,7 +266,7 @@ static int collect_events(struct perf_event *group, int max_count, } /* context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -302,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -316,7 +325,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) } /* context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; @@ -353,30 +362,49 @@ static void fsl_emb_pmu_disable(struct perf_event *event) put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. 
- */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); perf_pmu_disable(event->pmu); + fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); perf_pmu_enable(event->pmu); local_irq_restore(flags); @@ -524,13 +552,14 @@ static int fsl_emb_pmu_event_init(struct perf_event *event) } static struct pmu fsl_emb_pmu = { - .pmu_enable = fsl_emb_pmu_pmu_enable, - .pmu_disable = fsl_emb_pmu_pmu_disable, + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, .event_init = fsl_emb_pmu_event_init, - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, }; /* @@ -545,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -567,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -576,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). 
- */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) -- cgit v1.2.3 From 823108a056c52a83c32ca199a57566a36fad4d19 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 11 Sep 2010 00:08:42 -0700 Subject: powerpc, memblock: Fix memblock API change fallout Fix memblock API change fallout in the WII code. Signed-off-by: Yinghai Lu Cc: linux-mm@kvack.org Cc: Benjamin Herrenschmidt LKML-Reference: <4C8B2AFA.2000705@kernel.org> Signed-off-by: Ingo Molnar --- arch/powerpc/platforms/embedded6xx/wii.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/embedded6xx/wii.c b/arch/powerpc/platforms/embedded6xx/wii.c index 8450c29e9b2f..649473a729b8 100644 --- a/arch/powerpc/platforms/embedded6xx/wii.c +++ b/arch/powerpc/platforms/embedded6xx/wii.c @@ -65,7 +65,7 @@ static int __init page_aligned(unsigned long x) void __init wii_memory_fixups(void) { - struct memblock_region *p = memblock.memory.region; + struct memblock_region *p = memblock.memory.regions; /* * This is part of a workaround to allow the use of two -- cgit v1.2.3 From fe3b79dfdc34e562b8378fa958a503a2d5b077ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Jul 2010 22:40:19 +0200 Subject: spufs: use llseek in all file operations The default for llseek is changing, so we need explicit operations everywhere. Signed-off-by: Arnd Bergmann Cc: Jeremy Kerr Cc: linuxppc-dev@ozlabs.org --- arch/powerpc/platforms/cell/spufs/file.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 1a40da92154c..02f7b113a31b 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -154,6 +154,7 @@ static const struct file_operations __fops = { \ .release = spufs_attr_release, \ .read = spufs_attr_read, \ .write = spufs_attr_write, \ + .llseek = generic_file_llseek, \ }; @@ -521,6 +522,7 @@ static const struct file_operations spufs_cntl_fops = { .release = spufs_cntl_release, .read = simple_attr_read, .write = simple_attr_write, + .llseek = generic_file_llseek, .mmap = spufs_cntl_mmap, }; @@ -714,6 +716,7 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, static const struct file_operations spufs_mbox_fops = { .open = spufs_pipe_open, .read = spufs_mbox_read, + .llseek = no_llseek, }; static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, @@ -743,6 +746,7 @@ static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_mbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_mbox_stat_read, + .llseek = no_llseek, }; /* low-level ibox access function */ @@ -863,6 +867,7 @@ static const struct file_operations spufs_ibox_fops = { .read = spufs_ibox_read, .poll = spufs_ibox_poll, .fasync = spufs_ibox_fasync, + .llseek = no_llseek, }; static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, @@ -890,6 +895,7 @@ static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_ibox_stat_fops = { .open = spufs_pipe_open, .read = spufs_ibox_stat_read, + .llseek = no_llseek, }; /* 
low-level mailbox write */ @@ -1011,6 +1017,7 @@ static const struct file_operations spufs_wbox_fops = { .write = spufs_wbox_write, .poll = spufs_wbox_poll, .fasync = spufs_wbox_fasync, + .llseek = no_llseek, }; static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, @@ -1038,6 +1045,7 @@ static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_wbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_wbox_stat_read, + .llseek = no_llseek, }; static int spufs_signal1_open(struct inode *inode, struct file *file) @@ -1166,6 +1174,7 @@ static const struct file_operations spufs_signal1_fops = { .read = spufs_signal1_read, .write = spufs_signal1_write, .mmap = spufs_signal1_mmap, + .llseek = no_llseek, }; static const struct file_operations spufs_signal1_nosched_fops = { @@ -1173,6 +1182,7 @@ static const struct file_operations spufs_signal1_nosched_fops = { .release = spufs_signal1_release, .write = spufs_signal1_write, .mmap = spufs_signal1_mmap, + .llseek = no_llseek, }; static int spufs_signal2_open(struct inode *inode, struct file *file) @@ -1305,6 +1315,7 @@ static const struct file_operations spufs_signal2_fops = { .read = spufs_signal2_read, .write = spufs_signal2_write, .mmap = spufs_signal2_mmap, + .llseek = no_llseek, }; static const struct file_operations spufs_signal2_nosched_fops = { @@ -1312,6 +1323,7 @@ static const struct file_operations spufs_signal2_nosched_fops = { .release = spufs_signal2_release, .write = spufs_signal2_write, .mmap = spufs_signal2_mmap, + .llseek = no_llseek, }; /* @@ -1451,6 +1463,7 @@ static const struct file_operations spufs_mss_fops = { .open = spufs_mss_open, .release = spufs_mss_release, .mmap = spufs_mss_mmap, + .llseek = no_llseek, }; static int @@ -1508,6 +1521,7 @@ static const struct file_operations spufs_psmap_fops = { .open = spufs_psmap_open, .release = spufs_psmap_release, .mmap = spufs_psmap_mmap, + .llseek = no_llseek, }; @@ -1871,6 +1885,7 @@ static const struct file_operations spufs_mfc_fops = { .fsync = spufs_mfc_fsync, .fasync = spufs_mfc_fasync, .mmap = spufs_mfc_mmap, + .llseek = no_llseek, }; static int spufs_npc_set(void *data, u64 val) @@ -2246,6 +2261,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, static const struct file_operations spufs_dma_info_fops = { .open = spufs_info_open, .read = spufs_dma_info_read, + .llseek = no_llseek, }; static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, @@ -2299,6 +2315,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, static const struct file_operations spufs_proxydma_info_fops = { .open = spufs_info_open, .read = spufs_proxydma_info_read, + .llseek = no_llseek, }; static int spufs_show_tid(struct seq_file *s, void *private) @@ -2585,6 +2602,7 @@ static const struct file_operations spufs_switch_log_fops = { .read = spufs_switch_log_read, .poll = spufs_switch_log_poll, .release = spufs_switch_log_release, + .llseek = no_llseek, }; /** -- cgit v1.2.3 From 9f5f9ffe50e90ed73040d2100db8bfc341cee352 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 9 Sep 2010 19:02:40 +0000 Subject: powerpc/perf: Fix sampling enable for PPC970 The logic to distinguish marked instruction events from ordinary events on PPC970 and derivatives was flawed. The result is that instruction sampling didn't get enabled in the PMU for some marked instruction events, so they would never trigger. 
This fixes it by adding the appropriate break statements in the switch statement. Reported-by: David Binderman Cc: stable@kernel.org Signed-off-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ppc970-pmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index 8eff48e20dba..3fee685de4df 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c @@ -169,9 +169,11 @@ static int p970_marked_instr_event(u64 event) switch (unit) { case PM_VPU: mask = 0x4c; /* byte 0 bits 2,3,6 */ + break; case PM_LSU0: /* byte 2 bits 0,2,3,4,6; all of byte 1 */ mask = 0x085dff00; + break; case PM_LSU1L: mask = 0x50 << 24; /* byte 3 bits 4,6 */ break; -- cgit v1.2.3 From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Oct 2010 11:29:27 -0700 Subject: modules: Fix module_bug_list list corruption race With all the recent module loading cleanups, we've minimized the code that sits under module_mutex, fixing various deadlocks and making it possible to do most of the module loading in parallel. However, that whole conversion totally missed the rather obscure code that adds a new module to the list for BUG() handling. That code was doubly obscure because (a) the code itself lives in lib/bugs.c (for dubious reasons) and (b) it gets called from the architecture-specific "module_finalize()" rather than from generic code. Calling it from arch-specific code makes no sense what-so-ever to begin with, and is now actively wrong since that code isn't protected by the module loading lock any more. So this commit moves the "module_bug_{finalize,cleanup}()" calls away from the arch-specific code, and into the generic code - and in the process protects it with the module_mutex so that the list operations are now safe. Future fixups: - move the module list handling code into kernel/module.c where it belongs. - get rid of 'module_bug_list' and just use the regular list of modules (called 'modules' - imagine that) that we already create and maintain for other reasons. 
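The generic-code half of the fix, heavily abridged (kernel/module.c; exact context trimmed, so treat this as a sketch rather than the literal hunk):

	/* load_module(): the module is added to the list under module_mutex,
	 * and the BUG table registration now happens in the same region: */
	mutex_lock(&module_mutex);
	list_add_rcu(&mod->list, &modules);
	module_bug_finalize(info->hdr, info->sechdrs, mod);
	mutex_unlock(&module_mutex);

	/* free_module(): the matching cleanup, again under module_mutex: */
	mutex_lock(&module_mutex);
	module_bug_cleanup(mod);
	mutex_unlock(&module_mutex);

That is why the powerpc module_finalize()/module_arch_cleanup() below simply lose their calls.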
Reported-and-tested-by: Thomas Gleixner Cc: Rusty Russell Cc: Adrian Bunk Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/module.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663e0140..4ef93ae2235f 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -65,10 +65,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sect; int err; - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; - /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); if (sect != NULL) @@ -101,5 +97,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } -- cgit v1.2.3 From 7c6d45e665d5322401e4439060bbf758b08422d4 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 6 Oct 2010 11:06:44 +1100 Subject: powerpc: remove unused variable Since powerpc uses -Werror on arch powerpc, the build was broken like this: cc1: warnings being treated as errors arch/powerpc/kernel/module.c: In function 'module_finalize': arch/powerpc/kernel/module.c:66: error: unused variable 'err' Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/module.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 4ef93ae2235f..49cee9df225b 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -63,7 +63,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *sect; - int err; /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); -- cgit v1.2.3 From df9ee29270c11dba7d0fe0b83ce47a4d8e8d2101 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 7 Oct 2010 14:08:55 +0100 Subject: Fix IRQ flag handling naming Fix the IRQ flag handling naming. In linux/irqflags.h under one configuration, it maps: local_irq_enable() -> raw_local_irq_enable() local_irq_disable() -> raw_local_irq_disable() local_irq_save() -> raw_local_irq_save() ... and under the other configuration, it maps: raw_local_irq_enable() -> local_irq_enable() raw_local_irq_disable() -> local_irq_disable() raw_local_irq_save() -> local_irq_save() ... This is quite confusing. There should be one set of names expected of the arch, and this should be wrapped to give another set of names that are expected by users of this facility. Change this to have the arch provide: flags = arch_local_save_flags() flags = arch_local_irq_save() arch_local_irq_restore(flags) arch_local_irq_disable() arch_local_irq_enable() arch_irqs_disabled_flags(flags) arch_irqs_disabled() arch_safe_halt() Then linux/irqflags.h wraps these to provide: raw_local_save_flags(flags) raw_local_irq_save(flags) raw_local_irq_restore(flags) raw_local_irq_disable() raw_local_irq_enable() raw_irqs_disabled_flags(flags) raw_irqs_disabled() raw_safe_halt() with type checking on the flags 'arguments', and then wraps those to provide: local_save_flags(flags) local_irq_save(flags) local_irq_restore(flags) local_irq_disable() local_irq_enable() irqs_disabled_flags(flags) irqs_disabled() safe_halt() with tracing included if enabled. The arch functions can now all be inline functions rather than some of them having to be macros. 
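Laid out as code, the layering described above looks roughly like this (simplified from linux/irqflags.h; the real raw_* wrappers also typecheck 'flags', and the tracing variant shown assumes CONFIG_TRACE_IRQFLAGS):

	/* the architecture supplies the primitives: */
	unsigned long arch_local_irq_save(void);
	void arch_local_irq_restore(unsigned long flags);

	/* linux/irqflags.h wraps them once: */
	#define raw_local_irq_save(flags) \
		do { (flags) = arch_local_irq_save(); } while (0)

	/* ...and once more, adding tracing, for general use: */
	#define local_irq_save(flags) \
		do { raw_local_irq_save(flags); trace_hardirqs_off(); } while (0)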
Signed-off-by: David Howells [X86, FRV, MN10300] Signed-off-by: Chris Metcalf [Tile] Signed-off-by: Michal Simek [Microblaze] Tested-by: Catalin Marinas [ARM] Acked-by: Thomas Gleixner Acked-by: Haavard Skinnemoen [AVR] Acked-by: Tony Luck [IA-64] Acked-by: Hirokazu Takata [M32R] Acked-by: Greg Ungerer [M68K/M68KNOMMU] Acked-by: Ralf Baechle [MIPS] Acked-by: Kyle McMartin [PA-RISC] Acked-by: Paul Mackerras [PowerPC] Acked-by: Martin Schwidefsky [S390] Acked-by: Chen Liqin [Score] Acked-by: Matt Fleming [SH] Acked-by: David S. Miller [Sparc] Acked-by: Chris Zankel [Xtensa] Reviewed-by: Richard Henderson [Alpha] Reviewed-by: Yoshinori Sato [H8300] Cc: starvik@axis.com [CRIS] Cc: jesper.nilsson@axis.com [CRIS] Cc: linux-cris-kernel@axis.com --- arch/powerpc/include/asm/hw_irq.h | 113 ++++++++++++++++++++--------------- arch/powerpc/include/asm/irqflags.h | 2 +- arch/powerpc/kernel/exceptions-64s.S | 4 +- arch/powerpc/kernel/irq.c | 4 +- 4 files changed, 70 insertions(+), 53 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index bd100fcf40d0..ff08b70b36d4 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -16,42 +16,57 @@ extern void timer_interrupt(struct pt_regs *); #ifdef CONFIG_PPC64 #include -static inline unsigned long local_get_flags(void) +static inline unsigned long arch_local_save_flags(void) { unsigned long flags; - __asm__ __volatile__("lbz %0,%1(13)" - : "=r" (flags) - : "i" (offsetof(struct paca_struct, soft_enabled))); + asm volatile( + "lbz %0,%1(13)" + : "=r" (flags) + : "i" (offsetof(struct paca_struct, soft_enabled))); return flags; } -static inline unsigned long raw_local_irq_disable(void) +static inline unsigned long arch_local_irq_disable(void) { unsigned long flags, zero; - __asm__ __volatile__("li %1,0; lbz %0,%2(13); stb %1,%2(13)" - : "=r" (flags), "=&r" (zero) - : "i" (offsetof(struct paca_struct, soft_enabled)) - : "memory"); + asm volatile( + "li %1,0; lbz %0,%2(13); stb %1,%2(13)" + : "=r" (flags), "=&r" (zero) + : "i" (offsetof(struct paca_struct, soft_enabled)) + : "memory"); return flags; } -extern void raw_local_irq_restore(unsigned long); +extern void arch_local_irq_restore(unsigned long); extern void iseries_handle_interrupts(void); -#define raw_local_irq_enable() raw_local_irq_restore(1) -#define raw_local_save_flags(flags) ((flags) = local_get_flags()) -#define raw_local_irq_save(flags) ((flags) = raw_local_irq_disable()) +static inline void arch_local_irq_enable(void) +{ + arch_local_irq_restore(1); +} + +static inline unsigned long arch_local_irq_save(void) +{ + return arch_local_irq_disable(); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) +{ + return flags == 0; +} -#define raw_irqs_disabled() (local_get_flags() == 0) -#define raw_irqs_disabled_flags(flags) ((flags) == 0) +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} #ifdef CONFIG_PPC_BOOK3E -#define __hard_irq_enable() __asm__ __volatile__("wrteei 1": : :"memory"); -#define __hard_irq_disable() __asm__ __volatile__("wrteei 0": : :"memory"); +#define __hard_irq_enable() asm volatile("wrteei 1" : : : "memory"); +#define __hard_irq_disable() asm volatile("wrteei 0" : : : "memory"); #else #define __hard_irq_enable() __mtmsrd(mfmsr() | MSR_EE, 1) #define __hard_irq_disable() __mtmsrd(mfmsr() & ~MSR_EE, 1) @@ -64,64 +79,66 @@ extern void iseries_handle_interrupts(void); get_paca()->hard_enabled = 0; \ } 
while(0) -#else +#else /* CONFIG_PPC64 */ -#if defined(CONFIG_BOOKE) #define SET_MSR_EE(x) mtmsr(x) -#define raw_local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") + +static inline unsigned long arch_local_save_flags(void) +{ + return mfmsr(); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ +#if defined(CONFIG_BOOKE) + asm volatile("wrtee %0" : : "r" (flags) : "memory"); #else -#define SET_MSR_EE(x) mtmsr(x) -#define raw_local_irq_restore(flags) mtmsr(flags) + mtmsr(flags); #endif +} -static inline void raw_local_irq_disable(void) +static inline unsigned long arch_local_irq_save(void) { + unsigned long flags = arch_local_save_flags(); #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 0": : :"memory"); + asm volatile("wrteei 0" : : : "memory"); #else - unsigned long msr; - - msr = mfmsr(); - SET_MSR_EE(msr & ~MSR_EE); + SET_MSR_EE(flags & ~MSR_EE); #endif + return flags; } -static inline void raw_local_irq_enable(void) +static inline void arch_local_irq_disable(void) { #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 1": : :"memory"); + asm volatile("wrteei 0" : : : "memory"); #else - unsigned long msr; - - msr = mfmsr(); - SET_MSR_EE(msr | MSR_EE); + arch_local_irq_save(); #endif } -static inline void raw_local_irq_save_ptr(unsigned long *flags) +static inline void arch_local_irq_enable(void) { - unsigned long msr; - msr = mfmsr(); - *flags = msr; #ifdef CONFIG_BOOKE - __asm__ __volatile__("wrteei 0": : :"memory"); + asm volatile("wrteei 1" : : : "memory"); #else - SET_MSR_EE(msr & ~MSR_EE); + unsigned long msr = mfmsr(); + SET_MSR_EE(msr | MSR_EE); #endif } -#define raw_local_save_flags(flags) ((flags) = mfmsr()) -#define raw_local_irq_save(flags) raw_local_irq_save_ptr(&flags) -#define raw_irqs_disabled() ((mfmsr() & MSR_EE) == 0) -#define raw_irqs_disabled_flags(flags) (((flags) & MSR_EE) == 0) - -#define hard_irq_disable() raw_local_irq_disable() - -static inline int irqs_disabled_flags(unsigned long flags) +static inline bool arch_irqs_disabled_flags(unsigned long flags) { return (flags & MSR_EE) == 0; } +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +#define hard_irq_disable() arch_local_irq_disable() + #endif /* CONFIG_PPC64 */ /* diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h index 5f68ecfdf516..b85d8ddbb666 100644 --- a/arch/powerpc/include/asm/irqflags.h +++ b/arch/powerpc/include/asm/irqflags.h @@ -6,7 +6,7 @@ #ifndef __ASSEMBLY__ /* - * Get definitions for raw_local_save_flags(x), etc. + * Get definitions for arch_local_save_flags(x), etc. */ #include diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f53029a01554..39b0c48872d2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -818,12 +818,12 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES) /* * hash_page couldn't handle it, set soft interrupt enable back - * to what it was before the trap. Note that .raw_local_irq_restore + * to what it was before the trap. Note that .arch_local_irq_restore * handles any interrupts pending at this point. 
*/ ld r3,SOFTE(r1) TRACE_AND_RESTORE_IRQ_PARTIAL(r3, 11f) - bl .raw_local_irq_restore + bl .arch_local_irq_restore b 11f /* We have a data breakpoint exception - handle it */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4a65386995d7..1903290f5469 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -116,7 +116,7 @@ static inline notrace void set_soft_enabled(unsigned long enable) : : "r" (enable), "i" (offsetof(struct paca_struct, soft_enabled))); } -notrace void raw_local_irq_restore(unsigned long en) +notrace void arch_local_irq_restore(unsigned long en) { /* * get_paca()->soft_enabled = en; @@ -192,7 +192,7 @@ notrace void raw_local_irq_restore(unsigned long en) __hard_irq_enable(); } -EXPORT_SYMBOL(raw_local_irq_restore); +EXPORT_SYMBOL(arch_local_irq_restore); #endif /* CONFIG_PPC64 */ static int show_other_interrupts(struct seq_file *p, int prec) -- cgit v1.2.3 From f14362d1fe81cece6f1d78483e5bbfcf8cc497bf Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Fri, 1 Oct 2010 17:06:07 +1000 Subject: powerpc, of_serial: Endianness issues setting up the serial ports The speed and clock of the serial ports is retrieved from the device tree in both the PowerPC legacy serial code and the Open Firmware serial driver, therefore they need to handle the fact that the device tree is always big endian, while the CPU may not be. Also fix other device tree references in the legacy serial code. Signed-off-by: Ian Munsie Signed-off-by: Grant Likely --- arch/powerpc/kernel/legacy_serial.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index c1fd0f9658fd..c834757bebc0 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -52,14 +52,14 @@ static int __init add_legacy_port(struct device_node *np, int want_index, phys_addr_t taddr, unsigned long irq, upf_t flags, int irq_check_parent) { - const u32 *clk, *spd; + const __be32 *clk, *spd; u32 clock = BASE_BAUD * 16; int index; /* get clock freq. if present */ clk = of_get_property(np, "clock-frequency", NULL); if (clk && *clk) - clock = *clk; + clock = be32_to_cpup(clk); /* get default speed if present */ spd = of_get_property(np, "current-speed", NULL); @@ -109,7 +109,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_infos[index].taddr = taddr; legacy_serial_infos[index].np = of_node_get(np); legacy_serial_infos[index].clock = clock; - legacy_serial_infos[index].speed = spd ? *spd : 0; + legacy_serial_infos[index].speed = spd ? 
be32_to_cpup(spd) : 0; legacy_serial_infos[index].irq_check_parent = irq_check_parent; printk(KERN_DEBUG "Found legacy serial port %d for %s\n", @@ -168,7 +168,7 @@ static int __init add_legacy_soc_port(struct device_node *np, static int __init add_legacy_isa_port(struct device_node *np, struct device_node *isa_brg) { - const u32 *reg; + const __be32 *reg; const char *typep; int index = -1; u64 taddr; @@ -181,7 +181,7 @@ static int __init add_legacy_isa_port(struct device_node *np, return -1; /* Verify it's an IO port, we don't support anything else */ - if (!(reg[0] & 0x00000001)) + if (!(be32_to_cpu(reg[0]) & 0x00000001)) return -1; /* Now look for an "ibm,aix-loc" property that gives us ordering @@ -202,7 +202,7 @@ static int __init add_legacy_isa_port(struct device_node *np, taddr = 0; /* Add port, irq will be dealt with later */ - return add_legacy_port(np, index, UPIO_PORT, reg[1], taddr, + return add_legacy_port(np, index, UPIO_PORT, be32_to_cpu(reg[1]), taddr, NO_IRQ, UPF_BOOT_AUTOCONF, 0); } @@ -251,9 +251,9 @@ static int __init add_legacy_pci_port(struct device_node *np, * we get to their "reg" property */ if (np != pci_dev) { - const u32 *reg = of_get_property(np, "reg", NULL); - if (reg && (*reg < 4)) - index = lindex = *reg; + const __be32 *reg = of_get_property(np, "reg", NULL); + if (reg && (be32_to_cpup(reg) < 4)) + index = lindex = be32_to_cpup(reg); } /* Local index means it's the Nth port in the PCI chip. Unfortunately @@ -507,7 +507,7 @@ static int __init check_legacy_serial_console(void) struct device_node *prom_stdout = NULL; int i, speed = 0, offset = 0; const char *name; - const u32 *spd; + const __be32 *spd; DBG(" -> check_legacy_serial_console()\n"); @@ -547,7 +547,7 @@ static int __init check_legacy_serial_console(void) } spd = of_get_property(prom_stdout, "current-speed", NULL); if (spd) - speed = *spd; + speed = be32_to_cpup(spd); if (strcmp(name, "serial") != 0) goto not_found; -- cgit v1.2.3 From 5b8544c38e6fde6968645afd46ff681492192b86 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 8 Oct 2010 10:37:31 -0500 Subject: powerpc/ppc64e: Fix link problem when building ppc64e_defconfig arch/powerpc/platforms/built-in.o:(.toc1+0x18): undefined reference to `__early_start' This is due to the 85xx/smp.c not handling the 64-bit side properly. We need to set the entry point for secondary cores on ppc64e to generic_secondary_smp_init instead of __early_start that we due on ppc32. Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/smp.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index a6b106557be4..bd38b6a240de 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -79,6 +79,7 @@ smp_85xx_kick_cpu(int nr) local_irq_save(flags); out_be32(bptr_vaddr + BOOT_ENTRY_PIR, nr); +#ifdef CONFIG_PPC32 out_be32(bptr_vaddr + BOOT_ENTRY_ADDR_LOWER, __pa(__early_start)); if (!ioremappable) @@ -88,6 +89,12 @@ smp_85xx_kick_cpu(int nr) /* Wait a bit for the CPU to ack. 
*/ while ((__secondary_hold_acknowledge != nr) && (++n < 1000)) mdelay(1); +#else + out_be64((u64 *)(bptr_vaddr + BOOT_ENTRY_ADDR_UPPER), + __pa((u64)*((unsigned long long *) generic_secondary_smp_init))); + + smp_generic_kick_cpu(nr); +#endif local_irq_restore(flags); -- cgit v1.2.3 From 1c9db52534a2c0e9776788cd34ccc193289fc18c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 28 Sep 2010 16:46:51 +0200 Subject: pci: Convert msi to new irq_chip functions Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Jesse Barnes Cc: Benjamin Herrenschmidt Cc: "David S. Miller" Cc: Tony Luck Cc: Russell King --- arch/powerpc/platforms/cell/axon_msi.c | 6 +++--- arch/powerpc/platforms/pseries/xics.c | 2 +- arch/powerpc/sysdev/fsl_msi.c | 4 ++-- arch/powerpc/sysdev/mpic_pasemi_msi.c | 22 +++++++++++----------- arch/powerpc/sysdev/mpic_u3msi.c | 18 +++++++++--------- 5 files changed, 26 insertions(+), 26 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 97085530aa63..e3e379c6caa7 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -310,9 +310,9 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev) } static struct irq_chip msic_irq_chip = { - .mask = mask_msi_irq, - .unmask = unmask_msi_irq, - .shutdown = unmask_msi_irq, + .irq_mask = mask_msi_irq, + .irq_unmask = unmask_msi_irq, + .irq_shutdown = mask_msi_irq, .name = "AXON-MSI", }; diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 93834b0d8272..67e2c4bdac8f 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -243,7 +243,7 @@ static unsigned int xics_startup(unsigned int virq) * at that level, so we do it here by hand. 
*/ if (irq_to_desc(virq)->msi_desc) - unmask_msi_irq(virq); + unmask_msi_irq(irq_get_irq_data(virq)); /* unmask it */ xics_unmask_irq(virq); diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 87991d3abbab..bdbd896c89d8 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -51,8 +51,8 @@ static void fsl_msi_end_irq(unsigned int virq) } static struct irq_chip fsl_msi_chip = { - .mask = mask_msi_irq, - .unmask = unmask_msi_irq, + .irq_mask = mask_msi_irq, + .irq_unmask = unmask_msi_irq, .ack = fsl_msi_end_irq, .name = "FSL-MSI", }; diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 3b6a9a43718f..320ad5a9a25d 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -39,24 +39,24 @@ static struct mpic *msi_mpic; -static void mpic_pasemi_msi_mask_irq(unsigned int irq) +static void mpic_pasemi_msi_mask_irq(struct irq_data *data) { - pr_debug("mpic_pasemi_msi_mask_irq %d\n", irq); - mask_msi_irq(irq); - mpic_mask_irq(irq); + pr_debug("mpic_pasemi_msi_mask_irq %d\n", data->irq); + mask_msi_irq(data); + mpic_mask_irq(data->irq); } -static void mpic_pasemi_msi_unmask_irq(unsigned int irq) +static void mpic_pasemi_msi_unmask_irq(struct irq_data *data) { - pr_debug("mpic_pasemi_msi_unmask_irq %d\n", irq); - mpic_unmask_irq(irq); - unmask_msi_irq(irq); + pr_debug("mpic_pasemi_msi_unmask_irq %d\n", data->irq); + mpic_unmask_irq(data->irq); + unmask_msi_irq(data); } static struct irq_chip mpic_pasemi_msi_chip = { - .shutdown = mpic_pasemi_msi_mask_irq, - .mask = mpic_pasemi_msi_mask_irq, - .unmask = mpic_pasemi_msi_unmask_irq, + .irq_shutdown = mpic_pasemi_msi_mask_irq, + .irq_mask = mpic_pasemi_msi_mask_irq, + .irq_unmask = mpic_pasemi_msi_unmask_irq, .eoi = mpic_end_irq, .set_type = mpic_set_irq_type, .set_affinity = mpic_set_affinity, diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index bcbfe79c704b..a2b028b4a202 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -23,22 +23,22 @@ /* A bit ugly, can we get this from the pci_dev somehow? */ static struct mpic *msi_mpic; -static void mpic_u3msi_mask_irq(unsigned int irq) +static void mpic_u3msi_mask_irq(struct irq_data *data) { - mask_msi_irq(irq); - mpic_mask_irq(irq); + mask_msi_irq(data); + mpic_mask_irq(data->irq); } -static void mpic_u3msi_unmask_irq(unsigned int irq) +static void mpic_u3msi_unmask_irq(struct irq_data *data) { - mpic_unmask_irq(irq); - unmask_msi_irq(irq); + mpic_unmask_irq(data->irq); + unmask_msi_irq(data); } static struct irq_chip mpic_u3msi_chip = { - .shutdown = mpic_u3msi_mask_irq, - .mask = mpic_u3msi_mask_irq, - .unmask = mpic_u3msi_unmask_irq, + .irq_shutdown = mpic_u3msi_mask_irq, + .irq_mask = mpic_u3msi_mask_irq, + .irq_unmask = mpic_u3msi_unmask_irq, .eoi = mpic_end_irq, .set_type = mpic_set_irq_type, .set_affinity = mpic_set_affinity, -- cgit v1.2.3 From c7fc2de0c83dbd2eaf759c5cd0e2b9cf1eb4df3a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 12 Oct 2010 14:07:09 -0700 Subject: memblock, bootmem: Round pfn properly for memory and reserved regions We need to round memory regions correctly -- specifically, we need to round reserved region in the more expansive direction (lower limit down, upper limit up) whereas usable memory regions need to be rounded in the more restrictive direction (lower limit up, upper limit down). 
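Expressed with the kernel's PFN_UP()/PFN_DOWN() rounding helpers, the four accessors introduced just below come out as (sketch):

	/* usable memory: round inward, partial pages are not usable */
	static inline unsigned long
	memblock_region_memory_base_pfn(const struct memblock_region *reg)
	{
		return PFN_UP(reg->base);
	}

	static inline unsigned long
	memblock_region_memory_end_pfn(const struct memblock_region *reg)
	{
		return PFN_DOWN(reg->base + reg->size);
	}

	/* reserved memory: round outward, partial pages stay reserved */
	static inline unsigned long
	memblock_region_reserved_base_pfn(const struct memblock_region *reg)
	{
		return PFN_DOWN(reg->base);
	}

	static inline unsigned long
	memblock_region_reserved_end_pfn(const struct memblock_region *reg)
	{
		return PFN_UP(reg->base + reg->size);
	}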
This introduces two sets of inlines: memblock_region_memory_base_pfn() memblock_region_memory_end_pfn() memblock_region_reserved_base_pfn() memblock_region_reserved_end_pfn() Although they are antisymmetric (and therefore are technically duplicates) the use of the different inlines explicitly documents the programmer's intention. The lack of proper rounding caused a bug on ARM, which was then found to also affect other architectures. Reported-by: Russell King Signed-off-by: Yinghai Lu LKML-Reference: <4CB4CDFD.4020105@kernel.org> Cc: Jeremy Fitzhardinge Signed-off-by: H. Peter Anvin --- arch/powerpc/mm/mem.c | 14 +++++++------- arch/powerpc/mm/numa.c | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index f661f6c527da..a66499650909 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -148,8 +148,8 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, int ret = -1; for_each_memblock(memory, reg) { - tstart = max(start_pfn, memblock_region_base_pfn(reg)); - tend = min(end_pfn, memblock_region_end_pfn(reg)); + tstart = max(start_pfn, memblock_region_memory_base_pfn(reg)); + tend = min(end_pfn, memblock_region_memory_end_pfn(reg)); if (tstart >= tend) continue; ret = (*func)(tstart, tend - tstart, arg); @@ -195,8 +195,8 @@ void __init do_init_bootmem(void) /* Add active regions with valid PFNs */ for_each_memblock(memory, reg) { unsigned long start_pfn, end_pfn; - start_pfn = memblock_region_base_pfn(reg); - end_pfn = memblock_region_end_pfn(reg); + start_pfn = memblock_region_memory_base_pfn(reg); + end_pfn = memblock_region_memory_end_pfn(reg); add_active_range(0, start_pfn, end_pfn); } @@ -236,9 +236,9 @@ static int __init mark_nonram_nosave(void) for_each_memblock(memory, reg) { if (prev && - memblock_region_end_pfn(prev) < memblock_region_base_pfn(reg)) - register_nosave_region(memblock_region_end_pfn(prev), - memblock_region_base_pfn(reg)); + memblock_region_memory_end_pfn(prev) < memblock_region_memory_base_pfn(reg)) + register_nosave_region(memblock_region_memory_end_pfn(prev), + memblock_region_memory_base_pfn(reg)); prev = reg; } return 0; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 066fb443ba5a..74505b245374 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -811,8 +811,8 @@ static void __init setup_nonnuma(void) (top_of_ram - total_ram) >> 20); for_each_memblock(memory, reg) { - start_pfn = memblock_region_base_pfn(reg); - end_pfn = memblock_region_end_pfn(reg); + start_pfn = memblock_region_memory_base_pfn(reg); + end_pfn = memblock_region_memory_end_pfn(reg); fake_numa_create_new_node(end_pfn, &nid); add_active_range(nid, start_pfn, end_pfn); -- cgit v1.2.3 From f3016fa591c788d6d545ef7907e24c8b5d788759 Mon Sep 17 00:00:00 2001 From: Mingkai Hu Date: Tue, 12 Oct 2010 18:18:33 +0800 Subject: powerpc/of: add eSPI controller dts bindings and DTS modification Also modify the documentation of the cell-index property in the SPI controller binding. Add SPI flash (s25fl128p01) support on the p4080ds and mpc8536ds boards.
Signed-off-by: Mingkai Hu Signed-off-by: Grant Likely --- arch/powerpc/boot/dts/mpc8536ds.dts | 52 +++++++++++++++++++++++++++++++++++++ arch/powerpc/boot/dts/p4080ds.dts | 11 +++----- 2 files changed, 56 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/mpc8536ds.dts b/arch/powerpc/boot/dts/mpc8536ds.dts index 815cebb2e3e5..a75c10eed269 100644 --- a/arch/powerpc/boot/dts/mpc8536ds.dts +++ b/arch/powerpc/boot/dts/mpc8536ds.dts @@ -108,6 +108,58 @@ }; }; + spi@7000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,mpc8536-espi"; + reg = <0x7000 0x1000>; + interrupts = <59 0x2>; + interrupt-parent = <&mpic>; + fsl,espi-num-chipselects = <4>; + + flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "spansion,s25sl12801"; + reg = <0>; + spi-max-frequency = <40000000>; + partition@u-boot { + label = "u-boot"; + reg = <0x00000000 0x00100000>; + read-only; + }; + partition@kernel { + label = "kernel"; + reg = <0x00100000 0x00500000>; + read-only; + }; + partition@dtb { + label = "dtb"; + reg = <0x00600000 0x00100000>; + read-only; + }; + partition@fs { + label = "file system"; + reg = <0x00700000 0x00900000>; + }; + }; + flash@1 { + compatible = "spansion,s25sl12801"; + reg = <1>; + spi-max-frequency = <40000000>; + }; + flash@2 { + compatible = "spansion,s25sl12801"; + reg = <2>; + spi-max-frequency = <40000000>; + }; + flash@3 { + compatible = "spansion,s25sl12801"; + reg = <3>; + spi-max-frequency = <40000000>; + }; + }; + dma@21300 { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/powerpc/boot/dts/p4080ds.dts b/arch/powerpc/boot/dts/p4080ds.dts index 2f0de24e3822..5b7fc29dd6cf 100644 --- a/arch/powerpc/boot/dts/p4080ds.dts +++ b/arch/powerpc/boot/dts/p4080ds.dts @@ -236,22 +236,19 @@ }; spi@110000 { - cell-index = <0>; #address-cells = <1>; #size-cells = <0>; - compatible = "fsl,espi"; + compatible = "fsl,p4080-espi", "fsl,mpc8536-espi"; reg = <0x110000 0x1000>; interrupts = <53 0x2>; interrupt-parent = <&mpic>; - espi,num-ss-bits = <4>; - mode = "cpu"; + fsl,espi-num-chipselects = <4>; - fsl_m25p80@0 { + flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "fsl,espi-flash"; + compatible = "spansion,s25sl12801"; reg = <0>; - linux,modalias = "fsl_m25p80"; spi-max-frequency = <40000000>; /* input clock */ partition@u-boot { label = "u-boot"; -- cgit v1.2.3 From 206489748b64510d655e5c99193426667463dd15 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 10 Sep 2010 09:40:32 +0000 Subject: powerpc/pseries: Export device tree updating routines Export routines associated with adding and removing device tree nodes on pseries needed for device tree updating. 
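A sketch of how the consumers added later in this series use these exports; phandle and drc_index stand in for firmware-supplied values, and error handling is trimmed:

	/* drop a node that disappeared across a migration */
	struct device_node *dn = of_find_node_by_phandle(phandle);
	if (dn)
		dlpar_detach_node(dn);

	/* build and graft a node delivered by ibm,configure-connector */
	dn = dlpar_configure_connector(drc_index);
	if (dn && dlpar_attach_node(dn))
		dlpar_free_cc_nodes(dn);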
Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/dlpar.c | 4 ++-- arch/powerpc/platforms/pseries/pseries.h | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 72d8054fa739..75d336ab01bc 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -33,7 +33,7 @@ struct cc_workarea { u32 prop_offset; }; -static void dlpar_free_cc_property(struct property *prop) +void dlpar_free_cc_property(struct property *prop) { kfree(prop->name); kfree(prop->value); @@ -102,7 +102,7 @@ static void dlpar_free_one_cc_node(struct device_node *dn) kfree(dn); } -static void dlpar_free_cc_nodes(struct device_node *dn) +void dlpar_free_cc_nodes(struct device_node *dn) { if (dn->child) dlpar_free_cc_nodes(dn->child); diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 40c93cad91d2..e9f6d2859c3c 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -17,6 +17,8 @@ struct device_node; extern void request_event_sources_irqs(struct device_node *np, irq_handler_t handler, const char *name); +#include + extern void __init fw_feature_init(const char *hypertas, unsigned long len); struct pt_regs; @@ -47,4 +49,11 @@ extern unsigned long rtas_poweron_auto; extern void find_udbg_vterm(void); +/* Dynamic logical Partitioning/Mobility */ +extern void dlpar_free_cc_nodes(struct device_node *); +extern void dlpar_free_cc_property(struct property *); +extern struct device_node *dlpar_configure_connector(u32); +extern int dlpar_attach_node(struct device_node *); +extern int dlpar_detach_node(struct device_node *); + #endif /* _PSERIES_PSERIES_H */ -- cgit v1.2.3 From d8862be1229534aac1768b8ac663e8fb2bb6ddf6 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 10 Sep 2010 09:41:35 +0000 Subject: powerpc/pseries: Export rtas_ibm_suspend_me() Export the rtas_ibm_suspend_me() routine. This is needed to perform partition migration in the kernel. 
Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/kernel/rtas.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3d35f8ae377e..9a1193e30f26 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -187,6 +187,7 @@ extern void rtas_progress(char *s, unsigned short hex); extern void rtas_initialize(void); extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data); extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data); +extern int rtas_ibm_suspend_me(struct rtas_args *); struct rtc_time; extern unsigned long rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 41048de3c6c3..dc67ea46465e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -805,7 +805,7 @@ static void rtas_percpu_suspend_me(void *info) __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1); } -static int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(struct rtas_args *args) { long state; long rc; @@ -855,7 +855,7 @@ static int rtas_ibm_suspend_me(struct rtas_args *args) return atomic_read(&data.error); } #else /* CONFIG_PPC_PSERIES */ -static int rtas_ibm_suspend_me(struct rtas_args *args) +int rtas_ibm_suspend_me(struct rtas_args *args) { return -ENOSYS; } -- cgit v1.2.3 From 410bccf978819394669dede571de878f4576fd3e Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Fri, 10 Sep 2010 09:42:36 +0000 Subject: powerpc/pseries: Partition migration in the kernel Enable partition migration in the kernel. To do this a new sysfs file, /sys/kernel/mobility/migration, is created. In order to initiate a migration the stream id (generated by the HMC managing the system) is written to this file. After a migration occurs, and what is the majority of this code, the device tree needs to be updated for the new system the partition is running on. This is done via the ibm,update-nodes and ibm,update-properties rtas calls which return information regarding which nodes and properties of the device tree are to be added/removed/updated. 
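A hypothetical userspace sketch (not part of this patch) of kicking off a migration through the new file; the stream id shown is a made-up example:

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *streamid = "0x1234567890abcdef"; /* assumed HMC-generated id */
		int fd = open("/sys/kernel/mobility/migration", O_WRONLY);

		if (fd < 0)
			return 1;

		/* The write does not return until the suspend/resume cycle
		 * and the post-migration device tree update have finished. */
		if (write(fd, streamid, strlen(streamid)) < 0) {
			close(fd);
			return 1;
		}

		close(fd);
		return 0;
	}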
Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/Makefile | 2 +- arch/powerpc/platforms/pseries/mobility.c | 362 ++++++++++++++++++++++++++++++ 2 files changed, 363 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/pseries/mobility.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 046ace9c4381..93541b39dd12 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -8,7 +8,7 @@ endif obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o event_sources.o ras.o \ - firmware.o power.o dlpar.o + firmware.o power.o dlpar.o mobility.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_SCANLOG) += scanlog.o diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c new file mode 100644 index 000000000000..3e7f651e50ac --- /dev/null +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -0,0 +1,362 @@ +/* + * Support for Partition Mobility/Migration + * + * Copyright (C) 2010 Nathan Fontenot + * Copyright (C) 2010 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "pseries.h" + +static struct kobject *mobility_kobj; + +struct update_props_workarea { + u32 phandle; + u32 state; + u64 reserved; + u32 nprops; +}; + +#define NODE_ACTION_MASK 0xff000000 +#define NODE_COUNT_MASK 0x00ffffff + +#define DELETE_DT_NODE 0x01000000 +#define UPDATE_DT_NODE 0x02000000 +#define ADD_DT_NODE 0x03000000 + +static int mobility_rtas_call(int token, char *buf) +{ + int rc; + + spin_lock(&rtas_data_buf_lock); + + memcpy(rtas_data_buf, buf, RTAS_DATA_BUF_SIZE); + rc = rtas_call(token, 2, 1, NULL, rtas_data_buf, 1); + memcpy(buf, rtas_data_buf, RTAS_DATA_BUF_SIZE); + + spin_unlock(&rtas_data_buf_lock); + return rc; +} + +static int delete_dt_node(u32 phandle) +{ + struct device_node *dn; + + dn = of_find_node_by_phandle(phandle); + if (!dn) + return -ENOENT; + + dlpar_detach_node(dn); + return 0; +} + +static int update_dt_property(struct device_node *dn, struct property **prop, + const char *name, u32 vd, char *value) +{ + struct property *new_prop = *prop; + struct property *old_prop; + int more = 0; + + /* A negative 'vd' value indicates that only part of the new property + * value is contained in the buffer and we need to call + * ibm,update-properties again to get the rest of the value. + * + * A negative value is also the two's complement of the actual value.
+ */ + if (vd & 0x80000000) { + vd = ~vd + 1; + more = 1; + } + + if (new_prop) { + /* partial property fixup */ + char *new_data = kzalloc(new_prop->length + vd, GFP_KERNEL); + if (!new_data) + return -ENOMEM; + + memcpy(new_data, new_prop->value, new_prop->length); + memcpy(new_data + new_prop->length, value, vd); + + kfree(new_prop->value); + new_prop->value = new_data; + new_prop->length += vd; + } else { + new_prop = kzalloc(sizeof(*new_prop), GFP_KERNEL); + if (!new_prop) + return -ENOMEM; + + new_prop->name = kstrdup(name, GFP_KERNEL); + if (!new_prop->name) { + kfree(new_prop); + return -ENOMEM; + } + + new_prop->length = vd; + new_prop->value = kzalloc(new_prop->length, GFP_KERNEL); + if (!new_prop->value) { + kfree(new_prop->name); + kfree(new_prop); + return -ENOMEM; + } + + memcpy(new_prop->value, value, vd); + *prop = new_prop; + } + + if (!more) { + old_prop = of_find_property(dn, new_prop->name, NULL); + if (old_prop) + prom_update_property(dn, new_prop, old_prop); + else + prom_add_property(dn, new_prop); + + new_prop = NULL; + } + + return 0; +} + +static int update_dt_node(u32 phandle) +{ + struct update_props_workarea *upwa; + struct device_node *dn; + struct property *prop = NULL; + int i, rc; + char *prop_data; + char *rtas_buf; + int update_properties_token; + + update_properties_token = rtas_token("ibm,update-properties"); + if (update_properties_token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!rtas_buf) + return -ENOMEM; + + dn = of_find_node_by_phandle(phandle); + if (!dn) { + kfree(rtas_buf); + return -ENOENT; + } + + upwa = (struct update_props_workarea *)&rtas_buf[0]; + upwa->phandle = phandle; + + do { + rc = mobility_rtas_call(update_properties_token, rtas_buf); + if (rc < 0) + break; + + prop_data = rtas_buf + sizeof(*upwa); + + for (i = 0; i < upwa->nprops; i++) { + char *prop_name; + u32 vd; + + prop_name = prop_data + 1; + prop_data += strlen(prop_name) + 1; + vd = *prop_data++; + + switch (vd) { + case 0x00000000: + /* name only property, nothing to do */ + break; + + case 0x80000000: + prop = of_find_property(dn, prop_name, NULL); + prom_remove_property(dn, prop); + prop = NULL; + break; + + default: + rc = update_dt_property(dn, &prop, prop_name, + vd, prop_data); + if (rc) { + printk(KERN_ERR "Could not update %s" + " property\n", prop_name); + } + + prop_data += vd; + } + } + } while (rc == 1); + + of_node_put(dn); + kfree(rtas_buf); + return 0; +} + +static int add_dt_node(u32 parent_phandle, u32 drc_index) +{ + struct device_node *dn; + struct device_node *parent_dn; + int rc; + + dn = dlpar_configure_connector(drc_index); + if (!dn) + return -ENOENT; + + parent_dn = of_find_node_by_phandle(parent_phandle); + if (!parent_dn) { + dlpar_free_cc_nodes(dn); + return -ENOENT; + } + + dn->parent = parent_dn; + rc = dlpar_attach_node(dn); + if (rc) + dlpar_free_cc_nodes(dn); + + of_node_put(parent_dn); + return rc; +} + +static int pseries_devicetree_update(void) +{ + char *rtas_buf; + u32 *data; + int update_nodes_token; + int rc; + + update_nodes_token = rtas_token("ibm,update-nodes"); + if (update_nodes_token == RTAS_UNKNOWN_SERVICE) + return -EINVAL; + + rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL); + if (!rtas_buf) + return -ENOMEM; + + do { + rc = mobility_rtas_call(update_nodes_token, rtas_buf); + if (rc && rc != 1) + break; + + data = (u32 *)rtas_buf + 4; + while (*data & NODE_ACTION_MASK) { + int i; + u32 action = *data & NODE_ACTION_MASK; + int node_count = *data & 
NODE_COUNT_MASK; + + data++; + + for (i = 0; i < node_count; i++) { + u32 phandle = *data++; + u32 drc_index; + + switch (action) { + case DELETE_DT_NODE: + delete_dt_node(phandle); + break; + case UPDATE_DT_NODE: + update_dt_node(phandle); + break; + case ADD_DT_NODE: + drc_index = *data++; + add_dt_node(phandle, drc_index); + break; + } + } + } + } while (rc == 1); + + kfree(rtas_buf); + return rc; +} + +void post_mobility_fixup(void) +{ + int rc; + int activate_fw_token; + + rc = pseries_devicetree_update(); + if (rc) { + printk(KERN_ERR "Initial post-mobility device tree update " + "failed: %d\n", rc); + return; + } + + activate_fw_token = rtas_token("ibm,activate-firmware"); + if (activate_fw_token == RTAS_UNKNOWN_SERVICE) { + printk(KERN_ERR "Could not make post-mobility " + "activate-fw call.\n"); + return; + } + + rc = rtas_call(activate_fw_token, 0, 1, NULL); + if (!rc) { + rc = pseries_devicetree_update(); + if (rc) + printk(KERN_ERR "Secondary post-mobility device tree " + "update failed: %d\n", rc); + } else { + printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc); + return; + } + + return; +} + +static ssize_t migrate_store(struct class *class, struct class_attribute *attr, + const char *buf, size_t count) +{ + struct rtas_args args; + u64 streamid; + int rc; + + rc = strict_strtoull(buf, 0, &streamid); + if (rc) + return rc; + + memset(&args, 0, sizeof(args)); + args.token = rtas_token("ibm,suspend-me"); + args.nargs = 2; + args.nret = 1; + + args.args[0] = streamid >> 32 ; + args.args[1] = streamid & 0xffffffff; + args.rets = &args.args[args.nargs]; + + do { + args.rets[0] = 0; + rc = rtas_ibm_suspend_me(&args); + if (!rc && args.rets[0] == RTAS_NOT_SUSPENDABLE) + ssleep(1); + } while (!rc && args.rets[0] == RTAS_NOT_SUSPENDABLE); + + if (rc) + return rc; + else if (args.rets[0]) + return args.rets[0]; + + post_mobility_fixup(); + return count; +} + +static CLASS_ATTR(migration, S_IWUSR, NULL, migrate_store); + +static int __init mobility_sysfs_init(void) +{ + int rc; + + mobility_kobj = kobject_create_and_add("mobility", kernel_kobj); + if (!mobility_kobj) + return -ENOMEM; + + rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr); + + return rc; +} +device_initcall(mobility_sysfs_init); -- cgit v1.2.3 From 4e74fd7d0a6eda70f9356c113450182a844abcf1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Sep 2010 09:47:40 +0000 Subject: powerpc: Use static const char arrays Signed-off-by: Joe Perches Reviewed-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/boot/addnote.c | 4 ++-- arch/powerpc/kernel/irq.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c index b1e5611b2ab1..349b5530d2c4 100644 --- a/arch/powerpc/boot/addnote.c +++ b/arch/powerpc/boot/addnote.c @@ -20,7 +20,7 @@ #include /* CHRP note section */ -char arch[] = "PowerPC"; +static const char arch[] = "PowerPC"; #define N_DESCR 6 unsigned int descr[N_DESCR] = { @@ -33,7 +33,7 @@ unsigned int descr[N_DESCR] = { }; /* RPA note section */ -char rpaname[] = "IBM,RPA-Client-Config"; +static const char rpaname[] = "IBM,RPA-Client-Config"; /* * Note: setting ignore_my_client_config *should* mean that OF ignores diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 4a65386995d7..47fbc56e9e1c 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -1143,7 +1143,7 @@ static int virq_debug_show(struct seq_file *m, void *private) 
unsigned long flags; struct irq_desc *desc; const char *p; - char none[] = "none"; + static const char none[] = "none"; int i; seq_printf(m, "%-5s %-7s %-15s %s\n", "virq", "hwirq", -- cgit v1.2.3 From 689fd14ae9b2af5c6862ddc11d4791ec9a938cb3 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 11 Sep 2010 19:10:53 +0000 Subject: powerpc: Remove pr_ uses of KERN_ Signed-off-by: Joe Perches Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kvm/emulate.c | 4 ++-- arch/powerpc/sysdev/pmi.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 4568ec386c2a..b83ba581fd8e 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -145,7 +145,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); - pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); + pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); switch (get_op(inst)) { case OP_TRAP: @@ -275,7 +275,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u64 jd = get_tb() - vcpu->arch.dec_jiffies; kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd); - pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", + pr_debug("mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, kvmppc_get_gpr(vcpu, rt)); break; diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 24a0bb955b18..4260f368db52 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -114,7 +114,7 @@ static void pmi_notify_handlers(struct work_struct *work) spin_lock(&data->handler_spinlock); list_for_each_entry(handler, &data->handler, node) { - pr_debug(KERN_INFO "pmi: notifying handler %p\n", handler); + pr_debug("pmi: notifying handler %p\n", handler); if (handler->type == data->msg.type) handler->handle_pmi_message(data->msg); } -- cgit v1.2.3 From a655237fa2f9e4afe9949abe2c511432ab9537dd Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 4 Sep 2010 00:12:44 +0000 Subject: powerpc/irq.c: Add of_node_put to avoid memory leak In this case, a device_node structure is stored in another structure that is then freed without first decrementing the reference count of the device_node structure. The semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @r exists@ expression x; identifier f; position p1,p2; @@ x@p1->f = \(of_find_node_by_path\|of_find_node_by_name\|of_find_node_by_phandle\|of_get_parent\|of_get_next_parent\|of_get_next_child\|of_find_compatible_node\|of_match_node\|of_find_node_by_type\|of_find_node_with_property\|of_find_matching_node\|of_parse_phandle\|of_node_get\)(...); ... 
when != of_node_put(x) kfree@p2(x) @script:python@ p1 << r.p1; p2 << r.p2; @@ cocci.print_main("call",p1) cocci.print_secs("free",p2) // Signed-off-by: Julia Lawall Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/irq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 47fbc56e9e1c..4002b48fd607 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -587,8 +587,10 @@ struct irq_host *irq_alloc_host(struct device_node *of_node, * this will be fixed once slab is made available early * instead of the current cruft */ - if (mem_init_done) + if (mem_init_done) { + of_node_put(host->of_node); kfree(host); + } return NULL; } irq_map[0].host = host; -- cgit v1.2.3 From dda804ad4023cc202466c46fcfcc163131953838 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:13:19 +0000 Subject: powerpc/pci: Fix return type of BUID_{HI,LO} macros BUID_HI and BUID_LO are used to pass data to call_rtas, which expects ints or u32s. But the macro doesn't cast the return, so the result is still u64. Use the upper_32_bits and lower_32_bits macros that have been added to kernel.h. Found by getting printf format errors trying to debug print the args, no actual code change for 64 bit kernels where the macros are actually used. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Acked-by: Linas Vepstas Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ppc-pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h index 42fdff0e4b32..43268f15004e 100644 --- a/arch/powerpc/include/asm/ppc-pci.h +++ b/arch/powerpc/include/asm/ppc-pci.h @@ -28,8 +28,8 @@ extern void find_and_init_phbs(void); extern struct pci_dev *isa_bridge_pcidev; /* may be NULL if no ISA bus */ /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */ -#define BUID_HI(buid) ((buid) >> 32) -#define BUID_LO(buid) ((buid) & 0xffffffff) +#define BUID_HI(buid) upper_32_bits(buid) +#define BUID_LO(buid) lower_32_bits(buid) /* PCI device_node operations */ struct device_node; -- cgit v1.2.3 From 1cb8e85a9d9da4192acfb5f70a80b0c5ce8c3e8f Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:45 +0000 Subject: powerpc/dma: Fix dma_iommu_dma_supported compare The table offset is in entries, each of which imply a dma address of an IOMMU page. Also, we should check the device can reach the whole IOMMU table. 
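To make the units concrete, a worked example with assumed numbers (4K IOMMU pages, so IOMMU_PAGE_SHIFT is 12):

	/* Table entries cover IOMMU pages [it_offset, it_offset + it_size). */
	unsigned long it_offset = 0;		/* window starts at DMA address 0 */
	unsigned long it_size = 0x80000;	/* 0x80000 entries * 4K = 2GB window */
	u64 mask = 0xffffffffULL;		/* 32-bit capable device */

	/* Compare the window end, in IOMMU pages, against the highest page
	 * the mask can reach: 0x80000 > (0xffffffff >> 12) == 0xfffff is
	 * false, so the device can address the whole table and the new
	 * check reports DMA as supported. */
	int supported = !((it_offset + it_size) > (mask >> 12));

The old code compared it_offset (in table entries) directly against mask (in bytes), mixing units, and ignored the table size entirely.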
Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-iommu.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 37771a518119..6e54a0fd31aa 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -74,16 +74,17 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) { struct iommu_table *tbl = get_iommu_table_base(dev); - if (!tbl || tbl->it_offset > mask) { - printk(KERN_INFO - "Warning: IOMMU offset too big for device mask\n"); - if (tbl) - printk(KERN_INFO - "mask: 0x%08llx, table offset: 0x%08lx\n", - mask, tbl->it_offset); - else - printk(KERN_INFO "mask: 0x%08llx, table unavailable\n", - mask); + if (!tbl) { + dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx" + ", table unavailable\n", mask); + return 0; + } + + if ((tbl->it_offset + tbl->it_size) > (mask >> IOMMU_PAGE_SHIFT)) { + dev_info(dev, "Warning: IOMMU window too big for device mask\n"); + dev_info(dev, "mask: 0x%08llx, table end: 0x%08lx\n", + mask, (tbl->it_offset + tbl->it_size) << + IOMMU_PAGE_SHIFT); + return 0; + } else + return 1; -- cgit v1.2.3 From ffa56e555a6e4c205e879636e6cd6104ce03421f Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:46 +0000 Subject: powerpc/dma: Fix check for direct DMA support The current check is wrong because it does not take the DMA offset into account, and in the case of a driver which doesn't actually support 64 bits would falsely report that device as working. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index f368c075c90b..cf02cad62d9a 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -90,7 +90,7 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) /* Could be improved so platforms can set the limit in case * they have limited DMA windows */ - return mask >= (memblock_end_of_DRAM() - 1); + return mask >= get_dma_offset(dev) + (memblock_end_of_DRAM() - 1); #else return 1; #endif -- cgit v1.2.3 From edea8f6f48416d9a6fd1babb76c19cf05c802325 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:47 +0000 Subject: powerpc/vio: Use put_device() on device_register failure The kernel doc for device_register (and device_initialize) very clearly states to call put_device, not kfree, after calling, even on error.
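Sketch of the general rule the fix applies (the actual change is in the diff below): once device_initialize()/device_register() has run, the struct device is refcounted, so the error path must drop the reference instead of freeing the memory directly:

	if (device_register(&viodev->dev)) {
		/* Drops the final reference; the release callback
		 * (vio_dev_release() here) then does the freeing. */
		put_device(&viodev->dev);
		return NULL;
	}

A bare kfree() would bypass the release callback and leak whatever it is responsible for.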
Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index fa3469ddaef8..72db4b021762 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1254,8 +1254,7 @@ struct vio_dev *vio_register_device_node(struct device_node *of_node) if (device_register(&viodev->dev)) { printk(KERN_ERR "%s: failed to register device %s\n", __func__, dev_name(&viodev->dev)); - /* XXX free TCE table */ - kfree(viodev); + put_device(&viodev->dev); return NULL; } -- cgit v1.2.3 From 45848e0fc1fce399651b3f480bdeb82cc6d3d15a Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:48 +0000 Subject: powerpc/viobus: Free TCE table on device release Release the TCE table as the XXX suggests, except on FW_FEATURE_ISERIES, where the tables are allocated globally and reused. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vio.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 72db4b021762..d692989a4318 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -1184,7 +1184,12 @@ EXPORT_SYMBOL(vio_unregister_driver); /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) { - /* XXX should free TCE table */ + struct iommu_table *tbl = get_iommu_table_base(dev); + + /* iSeries uses a common table for all vio devices */ + if (!firmware_has_feature(FW_FEATURE_ISERIES) && tbl) + iommu_free_table(tbl, dev->of_node ? + dev->of_node->full_name : dev_name(dev)); of_node_put(dev->of_node); kfree(to_vio_dev(dev)); } -- cgit v1.2.3 From e72ed6b509a62605fe9aca195c6037abdda6c1ac Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:49 +0000 Subject: powerpc/pseries: Use kmemdup While looking at some code paths I came across this code that zeros memory then copies over the entire length. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/dlpar.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 75d336ab01bc..b74a9230edc9 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -55,13 +55,12 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa) prop->length = ccwa->prop_length; value = (char *)ccwa + ccwa->prop_offset; - prop->value = kzalloc(prop->length, GFP_KERNEL); + prop->value = kmemdup(value, prop->length, GFP_KERNEL); if (!prop->value) { dlpar_free_cc_property(prop); return NULL; } - memcpy(prop->value, value, prop->length); return prop; } -- cgit v1.2.3 From bc0df9ec4c014dac85c0358f56be4223bf0f3334 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Wed, 15 Sep 2010 08:05:50 +0000 Subject: powerpc/pci: Cleanup device dma setup code Use set_dma_ops and remove unused oddly-named temp pointer sd. 
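For reference, the accessor is a one-line wrapper around the same archdata field the removed temporary poked directly (a sketch of the powerpc definition of the period; see asm/dma-mapping.h for the authoritative version):

	static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
	{
		dev->archdata.dma_ops = ops;
	}

so this is a readability cleanup with no behavioral change.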
Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Acked-by: Grant Likely Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 9021c4ad4bbd..10a44e68ef11 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1090,8 +1090,6 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) bus->number, bus->self ? pci_name(bus->self) : "PHB"); list_for_each_entry(dev, &bus->devices, bus_list) { - struct dev_archdata *sd = &dev->dev.archdata; - /* Cardbus can call us to add new devices to a bus, so ignore * those who are already fully discovered */ @@ -1107,7 +1105,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) set_dev_node(&dev->dev, pcibus_to_node(dev->bus)); /* Hook up default DMA ops */ - sd->dma_ops = pci_dma_ops; + set_dma_ops(&dev->dev, pci_dma_ops); set_dma_offset(&dev->dev, PCI_DRAM_OFFSET); /* Additional platform DMA/iommu setup */ -- cgit v1.2.3 From f56029b5eafda6175be4e5c91ca69c04ccda3661 Mon Sep 17 00:00:00 2001 From: Nishanth Aravamudan Date: Fri, 1 Oct 2010 11:26:18 +0000 Subject: powerpc/pseries/xics: Use cpu_possible_mask rather than cpu_all_mask Current firmware only allows us to send IRQs to the first processor or all processors. We currently check to see if the passed in mask is equal to the all_mask, but the firmware is only considering whether the request is for the equivalent of the possible_mask. Thus, we think the request is for some subset of CPUs and only assign IRQs to the first CPU (on systems without irqbalance running) as evidenced by /proc/interrupts. By using possible_mask instead, we account for this and proper interleaving of interrupts occurs. Signed-off-by: Nishanth Aravamudan Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/xics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index 93834b0d8272..7c1e3426888b 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c @@ -178,7 +178,7 @@ static int get_irq_server(unsigned int virq, const struct cpumask *cpumask, if (!distribute_irqs) return default_server; - if (!cpumask_equal(cpumask, cpu_all_mask)) { + if (!cpumask_subset(cpu_possible_mask, cpumask)) { int server = cpumask_first_and(cpu_online_mask, cpumask); if (server < nr_cpu_ids) -- cgit v1.2.3 From fc15351d9d63a35fd00c15850fa93a27940f16a0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Sep 2010 10:22:33 +0000 Subject: powerpc/spufs: Use llseek in all file operations The default for llseek is changing, so we need explicit operations everywhere. 
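Sketch of the two idioms the patch applies (illustrative stubs, not spufs code): seekable files take generic_file_llseek(), while pipe-like files such as the mailboxes take no_llseek so lseek() fails with -ESPIPE rather than relying on the old implicit default:

	static ssize_t example_read(struct file *file, char __user *buf,
				    size_t len, loff_t *pos)
	{
		return 0;	/* placeholder body */
	}

	static const struct file_operations example_seekable_fops = {
		.read	= example_read,
		.llseek	= generic_file_llseek,	/* file position is meaningful */
	};

	static const struct file_operations example_stream_fops = {
		.read	= example_read,
		.llseek	= no_llseek,		/* stream-like, reject seeking */
	};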
Signed-off-by: Arnd Bergmann Cc: Jeremy Kerr Cc: linuxppc-dev@ozlabs.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/cell/spufs/file.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 1a40da92154c..02f7b113a31b 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -154,6 +154,7 @@ static const struct file_operations __fops = { \ .release = spufs_attr_release, \ .read = spufs_attr_read, \ .write = spufs_attr_write, \ + .llseek = generic_file_llseek, \ }; @@ -521,6 +522,7 @@ static const struct file_operations spufs_cntl_fops = { .release = spufs_cntl_release, .read = simple_attr_read, .write = simple_attr_write, + .llseek = generic_file_llseek, .mmap = spufs_cntl_mmap, }; @@ -714,6 +716,7 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, static const struct file_operations spufs_mbox_fops = { .open = spufs_pipe_open, .read = spufs_mbox_read, + .llseek = no_llseek, }; static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, @@ -743,6 +746,7 @@ static ssize_t spufs_mbox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_mbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_mbox_stat_read, + .llseek = no_llseek, }; /* low-level ibox access function */ @@ -863,6 +867,7 @@ static const struct file_operations spufs_ibox_fops = { .read = spufs_ibox_read, .poll = spufs_ibox_poll, .fasync = spufs_ibox_fasync, + .llseek = no_llseek, }; static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, @@ -890,6 +895,7 @@ static ssize_t spufs_ibox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_ibox_stat_fops = { .open = spufs_pipe_open, .read = spufs_ibox_stat_read, + .llseek = no_llseek, }; /* low-level mailbox write */ @@ -1011,6 +1017,7 @@ static const struct file_operations spufs_wbox_fops = { .write = spufs_wbox_write, .poll = spufs_wbox_poll, .fasync = spufs_wbox_fasync, + .llseek = no_llseek, }; static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, @@ -1038,6 +1045,7 @@ static ssize_t spufs_wbox_stat_read(struct file *file, char __user *buf, static const struct file_operations spufs_wbox_stat_fops = { .open = spufs_pipe_open, .read = spufs_wbox_stat_read, + .llseek = no_llseek, }; static int spufs_signal1_open(struct inode *inode, struct file *file) @@ -1166,6 +1174,7 @@ static const struct file_operations spufs_signal1_fops = { .read = spufs_signal1_read, .write = spufs_signal1_write, .mmap = spufs_signal1_mmap, + .llseek = no_llseek, }; static const struct file_operations spufs_signal1_nosched_fops = { @@ -1173,6 +1182,7 @@ static const struct file_operations spufs_signal1_nosched_fops = { .release = spufs_signal1_release, .write = spufs_signal1_write, .mmap = spufs_signal1_mmap, + .llseek = no_llseek, }; static int spufs_signal2_open(struct inode *inode, struct file *file) @@ -1305,6 +1315,7 @@ static const struct file_operations spufs_signal2_fops = { .read = spufs_signal2_read, .write = spufs_signal2_write, .mmap = spufs_signal2_mmap, + .llseek = no_llseek, }; static const struct file_operations spufs_signal2_nosched_fops = { @@ -1312,6 +1323,7 @@ static const struct file_operations spufs_signal2_nosched_fops = { .release = spufs_signal2_release, .write = spufs_signal2_write, .mmap = spufs_signal2_mmap, + .llseek = no_llseek, }; /* @@ 
-1451,6 +1463,7 @@ static const struct file_operations spufs_mss_fops = { .open = spufs_mss_open, .release = spufs_mss_release, .mmap = spufs_mss_mmap, + .llseek = no_llseek, }; static int @@ -1508,6 +1521,7 @@ static const struct file_operations spufs_psmap_fops = { .open = spufs_psmap_open, .release = spufs_psmap_release, .mmap = spufs_psmap_mmap, + .llseek = no_llseek, }; @@ -1871,6 +1885,7 @@ static const struct file_operations spufs_mfc_fops = { .fsync = spufs_mfc_fsync, .fasync = spufs_mfc_fasync, .mmap = spufs_mfc_mmap, + .llseek = no_llseek, }; static int spufs_npc_set(void *data, u64 val) @@ -2246,6 +2261,7 @@ static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, static const struct file_operations spufs_dma_info_fops = { .open = spufs_info_open, .read = spufs_dma_info_read, + .llseek = no_llseek, }; static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, @@ -2299,6 +2315,7 @@ static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, static const struct file_operations spufs_proxydma_info_fops = { .open = spufs_info_open, .read = spufs_proxydma_info_read, + .llseek = no_llseek, }; static int spufs_show_tid(struct seq_file *s, void *private) @@ -2585,6 +2602,7 @@ static const struct file_operations spufs_switch_log_fops = { .read = spufs_switch_log_read, .poll = spufs_switch_log_poll, .release = spufs_switch_log_release, + .llseek = no_llseek, }; /** -- cgit v1.2.3 From 4108d9ba9091c55cfb968d42dd7dcae9a098b876 Mon Sep 17 00:00:00 2001 From: matt mooney Date: Wed, 22 Sep 2010 20:51:09 +0000 Subject: powerpc/Makefiles: Change to new flag variables Replace EXTRA_CFLAGS with ccflags-y and EXTRA_AFLAGS with asflags-y. Signed-off-by: matt mooney Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vdso32/Makefile | 6 +++--- arch/powerpc/kernel/vdso64/Makefile | 6 +++--- arch/powerpc/kvm/Makefile | 2 +- arch/powerpc/lib/Makefile | 4 +--- arch/powerpc/math-emu/Makefile | 2 +- arch/powerpc/mm/Makefile | 4 +--- arch/powerpc/oprofile/Makefile | 4 +--- arch/powerpc/platforms/iseries/Makefile | 2 +- arch/powerpc/platforms/pseries/Makefile | 11 +++-------- arch/powerpc/sysdev/Makefile | 4 +--- arch/powerpc/xmon/Makefile | 4 +--- 11 files changed, 17 insertions(+), 32 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile index 51ead52141bd..9a7946c41738 100644 --- a/arch/powerpc/kernel/vdso32/Makefile +++ b/arch/powerpc/kernel/vdso32/Makefile @@ -14,10 +14,10 @@ obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) GCOV_PROFILE := n -EXTRA_CFLAGS := -shared -fno-common -fno-builtin -EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) -EXTRA_AFLAGS := -D__VDSO32__ -s +asflags-y := -D__VDSO32__ -s obj-y += vdso32_wrapper.o extra-y += vdso32.lds diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 79da65d44a2a..8c500d8622e4 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -9,10 +9,10 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n -EXTRA_CFLAGS := -shared -fno-common -fno-builtin -EXTRA_CFLAGS += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) -EXTRA_AFLAGS := 
-D__VDSO64__ -s +asflags-y := -D__VDSO64__ -s obj-y += vdso64_wrapper.o extra-y += vdso64.lds diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index d45c818a384c..4d6863823f69 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -4,7 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm +ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index ad4a36848f25..889f2bc106dd 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -4,9 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc CFLAGS_REMOVE_code-patching.o = -pg CFLAGS_REMOVE_feature-fixups.o = -pg diff --git a/arch/powerpc/math-emu/Makefile b/arch/powerpc/math-emu/Makefile index 0c16ab947f1f..7d1dba0d57f9 100644 --- a/arch/powerpc/math-emu/Makefile +++ b/arch/powerpc/math-emu/Makefile @@ -15,4 +15,4 @@ obj-$(CONFIG_SPE) += math_efp.o CFLAGS_fabs.o = -fno-builtin-fabs CFLAGS_math.o = -fno-builtin-fabs -EXTRA_CFLAGS = -I. -Iinclude/math-emu -w +ccflags-y = -I. -Iinclude/math-emu -w diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index ce68708bbad5..53102f306880 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -4,9 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc obj-y := fault.o mem.o pgtable.o gup.o \ init_$(CONFIG_WORD_SIZE).o \ diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index e219ca43962d..73456c4cec28 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile @@ -1,8 +1,6 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc obj-$(CONFIG_OPROFILE) += oprofile.o diff --git a/arch/powerpc/platforms/iseries/Makefile b/arch/powerpc/platforms/iseries/Makefile index ce014928d460..a7602b11ed9d 100644 --- a/arch/powerpc/platforms/iseries/Makefile +++ b/arch/powerpc/platforms/iseries/Makefile @@ -1,4 +1,4 @@ -EXTRA_CFLAGS += -mno-minimal-toc +ccflags-y := -mno-minimal-toc obj-y += exception.o obj-y += hvlog.o hvlpconfig.o lpardata.o setup.o dt.o mf.o lpevents.o \ diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index 93541b39dd12..59eb8bdaa79d 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile @@ -1,10 +1,5 @@ -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif - -ifeq ($(CONFIG_PPC_PSERIES_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc +ccflags-$(CONFIG_PPC_PSERIES_DEBUG) += -DDEBUG obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o event_sources.o ras.o \ @@ -23,7 +18,7 @@ obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o -obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o +obj-$(CONFIG_PHYP_DUMP) += phyp_dump.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_DTL) += dtl.o diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index 5642924fb9fb..c20ad6de33ee 100644 --- a/arch/powerpc/sysdev/Makefile +++ 
b/arch/powerpc/sysdev/Makefile @@ -1,8 +1,6 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror -ifeq ($(CONFIG_PPC64),y) -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc mpic-msi-obj-$(CONFIG_PCI_MSI) += mpic_msi.o mpic_u3msi.o mpic_pasemi_msi.o obj-$(CONFIG_MPIC) += mpic.o $(mpic-msi-obj-y) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index faa81b6a6612..c168c54e3c40 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -4,9 +4,7 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror GCOV_PROFILE := n -ifdef CONFIG_PPC64 -EXTRA_CFLAGS += -mno-minimal-toc -endif +ccflags-$(CONFIG_PPC64) := -mno-minimal-toc obj-y += xmon.o start.o nonstdio.o -- cgit v1.2.3 From 6edc323db720c65b9e6a770b4bed98f251dd49f0 Mon Sep 17 00:00:00 2001 From: Tirumala Marri Date: Mon, 13 Sep 2010 13:26:11 +0000 Subject: powerpc/44x: Add support for the AMCC APM821xx SoC This patch adds CPU, device tree, defconfig and bluestone board support for APM821xx SoC. Signed-off-by: Tirumala R Marri Signed-off-by: Josh Boyer --- arch/powerpc/boot/dts/bluestone.dts | 254 +++++++++++++++++++++++++++ arch/powerpc/configs/44x/bluestone_defconfig | 68 +++++++ arch/powerpc/kernel/cpu_setup_44x.S | 1 + arch/powerpc/kernel/cputable.c | 15 ++ arch/powerpc/platforms/44x/Kconfig | 16 ++ arch/powerpc/platforms/44x/ppc44x_simple.c | 1 + 6 files changed, 355 insertions(+) create mode 100644 arch/powerpc/boot/dts/bluestone.dts create mode 100644 arch/powerpc/configs/44x/bluestone_defconfig (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts new file mode 100644 index 000000000000..9bb3d72c0e5a --- /dev/null +++ b/arch/powerpc/boot/dts/bluestone.dts @@ -0,0 +1,254 @@ +/* + * Device Tree for Bluestone (APM821xx) board. + * + * Copyright (c) 2010, Applied Micro Circuits Corporation + * Author: Tirumala R Marri + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + */ + +/dts-v1/; + +/ { + #address-cells = <2>; + #size-cells = <1>; + model = "apm,bluestone"; + compatible = "apm,bluestone"; + dcr-parent = <&{/cpus/cpu@0}>; + + aliases { + ethernet0 = &EMAC0; + serial0 = &UART0; + serial1 = &UART1; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + model = "PowerPC,apm821xx"; + reg = <0x00000000>; + clock-frequency = <0>; /* Filled in by U-Boot */ + timebase-frequency = <0>; /* Filled in by U-Boot */ + i-cache-line-size = <32>; + d-cache-line-size = <32>; + i-cache-size = <32768>; + d-cache-size = <32768>; + dcr-controller; + dcr-access-method = "native"; + next-level-cache = <&L2C0>; + }; + }; + + memory { + device_type = "memory"; + reg = <0x00000000 0x00000000 0x00000000>; /* Filled in by U-Boot */ + }; + + UIC0: interrupt-controller0 { + compatible = "ibm,uic"; + interrupt-controller; + cell-index = <0>; + dcr-reg = <0x0c0 0x009>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + }; + + UIC1: interrupt-controller1 { + compatible = "ibm,uic"; + interrupt-controller; + cell-index = <1>; + dcr-reg = <0x0d0 0x009>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + interrupts = <0x1e 0x4 0x1f 0x4>; /* cascade */ + interrupt-parent = <&UIC0>; + }; + + UIC2: interrupt-controller2 { + compatible = "ibm,uic"; + interrupt-controller; + cell-index = <2>; + dcr-reg = <0x0e0 0x009>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + interrupts = <0xa 0x4 0xb 0x4>; /* cascade */ + interrupt-parent = <&UIC0>; + }; + + UIC3: interrupt-controller3 { + compatible = "ibm,uic"; + interrupt-controller; + cell-index = <3>; + dcr-reg = <0x0f0 0x009>; + #address-cells = <0>; + #size-cells = <0>; + #interrupt-cells = <2>; + interrupts = <0x10 0x4 0x11 0x4>; /* cascade */ + interrupt-parent = <&UIC0>; + }; + + SDR0: sdr { + compatible = "ibm,sdr-apm821xx"; + dcr-reg = <0x00e 0x002>; + }; + + CPR0: cpr { + compatible = "ibm,cpr-apm821xx"; + dcr-reg = <0x00c 0x002>; + }; + + plb { + compatible = "ibm,plb4"; + #address-cells = <2>; + #size-cells = <1>; + ranges; + clock-frequency = <0>; /* Filled in by U-Boot */ + + SDRAM0: sdram { + compatible = "ibm,sdram-apm821xx"; + dcr-reg = <0x010 0x002>; + }; + + MAL0: mcmal { + compatible = "ibm,mcmal2"; + descriptor-memory = "ocm"; + dcr-reg = <0x180 0x062>; + num-tx-chans = <1>; + num-rx-chans = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-parent = <&UIC2>; + interrupts = < /*TXEOB*/ 0x6 0x4 + /*RXEOB*/ 0x7 0x4 + /*SERR*/ 0x3 0x4 + /*TXDE*/ 0x4 0x4 + /*RXDE*/ 0x5 0x4>; + }; + + POB0: opb { + compatible = "ibm,opb"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0xb0000000 0x00000004 0xb0000000 0x50000000>; + clock-frequency = <0>; /* Filled in by U-Boot */ + + EBC0: ebc { + compatible = "ibm,ebc"; + dcr-reg = <0x012 0x002>; + #address-cells = <2>; + #size-cells = <1>; + clock-frequency = <0>; /* Filled in by U-Boot */ + /* ranges property is supplied by U-Boot */ + ranges = < 0x00000003 0x00000000 0xe0000000 0x8000000>; + interrupts = <0x6 0x4>; + interrupt-parent = <&UIC1>; + + nor_flash@0,0 { + compatible = "amd,s29gl512n", "cfi-flash"; + bank-width = <2>; + reg = <0x00000000 0x00000000 0x00400000>; + #address-cells = <1>; + #size-cells = <1>; + partition@0 { + label =
"kernel"; + reg = <0x00000000 0x00180000>; + }; + partition@180000 { + label = "env"; + reg = <0x00180000 0x00020000>; + }; + partition@1a0000 { + label = "u-boot"; + reg = <0x001a0000 0x00060000>; + }; + }; + } + + UART0: serial@ef600300 { + device_type = "serial"; + compatible = "ns16550"; + reg = <0xef600300 0x00000008>; + virtual-reg = <0xef600300>; + clock-frequency = <0>; /* Filled in by U-Boot */ + current-speed = <0>; /* Filled in by U-Boot */ + interrupt-parent = <&UIC1>; + interrupts = <0x1 0x4>; + }; + + IIC0: i2c@ef600700 { + compatible = "ibm,iic"; + reg = <0xef600700 0x00000014>; + interrupt-parent = <&UIC0>; + interrupts = <0x2 0x4>; + }; + + IIC1: i2c@ef600800 { + compatible = "ibm,iic"; + reg = <0xef600800 0x00000014>; + interrupt-parent = <&UIC0>; + interrupts = <0x3 0x4>; + }; + + RGMII0: emac-rgmii@ef601500 { + compatible = "ibm,rgmii"; + reg = <0xef601500 0x00000008>; + has-mdio; + }; + + TAH0: emac-tah@ef601350 { + compatible = "ibm,tah"; + reg = <0xef601350 0x00000030>; + }; + + EMAC0: ethernet@ef600c00 { + device_type = "network"; + compatible = "ibm,emac4sync"; + interrupt-parent = <&EMAC0>; + interrupts = <0x0 0x1>; + #interrupt-cells = <1>; + #address-cells = <0>; + #size-cells = <0>; + interrupt-map = ; + reg = <0xef600c00 0x000000c4>; + local-mac-address = [000000000000]; /* Filled in by U-Boot */ + mal-device = <&MAL0>; + mal-tx-channel = <0>; + mal-rx-channel = <0>; + cell-index = <0>; + max-frame-size = <9000>; + rx-fifo-size = <16384>; + tx-fifo-size = <2048>; + phy-mode = "rgmii"; + phy-map = <0x00000000>; + rgmii-device = <&RGMII0>; + rgmii-channel = <0>; + tah-device = <&TAH0>; + tah-channel = <0>; + has-inverted-stacr-oc; + has-new-stacr-staopc; + }; + }; + + }; +}; diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig new file mode 100644 index 000000000000..ac65b48b8ccd --- /dev/null +++ b/arch/powerpc/configs/44x/bluestone_defconfig @@ -0,0 +1,68 @@ +CONFIG_44x=y +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_BLK_DEV_INITRD=y +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_PCI_QUIRKS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_BLUESTONE=y +# CONFIG_EBONY is not set +# CONFIG_KVM_GUEST is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_SPARSE_IRQ=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="" +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_PNP=y +CONFIG_IP_PNP_DHCP=y +CONFIG_IP_PNP_BOOTP=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_CONNECTOR=y +CONFIG_MTD=y +CONFIG_MTD_PARTITIONS=y +CONFIG_MTD_CMDLINE_PARTS=y +CONFIG_MTD_OF_PARTS=y +CONFIG_MTD_CHAR=y +CONFIG_MTD_BLOCK=y +CONFIG_MTD_CFI=y +CONFIG_MTD_CFI_AMDSTD=y +CONFIG_MTD_PHYSMAP_OF=y +CONFIG_PROC_DEVICETREE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=35000 +CONFIG_NETDEVICES=y +CONFIG_NET_ETHERNET=y +CONFIG_IBM_NEW_EMAC=y +CONFIG_IBM_NEW_EMAC_RXB=256 +CONFIG_IBM_NEW_EMAC_TXB=256 +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=2 +CONFIG_SERIAL_8250_RUNTIME_UARTS=2 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_I2C_IBM_IIC=y +CONFIG_SENSORS_AD7414=y +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_M41T80=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_CRAMFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V3=y +CONFIG_ROOT_NFS=y +CONFIG_NLS=y 
diff --git a/arch/powerpc/kernel/cpu_setup_44x.S b/arch/powerpc/kernel/cpu_setup_44x.S index 7d606f89a839..e32b4a9a2c22 100644 --- a/arch/powerpc/kernel/cpu_setup_44x.S +++ b/arch/powerpc/kernel/cpu_setup_44x.S @@ -35,6 +35,7 @@ _GLOBAL(__setup_cpu_440grx) _GLOBAL(__setup_cpu_460ex) _GLOBAL(__setup_cpu_460gt) _GLOBAL(__setup_cpu_460sx) +_GLOBAL(__setup_cpu_apm821xx) mflr r4 bl __init_fpu_44x bl __fixup_440A_mcheck diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1f9123f412ec..b7ac795e5270 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -48,6 +48,7 @@ extern void __setup_cpu_440x5(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460sx(unsigned long offset, struct cpu_spec *spec); +extern void __setup_cpu_apm821xx(unsigned long offset, struct cpu_spec *spec); extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); @@ -1805,6 +1806,20 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, + { /* 464 in APM821xx */ + .pvr_mask = 0xffffff00, + .pvr_value = 0x12C41C80, + .cpu_name = "APM821XX", + .cpu_features = CPU_FTRS_44X, + .cpu_user_features = COMMON_USER_BOOKE | + PPC_FEATURE_HAS_FPU, + .mmu_features = MMU_FTR_TYPE_44x, + .icache_bsize = 32, + .dcache_bsize = 32, + .cpu_setup = __setup_cpu_apm821xx, + .machine_check = machine_check_440A, + .platform = "ppc440", + }, { /* 476 core */ .pvr_mask = 0xffff0000, .pvr_value = 0x11a50000, diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig index 69d668c072ae..0f979c5c756b 100644 --- a/arch/powerpc/platforms/44x/Kconfig +++ b/arch/powerpc/platforms/44x/Kconfig @@ -17,6 +17,16 @@ config BAMBOO help This option enables support for the IBM PPC440EP evaluation board. +config BLUESTONE + bool "Bluestone" + depends on 44x + default n + select PPC44x_SIMPLE + select APM821xx + select IBM_NEW_EMAC_RGMII + help + This option enables support for the APM APM821xx Evaluation board. 
+ config EBONY bool "Ebony" depends on 44x @@ -293,6 +303,12 @@ config 460SX select IBM_NEW_EMAC_ZMII select IBM_NEW_EMAC_TAH +config APM821xx + bool + select PPC_FPU + select IBM_NEW_EMAC_EMAC4 + select IBM_NEW_EMAC_TAH + # 44x errata/workaround config symbols, selected by the CPU models above config IBM440EP_ERR42 bool diff --git a/arch/powerpc/platforms/44x/ppc44x_simple.c b/arch/powerpc/platforms/44x/ppc44x_simple.c index 5f7a29d7f590..7ddcba3b9397 100644 --- a/arch/powerpc/platforms/44x/ppc44x_simple.c +++ b/arch/powerpc/platforms/44x/ppc44x_simple.c @@ -52,6 +52,7 @@ machine_device_initcall(ppc44x_simple, ppc44x_device_probe); static char *board[] __initdata = { "amcc,arches", "amcc,bamboo", + "amcc,bluestone", "amcc,canyonlands", "amcc,glacier", "ibm,ebony", -- cgit v1.2.3 From b8f44ec2c05f9cfe1647173ac60c0cccb1118c91 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 5 Aug 2010 02:45:08 -0500 Subject: powerpc/fsl-pci: Fix MSI support on 83xx platforms The following commit broke 83xx because it assumed the 83xx platforms exposed the "IMMR" address in BAR0 like the 85xx/86xx/QoriQ devices do: commit 3da34aae03d498ee62f75aa7467de93cce3030fd Author: Kumar Gala Date: Tue May 12 15:51:56 2009 -0500 powerpc/fsl: Support unique MSI addresses per PCIe Root Complex However that is not true, so we have to search through the inbound window settings on 83xx to find which one matches the IMMR address to determine its PCI address. Reported-by: Ilya Yanok Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_msi.c | 9 ++++---- arch/powerpc/sysdev/fsl_pci.c | 52 +++++++++++++++++++++++++++++++++++++++++-- arch/powerpc/sysdev/fsl_pci.h | 1 + 3 files changed, 55 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index 87991d3abbab..20cdcd2b0eed 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -24,6 +24,7 @@ #include #include #include "fsl_msi.h" +#include "fsl_pci.h" LIST_HEAD(msi_head); @@ -125,13 +126,11 @@ static void fsl_compose_msi_msg(struct pci_dev *pdev, int hwirq, { struct fsl_msi *msi_data = fsl_msi_data; struct pci_controller *hose = pci_bus_to_host(pdev->bus); - u32 base = 0; + u64 base = fsl_pci_immrbar_base(hose); - pci_bus_read_config_dword(hose->bus, - PCI_DEVFN(0, 0), PCI_BASE_ADDRESS_0, &base); + msg->address_lo = msi_data->msi_addr_lo + lower_32_bits(base); + msg->address_hi = msi_data->msi_addr_hi + upper_32_bits(base); - msg->address_lo = msi_data->msi_addr_lo + base; - msg->address_hi = msi_data->msi_addr_hi; msg->data = hwirq; pr_debug("%s: allocated srs: %d, ibs: %d\n", diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 4ae933225251..505c8f0ece9b 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -1,7 +1,7 @@ /* * MPC83xx/85xx/86xx PCI/PCIE support routing. * - * Copyright 2007-2009 Freescale Semiconductor, Inc. + * Copyright 2007-2010 Freescale Semiconductor, Inc. * Copyright 2008-2009 MontaVista Software, Inc. * * Initial author: Xianghua Xiao @@ -34,7 +34,7 @@ #include #include -static int fsl_pcie_bus_fixup; +static int fsl_pcie_bus_fixup, is_mpc83xx_pci; static void __init quirk_fsl_pcie_header(struct pci_dev *dev) { @@ -430,6 +430,13 @@ struct mpc83xx_pcie_priv { u32 dev_base; }; +struct pex_inbound_window { + u32 ar; + u32 tar; + u32 barl; + u32 barh; +}; + /* * With the convention of u-boot, the PCIE outbound window 0 serves * as configuration transactions outbound. 
@@ -437,6 +444,8 @@ struct mpc83xx_pcie_priv { #define PEX_OUTWIN0_BAR 0xCA4 #define PEX_OUTWIN0_TAL 0xCA8 #define PEX_OUTWIN0_TAH 0xCAC +#define PEX_RC_INWIN_BASE 0xE60 +#define PEX_RCIWARn_EN 0x1 static int mpc83xx_pcie_exclude_device(struct pci_bus *bus, unsigned int devfn) { @@ -604,6 +613,8 @@ int __init mpc83xx_add_bridge(struct device_node *dev) const int *bus_range; int primary; + is_mpc83xx_pci = 1; + if (!of_device_is_available(dev)) { pr_warning("%s: disabled by the firmware.\n", dev->full_name); @@ -683,3 +694,40 @@ err0: return ret; } #endif /* CONFIG_PPC_83xx */ + +u64 fsl_pci_immrbar_base(struct pci_controller *hose) +{ +#ifdef CONFIG_PPC_83xx + if (is_mpc83xx_pci) { + struct mpc83xx_pcie_priv *pcie = hose->dn->data; + struct pex_inbound_window *in; + int i; + + /* Walk the Root Complex Inbound windows to match IMMR base */ + in = pcie->cfg_type0 + PEX_RC_INWIN_BASE; + for (i = 0; i < 4; i++) { + /* not enabled, skip */ + if (!(in_le32(&in[i].ar) & PEX_RCIWARn_EN)) + continue; + + if (get_immrbase() == in_le32(&in[i].tar)) + return (u64)in_le32(&in[i].barh) << 32 | + in_le32(&in[i].barl); + } + + printk(KERN_WARNING "could not find PCI BAR matching IMMR\n"); + } +#endif + +#if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx) + if (!is_mpc83xx_pci) { + u32 base; + + pci_bus_read_config_dword(hose->bus, + PCI_DEVFN(0, 0), PCI_BASE_ADDRESS_0, &base); + return base; + } +#endif + + return 0; +} diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index a9d8bbebed80..8ad72a11f77b 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -88,6 +88,7 @@ struct ccsr_pci { extern int fsl_add_bridge(struct device_node *dev, int is_primary); extern void fsl_pcibios_fixup_bus(struct pci_bus *bus); extern int mpc83xx_add_bridge(struct device_node *dev); +u64 fsl_pci_immrbar_base(struct pci_controller *hose); #endif /* __POWERPC_FSL_PCI_H */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From 55ec2fca3e99f83b5c674e9aba713d848392f6cc Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 20 Sep 2010 11:23:41 -0500 Subject: powerpc: export ppc_proc_freq and ppc_tb_freq as GPL symbols Export the global variable 'ppc_tb_freq', so that modules (like the Book-E watchdog driver) can use it. To maintain consistency, ppc_proc_freq is changed to a GPL-only export. This is okay, because any module that needs this symbol should be an actual Linux driver, which must be GPL-licensed. Signed-off-by: Timur Tabi Acked-by: Josh Boyer Signed-off-by: Kumar Gala --- arch/powerpc/kernel/time.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bcb738b9ff8c..644f9188d8e7 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -161,8 +161,9 @@ extern struct timezone sys_tz; static long timezone_offset; unsigned long ppc_proc_freq; -EXPORT_SYMBOL(ppc_proc_freq); +EXPORT_SYMBOL_GPL(ppc_proc_freq); unsigned long ppc_tb_freq; +EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING /* -- cgit v1.2.3 From c71635d288ffd3bcdfb30308f681f9af34f0fc81 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Thu, 16 Sep 2010 17:58:23 -0500 Subject: powerpc/kexec: make masking/disabling interrupts generic Right now just the kexec crash pathway turns off the interrupts.
Pull that out and make a generic version for use elsewhere Signed-off-by: Matthew McClintock Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/kexec.h | 1 + arch/powerpc/kernel/crash.c | 13 +------------ arch/powerpc/kernel/machine_kexec.c | 24 ++++++++++++++++++++++++ arch/powerpc/kernel/machine_kexec_32.c | 4 ++++ 4 files changed, 30 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 076327f2eff7..f54408d995b5 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -91,6 +91,7 @@ extern void machine_kexec_simple(struct kimage *image); extern void crash_kexec_secondary(struct pt_regs *regs); extern int overlaps_crashkernel(unsigned long start, unsigned long size); extern void reserve_crashkernel(void); +extern void machine_kexec_mask_interrupts(void); #else /* !CONFIG_KEXEC */ static inline int kexec_sr_activated(int cpu) { return 0; } diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index 4457382f8667..832c8c4db254 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -414,18 +414,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_kexec_wait_realmode(crashing_cpu); #endif - for_each_irq(i) { - struct irq_desc *desc = irq_to_desc(i); - - if (!desc || !desc->chip || !desc->chip->eoi) - continue; - - if (desc->status & IRQ_INPROGRESS) - desc->chip->eoi(i); - - if (!(desc->status & IRQ_DISABLED)) - desc->chip->shutdown(i); - } + machine_kexec_mask_interrupts(); /* * Call registered shutdown routines savely. Swap out diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index dd6c141f1662..df7e20c191cd 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -14,10 +14,34 @@ #include #include #include +#include + #include #include #include +void machine_kexec_mask_interrupts(void) { + unsigned int i; + + for_each_irq(i) { + struct irq_desc *desc = irq_to_desc(i); + + if (!desc || !desc->chip) + continue; + + if (desc->chip->eoi && + desc->status & IRQ_INPROGRESS) + desc->chip->eoi(i); + + if (desc->chip->mask) + desc->chip->mask(i); + + if (desc->chip->disable && + !(desc->status & IRQ_DISABLED)) + desc->chip->disable(i); + } +} + void machine_crash_shutdown(struct pt_regs *regs) { if (ppc_md.machine_crash_shutdown) diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c index ae63a964b858..e63f2e7d2efb 100644 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ b/arch/powerpc/kernel/machine_kexec_32.c @@ -39,6 +39,10 @@ void default_machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + /* mask each interrupt so we are in a more sane state for the + * kexec kernel */ + machine_kexec_mask_interrupts(); + page_list = image->head; /* we need both effective and real address here */ -- cgit v1.2.3 From edb85800101c354a395ecc5ed3b52a9499bd4cfc Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Thu, 16 Sep 2010 17:58:24 -0500 Subject: powerpc/85xx: Remove call to mpic_teardown_this_cpu in kexec We no longer need to call this explicitly as a generic version is called by default. 
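For context, an abridged view of the call chain established by the previous patch on the 32-bit kexec path, which is what makes the explicit MPIC teardown below redundant:

	void default_machine_kexec(struct kimage *image)
	{
		local_irq_disable();
		/* EOIs in-progress interrupts, then masks and disables every
		 * IRQ through its irq_chip ops -- including the MPIC's */
		machine_kexec_mask_interrupts();
		/* ... copy the image and jump to the new kernel ... */
	}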
Signed-off-by: Matthew McClintock Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/smp.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index bd38b6a240de..2aee4b391319 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -125,8 +125,6 @@ static int kexec_down_cpus = 0; void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) { - mpic_teardown_this_cpu(1); - /* When crashing, this gets called on all CPU's we only * take down the non-boot cpus */ if (smp_processor_id() != boot_cpuid) -- cgit v1.2.3 From 5d692961633d4ecd1ca07313b75ddf35520a4c28 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Thu, 16 Sep 2010 17:58:25 -0500 Subject: powerpc/85xx: Minor fixups for kexec on 85xx Make kexec_down_cpus atomic since it will be incremented by all cores as they are coming down. Remove duplicate calls to mpc85xx_smp_kexec_down; now it's called by the crash and normal kexec pathway only once. Increase the timeout to wait for other cores to shut down. Signed-off-by: Matthew McClintock Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/smp.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 2aee4b391319..c9a77fa62744 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -121,17 +121,15 @@ struct smp_ops_t smp_85xx_ops = { }; #ifdef CONFIG_KEXEC -static int kexec_down_cpus = 0; +atomic_t kexec_down_cpus = ATOMIC_INIT(0); void mpc85xx_smp_kexec_cpu_down(int crash_shutdown, int secondary) { - /* When crashing, this gets called on all CPU's we only - * take down the non-boot cpus */ - if (smp_processor_id() != boot_cpuid) - { - local_irq_disable(); - kexec_down_cpus++; + local_irq_disable(); + if (secondary) { + atomic_inc(&kexec_down_cpus); + /* loop forever */ while (1); } } @@ -144,14 +142,14 @@ static void mpc85xx_smp_kexec_down(void *arg) static void mpc85xx_smp_machine_kexec(struct kimage *image) { - int timeout = 2000; - int i; + int timeout = INT_MAX; + int i, num_cpus = num_present_cpus(); - set_cpus_allowed(current, cpumask_of_cpu(boot_cpuid)); - smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); + if (image->type == KEXEC_TYPE_DEFAULT) + smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); - while ( (kexec_down_cpus != (num_online_cpus() - 1)) && + while ( (atomic_read(&kexec_down_cpus) != (num_cpus - 1)) && ( timeout > 0 ) ) { timeout--; @@ -160,7 +158,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image) if ( !timeout ) printk(KERN_ERR "Unable to bring down secondary cpu(s)"); - for (i = 0; i < num_present_cpus(); i++) + for (i = 0; i < num_cpus; i++) { if ( i == smp_processor_id() ) continue; mpic_reset_core(i); -- cgit v1.2.3 From 677de425583b43bf1af3aea0fa8d433120f0f13c Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Thu, 16 Sep 2010 17:58:26 -0500 Subject: powerpc/85xx: flush dcache before resetting cores When we do an mpic_reset_core we need to make sure the dcache is flushed. 
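The primitive this is built on can be sketched as follows; it mirrors the map_and_flush() helper the patch adds (assuming <linux/highmem.h> and <asm/cacheflush.h> are available). kmap() is needed because on 32-bit parts the page may be highmem and therefore has no permanent kernel mapping to hand to flush_dcache_range():

    /* Write the dcache lines covering one physical page back to memory
     * so an about-to-be-reset core later sees consistent data. */
    static void flush_phys_page(unsigned long paddr)
    {
            struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
            unsigned long kaddr = (unsigned long)kmap(page);

            flush_dcache_range(kaddr, kaddr + PAGE_SIZE);
            kunmap(page);
    }
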
Signed-off-by: Matthew McClintock Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/smp.c | 50 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index c9a77fa62744..5c91a992f02b 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -140,11 +141,60 @@ static void mpc85xx_smp_kexec_down(void *arg) ppc_md.kexec_cpu_down(0,1); } +static void map_and_flush(unsigned long paddr) +{ + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); + unsigned long kaddr = (unsigned long)kmap(page); + + flush_dcache_range(kaddr, kaddr + PAGE_SIZE); + kunmap(page); +} + +/** + * Before we reset the other cores, we need to flush relevant cache + * out to memory so we don't get anything corrupted, some of these flushes + * are performed out of an overabundance of caution as interrupts are not + * disabled yet and we can switch cores + */ +static void mpc85xx_smp_flush_dcache_kexec(struct kimage *image) +{ + kimage_entry_t *ptr, entry; + unsigned long paddr; + int i; + + if (image->type == KEXEC_TYPE_DEFAULT) { + /* normal kexec images are stored in temporary pages */ + for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); + ptr = (entry & IND_INDIRECTION) ? + phys_to_virt(entry & PAGE_MASK) : ptr + 1) { + if (!(entry & IND_DESTINATION)) { + map_and_flush(entry); + } + } + /* flush out last IND_DONE page */ + map_and_flush(entry); + } else { + /* crash type kexec images are copied to the crash region */ + for (i = 0; i < image->nr_segments; i++) { + struct kexec_segment *seg = &image->segment[i]; + for (paddr = seg->mem; paddr < seg->mem + seg->memsz; + paddr += PAGE_SIZE) { + map_and_flush(paddr); + } + } + } + + /* also flush the kimage struct to be passed in as well */ + flush_dcache_range((unsigned long)image, + (unsigned long)image + sizeof(*image)); +} + static void mpc85xx_smp_machine_kexec(struct kimage *image) { int timeout = INT_MAX; int i, num_cpus = num_present_cpus(); + mpc85xx_smp_flush_dcache_kexec(image); if (image->type == KEXEC_TYPE_DEFAULT) smp_call_function(mpc85xx_smp_kexec_down, NULL, 0); -- cgit v1.2.3 From cd2bd44ea8a2e8be8fff583fa5759e8f3758e67a Mon Sep 17 00:00:00 2001 From: Ilya Yanok Date: Thu, 9 Sep 2010 01:55:16 +0200 Subject: powerpc/mpc83xx: Support for MPC8308 P1M board This patch adds support for MPC8308 P1M board. 
Supported devices: DUART Dual Ethernet NOR flash Both I2C controllers USB in peripheral mode PCI Express Signed-off-by: Ilya Yanok Signed-off-by: Kumar Gala --- arch/powerpc/boot/dts/mpc8308_p1m.dts | 332 ++++++++++++++++++++++++++++++ arch/powerpc/platforms/83xx/Kconfig | 4 +- arch/powerpc/platforms/83xx/mpc830x_rdb.c | 3 +- 3 files changed, 336 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/boot/dts/mpc8308_p1m.dts (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/mpc8308_p1m.dts b/arch/powerpc/boot/dts/mpc8308_p1m.dts new file mode 100644 index 000000000000..05a76ccfd499 --- /dev/null +++ b/arch/powerpc/boot/dts/mpc8308_p1m.dts @@ -0,0 +1,332 @@ +/* + * mpc8308_p1m Device Tree Source + * + * Copyright 2010 Ilya Yanok, Emcraft Systems, yanok@emcraft.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +/dts-v1/; + +/ { + compatible = "denx,mpc8308_p1m"; + #address-cells = <1>; + #size-cells = <1>; + + aliases { + ethernet0 = &enet0; + ethernet1 = &enet1; + serial0 = &serial0; + serial1 = &serial1; + pci0 = &pci0; + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + PowerPC,8308@0 { + device_type = "cpu"; + reg = <0x0>; + d-cache-line-size = <32>; + i-cache-line-size = <32>; + d-cache-size = <16384>; + i-cache-size = <16384>; + timebase-frequency = <0>; // from bootloader + bus-frequency = <0>; // from bootloader + clock-frequency = <0>; // from bootloader + }; + }; + + memory { + device_type = "memory"; + reg = <0x00000000 0x08000000>; // 128MB at 0 + }; + + localbus@e0005000 { + #address-cells = <2>; + #size-cells = <1>; + compatible = "fsl,mpc8315-elbc", "fsl,elbc", "simple-bus"; + reg = <0xe0005000 0x1000>; + interrupts = <77 0x8>; + interrupt-parent = <&ipic>; + + ranges = <0x0 0x0 0xfc000000 0x04000000 + 0x1 0x0 0xfbff0000 0x00008000 + 0x2 0x0 0xfbff8000 0x00008000>; + + flash@0,0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "cfi-flash"; + reg = <0x0 0x0 0x4000000>; + bank-width = <2>; + device-width = <1>; + + u-boot@0 { + reg = <0x0 0x60000>; + read-only; + }; + env@60000 { + reg = <0x60000 0x20000>; + }; + env1@80000 { + reg = <0x80000 0x20000>; + }; + kernel@a0000 { + reg = <0xa0000 0x200000>; + }; + dtb@2a0000 { + reg = <0x2a0000 0x20000>; + }; + ramdisk@2c0000 { + reg = <0x2c0000 0x640000>; + }; + user@700000 { + reg = <0x700000 0x3900000>; + }; + }; + + can@1,0 { + compatible = "nxp,sja1000"; + reg = <0x1 0x0 0x80>; + interrupts = <18 0x8>; + interrups-parent = <&ipic>; + }; + + cpld@2,0 { + compatible = "denx,mpc8308_p1m-cpld"; + reg = <0x2 0x0 0x8>; + interrupts = <48 0x8>; + interrups-parent = <&ipic>; + }; + }; + + immr@e0000000 { + #address-cells = <1>; + #size-cells = <1>; + device_type = "soc"; + compatible = "fsl,mpc8308-immr", "simple-bus"; + ranges = <0 0xe0000000 0x00100000>; + reg = <0xe0000000 0x00000200>; + bus-frequency = <0>; + + i2c@3000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-i2c"; + reg = <0x3000 0x100>; + interrupts = <14 0x8>; + interrupt-parent = <&ipic>; + dfsrr; + fram@50 { + compatible = "ramtron,24c64"; + reg = <0x50>; + }; + }; + + i2c@3100 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl-i2c"; + reg = <0x3100 0x100>; + interrupts = <15 0x8>; + interrupt-parent = <&ipic>; + dfsrr; + pwm@28 { + compatible = "maxim,ds1050"; + reg = 
<0x28>; + }; + sensor@48 { + compatible = "maxim,max6625"; + reg = <0x48>; + }; + sensor@49 { + compatible = "maxim,max6625"; + reg = <0x49>; + }; + sensor@4b { + compatible = "maxim,max6625"; + reg = <0x4b>; + }; + }; + + usb@23000 { + compatible = "fsl-usb2-dr"; + reg = <0x23000 0x1000>; + #address-cells = <1>; + #size-cells = <0>; + interrupt-parent = <&ipic>; + interrupts = <38 0x8>; + dr_mode = "peripheral"; + phy_type = "ulpi"; + }; + + enet0: ethernet@24000 { + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x24000 0x1000>; + + cell-index = <0>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x24000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <32 0x8 33 0x8 34 0x8>; + interrupt-parent = <&ipic>; + phy-handle = < &phy1 >; + + mdio@520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-mdio"; + reg = <0x520 0x20>; + phy1: ethernet-phy@1 { + interrupt-parent = <&ipic>; + interrupts = <17 0x8>; + reg = <0x1>; + device_type = "ethernet-phy"; + }; + phy2: ethernet-phy@2 { + interrupt-parent = <&ipic>; + interrupts = <19 0x8>; + reg = <0x2>; + device_type = "ethernet-phy"; + }; + tbi0: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + }; + + enet1: ethernet@25000 { + #address-cells = <1>; + #size-cells = <1>; + cell-index = <1>; + device_type = "network"; + model = "eTSEC"; + compatible = "gianfar"; + reg = <0x25000 0x1000>; + ranges = <0x0 0x25000 0x1000>; + local-mac-address = [ 00 00 00 00 00 00 ]; + interrupts = <35 0x8 36 0x8 37 0x8>; + interrupt-parent = <&ipic>; + phy-handle = < &phy2 >; + + mdio@520 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,gianfar-tbi"; + reg = <0x520 0x20>; + tbi1: tbi-phy@11 { + reg = <0x11>; + device_type = "tbi-phy"; + }; + }; + }; + + serial0: serial@4500 { + cell-index = <0>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4500 0x100>; + clock-frequency = <133333333>; + interrupts = <9 0x8>; + interrupt-parent = <&ipic>; + }; + + serial1: serial@4600 { + cell-index = <1>; + device_type = "serial"; + compatible = "ns16550"; + reg = <0x4600 0x100>; + clock-frequency = <133333333>; + interrupts = <10 0x8>; + interrupt-parent = <&ipic>; + }; + + gpio@c00 { + #gpio-cells = <2>; + compatible = "fsl,mpc8308-gpio", "fsl,mpc8349-gpio"; + reg = <0xc00 0x18>; + interrupts = <74 0x8>; + interrupt-parent = <&ipic>; + gpio-controller; + }; + + timer@500 { + compatible = "fsl,mpc8308-gtm", "fsl,gtm"; + reg = <0x500 0x100>; + interrupts = <90 8 78 8 84 8 72 8>; + interrupt-parent = <&ipic>; + clock-frequency = <133333333>; + }; + + /* IPIC + * interrupts cell = + * sense values match linux IORESOURCE_IRQ_* defines: + * sense == 8: Level, low assertion + * sense == 2: Edge, high-to-low change + */ + ipic: interrupt-controller@700 { + compatible = "fsl,ipic"; + interrupt-controller; + #address-cells = <0>; + #interrupt-cells = <2>; + reg = <0x700 0x100>; + device_type = "ipic"; + }; + + ipic-msi@7c0 { + compatible = "fsl,ipic-msi"; + reg = <0x7c0 0x40>; + msi-available-ranges = <0x0 0x100>; + interrupts = < 0x43 0x8 + 0x4 0x8 + 0x51 0x8 + 0x52 0x8 + 0x56 0x8 + 0x57 0x8 + 0x58 0x8 + 0x59 0x8 >; + interrupt-parent = < &ipic >; + }; + + }; + + pci0: pcie@e0009000 { + #address-cells = <3>; + #size-cells = <2>; + #interrupt-cells = <1>; + device_type = "pci"; + compatible = "fsl,mpc8308-pcie", "fsl,mpc8314-pcie"; + reg = <0xe0009000 0x00001000 + 0xb0000000 0x01000000>; + ranges = <0x02000000 0 0xa0000000 0xa0000000 0 0x10000000 + 
0x01000000 0 0x00000000 0xb1000000 0 0x00800000>; + bus-range = <0 0>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &ipic 1 8>; + interrupts = <0x1 0x8>; + interrupt-parent = <&ipic>; + clock-frequency = <0>; + + pcie@0 { + #address-cells = <3>; + #size-cells = <2>; + device_type = "pci"; + reg = <0 0 0 0 0>; + ranges = <0x02000000 0 0xa0000000 + 0x02000000 0 0xa0000000 + 0 0x10000000 + 0x01000000 0 0x00000000 + 0x01000000 0 0x00000000 + 0 0x00800000>; + }; + }; +}; diff --git a/arch/powerpc/platforms/83xx/Kconfig b/arch/powerpc/platforms/83xx/Kconfig index 021763a32c2f..73f4135f3a1a 100644 --- a/arch/powerpc/platforms/83xx/Kconfig +++ b/arch/powerpc/platforms/83xx/Kconfig @@ -10,12 +10,12 @@ menuconfig PPC_83xx if PPC_83xx config MPC830x_RDB - bool "Freescale MPC830x RDB" + bool "Freescale MPC830x RDB and derivatives" select DEFAULT_UIMAGE select PPC_MPC831x select FSL_GTM help - This option enables support for the MPC8308 RDB board. + This option enables support for the MPC8308 RDB and MPC8308 P1M boards. config MPC831x_RDB bool "Freescale MPC831x RDB" diff --git a/arch/powerpc/platforms/83xx/mpc830x_rdb.c b/arch/powerpc/platforms/83xx/mpc830x_rdb.c index ac102ee9abe8..846831d495b5 100644 --- a/arch/powerpc/platforms/83xx/mpc830x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc830x_rdb.c @@ -65,7 +65,8 @@ static int __init mpc830x_rdb_probe(void) unsigned long root = of_get_flat_dt_root(); return of_flat_dt_is_compatible(root, "MPC8308RDB") || - of_flat_dt_is_compatible(root, "fsl,mpc8308rdb"); + of_flat_dt_is_compatible(root, "fsl,mpc8308rdb") || + of_flat_dt_is_compatible(root, "denx,mpc8308_p1m"); } static struct of_device_id __initdata of_bus_ids[] = { -- cgit v1.2.3 From 92437d41374bf59b1914b53bd10ca69d31b1b581 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 24 Sep 2010 12:44:52 -0400 Subject: powerpc: Fix invalid page flags in create TLB CAM path for PTE_64BIT There exists a four line chunk of code, which when configured for 64 bit address space, can incorrectly set certain page flags during the TLB creation. It turns out that this is code which isn't used, but might still serve a purpose. Since it isn't obvious why it exists or why it causes problems, the below description covers both in detail. For powerpc bootstrap, the physical memory (at most 768M), is mapped into the kernel space via the following path: MMU_init() | + adjust_total_lowmem() | + map_mem_in_cams() | + settlbcam(i, virt, phys, cam_sz, PAGE_KERNEL_X, 0); On settlbcam(), the kernel will create TLB entries according to the flag, PAGE_KERNEL_X. settlbcam() { ... TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); ^ These entries cannot be invalidated by the kernel since MAS1_IPROT is set on TLB property. ... if (flags & _PAGE_USER) { TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); } For classic BookE (flags & _PAGE_USER) is 'zero' so it's fine. But on boards like the the Freescale P4080, we want to support 36-bit physical address on it. So the following options may be set: CONFIG_FSL_BOOKE=y CONFIG_PTE_64BIT=y CONFIG_PHYS_64BIT=y As a result, boards like the P4080 will introduce PTE format as Book3E. 
As per the file: arch/powerpc/include/asm/pgtable-ppc32.h * #elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT) * #include So PAGE_KERNEL_X is __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX) and the book3E version of _PAGE_KERNEL_RWX is defined with: (_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX) Note the _PAGE_BAP_SR, which is also defined in the book3E _PAGE_USER: #define _PAGE_USER (_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */ So the possibility exists to wrongly assign the user MAS3_U bits to kernel (PAGE_KERNEL_X) address space via the following code fragment: if (flags & _PAGE_USER) { TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); } Here is a dump of the TLB info from Simics with the above code present: ------ L2 TLB1 GT SSS UUU V I Row Logical Physical SS TLPID TID WIMGE XWR XWR F P V ----- ----------------- ------------------- -- ----- ----- ----- --- --- - - - 0 c0000000-cfffffff 000000000-00fffffff 00 0 0 M XWR XWR 0 1 1 1 d0000000-dfffffff 010000000-01fffffff 00 0 0 M XWR XWR 0 1 1 2 e0000000-efffffff 020000000-02fffffff 00 0 0 M XWR XWR 0 1 1 Actually this conditional code was used for two legacy functions: 1: support KGDB to set break point. KGDB already dropped this; now uses its core write to set break point. 2: io_block_mapping() to create TLB in segmentation size (not PAGE_SIZE) for device IO space. This use case is also removed from the latest PowerPC kernel. However, there may still be a use case for it in the future, like large user pages, so we can't remove it entirely. As an alternative, we match on all bits of _PAGE_USER instead of just any bits, so the case where just _PAGE_BAP_SR is set can't sneak through. With this done, the TLB appears without U having XWR as below: ------- L2 TLB1 GT SSS UUU V I Row Logical Physical SS TLPID TID WIMGE XWR XWR F P V ----- ----------------- ------------------- -- ----- ----- ----- --- --- - - - 0 c0000000-cfffffff 000000000-00fffffff 00 0 0 M XWR 0 1 1 1 d0000000-dfffffff 010000000-01fffffff 00 0 0 M XWR 0 1 1 2 e0000000-efffffff 020000000-02fffffff 00 0 0 M XWR 0 1 1 Signed-off-by: Tiejun Chen Signed-off-by: Paul Gortmaker Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/pte-common.h | 7 +++++++ arch/powerpc/mm/fsl_booke_mmu.c | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h index f2b370180a09..76bb195e4f24 100644 --- a/arch/powerpc/include/asm/pte-common.h +++ b/arch/powerpc/include/asm/pte-common.h @@ -171,6 +171,13 @@ extern unsigned long bad_call_to_PMD_PAGE_SIZE(void); /* Make modules code happy. We don't set RO yet */ #define PAGE_KERNEL_EXEC PAGE_KERNEL_X +/* + * Don't just check for any non zero bits in __PAGE_USER, since for book3e + * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in + * _PAGE_USER. Need to explictly match _PAGE_BAP_UR bit in that case too. 
+ */ +#define pte_user(val) ((val & _PAGE_USER) == _PAGE_USER) + /* Advertise special mapping type for AGP */ #define PAGE_AGP (PAGE_KERNEL_NC) #define HAVE_PAGE_AGP diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 4b66a1ece6d8..1b4354db51bb 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -137,7 +137,8 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys, if (mmu_has_feature(MMU_FTR_BIG_PHYS)) TLBCAM[index].MAS7 = (u64)phys >> 32; - if (flags & _PAGE_USER) { + /* Below is unlikely -- only for large user pages or similar */ + if (pte_user(flags)) { TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); } -- cgit v1.2.3 From 4ea7c88bec9221031fa57fc7c290fdb5d279748c Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Tue, 31 Aug 2010 17:44:51 -0500 Subject: powerpc/fsl_soc: Search all global-utilities nodes for rstcr The first global-utilities node might not contain the rstcr property, so we should search all the nodes. Signed-off-by: Matthew McClintock Acked-by: Timur Tabi Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_soc.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index b91f7acdda6f..6c67d9ebf166 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -378,17 +378,23 @@ static __be32 __iomem *rstcr; static int __init setup_rstcr(void) { struct device_node *np; - np = of_find_node_by_name(NULL, "global-utilities"); - if ((np && of_get_property(np, "fsl,has-rstcr", NULL))) { - rstcr = of_iomap(np, 0) + 0xb0; - if (!rstcr) - printk (KERN_EMERG "Error: reset control register " - "not mapped!\n"); - } else if (ppc_md.restart == fsl_rstcr_restart) + + for_each_node_by_name(np, "global-utilities") { + if ((of_get_property(np, "fsl,has-rstcr", NULL))) { + rstcr = of_iomap(np, 0) + 0xb0; + if (!rstcr) + printk (KERN_ERR "Error: reset control " + "register not mapped!\n"); + break; + } + } + + if (!rstcr && ppc_md.restart == fsl_rstcr_restart) printk(KERN_ERR "No RSTCR register, warm reboot won't work\n"); if (np) of_node_put(np); + return 0; } -- cgit v1.2.3 From f7a07fd9617140c6111de82400ba1bad3162fb85 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 19 Aug 2010 16:28:12 -0500 Subject: powerpc/p1022: Add probing for individual DMA channels Like the MPC8610 HPCD, the P1022DS ASoC DMA driver probes on individual DMA channel nodes, so the DMA controller nodes' compatible string must be listed in p1022_ds_ids[] to work. 
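For context, a match table like p1022_ds_ids[] is normally handed to of_platform_bus_probe() from a machine initcall. A simplified sketch of that consumer, reconstructed from the usual 85xx board pattern rather than quoted from the file:

    /* Every node matching an entry, now including "fsl,eloplus-dma",
     * is published as a platform device, which makes its children
     * (here the individual DMA channel nodes) visible for probing. */
    static int __init p1022_ds_publish_devices(void)
    {
            return of_platform_bus_probe(NULL, p1022_ds_ids, NULL);
    }
    machine_device_initcall(p1022_ds, p1022_ds_publish_devices);
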
Signed-off-by: Timur Tabi Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/p1022_ds.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 34e00902ce86..2b390d19a1d1 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -112,6 +112,8 @@ static struct of_device_id __initdata p1022_ds_ids[] = { { .compatible = "soc", }, { .compatible = "simple-bus", }, { .compatible = "gianfar", }, + /* So that the DMA channel nodes can be probed individually: */ + { .compatible = "fsl,eloplus-dma", }, {}, }; -- cgit v1.2.3 From 2ed38b23597284cc96a97e295cb145a6202dfcd4 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Tue, 31 Aug 2010 18:24:45 -0500 Subject: powerpc/fsl_booke: Add support to boot from core other than 0 First we check to see if we are the first core booting up. This is accomplished by comparing the boot_cpuid with -1; if it is, we assume this is the first core coming up. Secondly, we need to update the initial thread info structure to reflect the actual cpu we are running on; otherwise smp_processor_id() and related functions will return the struct's default initialization value of 0. Signed-off-by: Matthew McClintock Signed-off-by: Kumar Gala --- arch/powerpc/kernel/head_fsl_booke.S | 10 ++++++++-- arch/powerpc/kernel/setup_32.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 4faeba247854..529b817f473b 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -152,8 +152,11 @@ _ENTRY(__early_start) /* Check to see if we're the second processor, and jump * to the secondary_start code if so */ - mfspr r24,SPRN_PIR - cmpwi r24,0 + lis r24, boot_cpuid@h + ori r24, r24, boot_cpuid@l + lwz r24, 0(r24) + cmpwi r24, -1 + mfspr r24,SPRN_PIR bne __secondary_start #endif @@ -175,6 +178,9 @@ _ENTRY(__early_start) li r0,0 stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + rlwinm r22,r1,0,0,31-THREAD_SHIFT /* current thread_info */ + stw r24, TI_CPU(r22) + bl early_init #ifdef CONFIG_RELOCATABLE diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 93666f9cabf1..8da1632f9fe7 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -46,7 +46,7 @@ extern void bootx_init(unsigned long r4, unsigned long phys); -int boot_cpuid; +int boot_cpuid = -1; EXPORT_SYMBOL_GPL(boot_cpuid); int boot_cpuid_phys; -- cgit v1.2.3 From da3ed89e7ce272ebcc918487e2a28736ca0dd6bb Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 18 May 2010 07:52:36 -0500 Subject: powerpc/mpc8xxx_gpio: Add support for 'qoriq-gpio' controllers Add 'fsl,qoriq-gpio' compatible to the list we search for to bind against for mpc8xxx_gpio. This compatible will be used on P1-P5xxx QorIQ devices like P4080. 
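The diff below simply adds one more for_each_compatible_node() scan. If the list keeps growing, a table-driven loop would be a natural refactor; the following is purely illustrative and not part of this patch (the set of existing compatible strings is assumed from the driver, not guaranteed complete):

    static const char * const mpc8xxx_gpio_compats[] = {
            "fsl,mpc8349-gpio", "fsl,mpc8572-gpio",
            "fsl,mpc8610-gpio", "fsl,qoriq-gpio",
    };

    static int __init mpc8xxx_add_gpiochips(void)
    {
            struct device_node *np;
            int i;

            /* one pass per known compatible string */
            for (i = 0; i < ARRAY_SIZE(mpc8xxx_gpio_compats); i++)
                    for_each_compatible_node(np, NULL, mpc8xxx_gpio_compats[i])
                            mpc8xxx_add_controller(np);
            return 0;
    }
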
Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/mpc8xxx_gpio.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/mpc8xxx_gpio.c b/arch/powerpc/sysdev/mpc8xxx_gpio.c index 2b69084d0f0c..c0ea05e87f1d 100644 --- a/arch/powerpc/sysdev/mpc8xxx_gpio.c +++ b/arch/powerpc/sysdev/mpc8xxx_gpio.c @@ -330,6 +330,9 @@ static int __init mpc8xxx_add_gpiochips(void) for_each_compatible_node(np, NULL, "fsl,mpc8610-gpio") mpc8xxx_add_controller(np); + for_each_compatible_node(np, NULL, "fsl,qoriq-gpio") + mpc8xxx_add_controller(np); + return 0; } arch_initcall(mpc8xxx_add_gpiochips); -- cgit v1.2.3 From 4f0e332239e2b5f79757cb8f8f3db16c66f5d220 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 24 Sep 2010 13:34:42 -0500 Subject: powerpc/fsl-booke: Add PCI device ids for P2040/P3041/P5010/P5020 QorIQ chips Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 505c8f0ece9b..818f7c6c8fa1 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -407,10 +407,18 @@ DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2010, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2020, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2040E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P2040, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P3041E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P3041, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4040E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4040, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080E, quirk_fsl_pcie_header); DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P4080, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P5010E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P5010, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P5020E, quirk_fsl_pcie_header); +DECLARE_PCI_FIXUP_HEADER(0x1957, PCI_DEVICE_ID_P5020, quirk_fsl_pcie_header); #endif /* CONFIG_FSL_SOC_BOOKE || CONFIG_PPC_86xx */ #if defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_MPC512x) -- cgit v1.2.3 From 4267ea72bb09dc58f006df26c8d3e897489fabca Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Wed, 19 May 2010 15:32:21 -0500 Subject: oprofile/fsl emb: Don't set MSR[PMM] until after clearing the interrupt. On an arch 2.06 hypervisor, a pending perfmon interrupt will be delivered to the hypervisor at any point the guest is running, regardless of MSR[EE]. In order to reflect this interrupt, the hypervisor has to mask the interrupt in PMGC0 -- and set MSRP[PMMP] to intercept further guest accesses to the PMRs to detect when to unmask (and prevent the guest from unmasking early, or seeing inconsistent state). This has the side effect of ignoring any changes the guest makes to MSR[PMM], so wait until after the interrupt is clear, and thus the hypervisor should have cleared MSRP[PMMP], before setting MSR[PMM]. The counters will not actually run until PMGC0[FAC] is cleared in pmc_start_ctrs(), so this will not reduce the effectiveness of PMM. 
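Restated as a sketch (the helper name is hypothetical; the real change lives inside fsl_emb_handle_interrupt() in the diff below, where pmc_start_ctrs() is a local helper), the safe ordering is: clear the interrupt condition first, only then set MSR[PMM], then unfreeze the counters:

    static void perfmon_ack_then_rearm(void)
    {
            /* 1. the counter rewrites above already cleared the overflow source */
            mtmsr(mfmsr() | MSR_PMM);       /* 2. safe now: MSRP[PMMP] should be clear */
            pmc_start_ctrs(1);              /* 3. clear PMGC0[FAC]; counting resumes at rfi */
    }
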
Signed-off-by: Scott Wood Signed-off-by: Kumar Gala --- arch/powerpc/oprofile/op_model_fsl_emb.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c index 62312abffa28..d4e6507277b5 100644 --- a/arch/powerpc/oprofile/op_model_fsl_emb.c +++ b/arch/powerpc/oprofile/op_model_fsl_emb.c @@ -2,7 +2,7 @@ * Freescale Embedded oprofile support, based on ppc64 oprofile support * Copyright (C) 2004 Anton Blanchard , IBM * - * Copyright (c) 2004 Freescale Semiconductor, Inc + * Copyright (c) 2004, 2010 Freescale Semiconductor, Inc * * Author: Andy Fleming * Maintainer: Kumar Gala @@ -321,9 +321,6 @@ static void fsl_emb_handle_interrupt(struct pt_regs *regs, int val; int i; - /* set the PMM bit (see comment below) */ - mtmsr(mfmsr() | MSR_PMM); - pc = regs->nip; is_kernel = is_kernel_addr(pc); @@ -340,9 +337,13 @@ static void fsl_emb_handle_interrupt(struct pt_regs *regs, } /* The freeze bit was set by the interrupt. */ - /* Clear the freeze bit, and reenable the interrupt. - * The counters won't actually start until the rfi clears - * the PMM bit */ + /* Clear the freeze bit, and reenable the interrupt. The + * counters won't actually start until the rfi clears the PMM + * bit. The PMM bit should not be set until after the interrupt + * is cleared to avoid it getting lost in some hypervisor + * environments. + */ + mtmsr(mfmsr() | MSR_PMM); pmc_start_ctrs(1); } -- cgit v1.2.3 From b6f9e595d27371c4f2157a294da3caa7aed5fe53 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 7 Oct 2010 14:47:10 -0500 Subject: powerpc/fsl-booke: Add p3041 DS board support The P3041DS is in the same family of boards as the P4080DS and thus shares the corenet_ds code. 
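The extent of the sharing shows in the new file below: everything board-specific reduces to a flat-device-tree probe and a define_machine block wired to the corenet_ds hooks. A hypothetical further family member (board name and compatible string invented for illustration) would look like:

    static int __init p9999_ds_probe(void)
    {
            unsigned long root = of_get_flat_dt_root();

            return of_flat_dt_is_compatible(root, "fsl,P9999DS");
    }

    define_machine(p9999_ds) {
            .name            = "P9999 DS",
            .probe           = p9999_ds_probe,
            .setup_arch      = corenet_ds_setup_arch,
            .init_IRQ        = corenet_ds_pic_init,
            .get_irq         = mpic_get_coreint_irq,
            .restart         = fsl_rstcr_restart,
            .calibrate_decr  = generic_calibrate_decr,
            .progress        = udbg_progress,
    };
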
Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/Kconfig | 11 ++++++ arch/powerpc/platforms/85xx/Makefile | 1 + arch/powerpc/platforms/85xx/p3041_ds.c | 64 ++++++++++++++++++++++++++++++++++ 3 files changed, 76 insertions(+) create mode 100644 arch/powerpc/platforms/85xx/p3041_ds.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index bea1f5905ad4..068a6e3b3d76 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -153,6 +153,17 @@ config SBC8560 help This option enables support for the Wind River SBC8560 board +config P3041_DS + bool "Freescale P3041 DS" + select DEFAULT_UIMAGE + select PPC_E500MC + select PHYS_64BIT + select SWIOTLB + select MPC8xxx_GPIO + select HAS_RAPIDIO + help + This option enables support for the P3041 DS board + config P4080_DS bool "Freescale P4080 DS" select DEFAULT_UIMAGE diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index a2ec3f8f4d06..c3ac07189284 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_MPC85xx_DS) += mpc85xx_ds.o obj-$(CONFIG_MPC85xx_MDS) += mpc85xx_mds.o obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o obj-$(CONFIG_P1022_DS) += p1022_ds.o +obj-$(CONFIG_P3041_DS) += p3041_ds.o corenet_ds.o obj-$(CONFIG_P4080_DS) += p4080_ds.o corenet_ds.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o diff --git a/arch/powerpc/platforms/85xx/p3041_ds.c b/arch/powerpc/platforms/85xx/p3041_ds.c new file mode 100644 index 000000000000..0ed52e18298c --- /dev/null +++ b/arch/powerpc/platforms/85xx/p3041_ds.c @@ -0,0 +1,64 @@ +/* + * P3041 DS Setup + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2009-2010 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "corenet_ds.h" + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p3041_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P3041DS"); +} + +define_machine(p3041_ds) { + .name = "P3041 DS", + .probe = p3041_ds_probe, + .setup_arch = corenet_ds_setup_arch, + .init_IRQ = corenet_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif + .get_irq = mpic_get_coreint_irq, + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +machine_device_initcall(p3041_ds, corenet_ds_publish_devices); + +#ifdef CONFIG_SWIOTLB +machine_arch_initcall(p3041_ds, swiotlb_setup_bus_notifier); +#endif -- cgit v1.2.3 From 3c4b76449b4efc1a1cbd0cade09486bbc8b56401 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 7 Oct 2010 17:05:08 -0500 Subject: powerpc: Fix compile error with paca code on ppc64e arch/powerpc/kernel/paca.c: In function 'allocate_lppacas': arch/powerpc/kernel/paca.c:111:1: error: parameter name omitted arch/powerpc/kernel/paca.c:111:1: error: parameter name omitted Signed-off-by: Kumar Gala --- arch/powerpc/kernel/paca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index 1e068a46e6c3..cefc0df8f1e5 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -108,7 +108,7 @@ static void free_lppacas(void) #else -static inline void allocate_lppacas(int, unsigned long) { } +static inline void allocate_lppacas(int nr_cpus, unsigned long limit) { } static inline void free_lppacas(void) { } #endif /* CONFIG_PPC_BOOK3S */ -- cgit v1.2.3 From 6341efe4b9bd1e1f9c0d0d6ec57fa77949c88bb1 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Thu, 7 Oct 2010 14:36:42 -0500 Subject: powerpc/85xx: add ngPIXIS FPGA device tree node to the P1022DS board The device tree for Freescale's P1022DS reference board is missing the node for the ngPIXIS FPGA. Signed-off-by: Timur Tabi Signed-off-by: Kumar Gala --- arch/powerpc/boot/dts/p1022ds.dts | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/boot/dts/p1022ds.dts b/arch/powerpc/boot/dts/p1022ds.dts index 8bcb10b92677..2bbecbb4cbf9 100644 --- a/arch/powerpc/boot/dts/p1022ds.dts +++ b/arch/powerpc/boot/dts/p1022ds.dts @@ -148,6 +148,17 @@ label = "reserved-nand"; }; }; + + board-control@3,0 { + compatible = "fsl,p1022ds-pixis"; + reg = <3 0 0x30>; + interrupt-parent = <&mpic>; + /* + * IRQ8 is generated if the "EVENT" switch is pressed + * and PX_CTL[EVESEL] is set to 00. 
+ */ + interrupts = <8 8>; + }; }; soc@fffe00000 { -- cgit v1.2.3 From 6db92cc9d07db9f713da8554b4bcdfc8e54ad386 Mon Sep 17 00:00:00 2001 From: Harninder Rai Date: Wed, 13 Oct 2010 17:30:56 +0530 Subject: powerpc/85xx: add cache-sram support It adds cache-sram support in P1/P2 QorIQ platforms as under: * A small abstraction over powerpc's remote heap allocator * Exports mpc85xx_cache_sram_alloc()/free() APIs * Supports only one contiguous SRAM window * Drivers can do the following in Kconfig to use these APIs "select FSL_85XX_CACHE_SRAM if MPC85xx" * Required SRAM size and the offset where SRAM should be mapped must be provided at kernel command line as : cache-sram-size= cache-sram-offset= Signed-off-by: Harninder Rai Signed-off-by: Vivek Mahajan Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/fsl_85xx_cache_sram.h | 48 +++++ arch/powerpc/sysdev/Makefile | 1 + arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h | 101 +++++++++++ arch/powerpc/sysdev/fsl_85xx_cache_sram.c | 159 +++++++++++++++++ arch/powerpc/sysdev/fsl_85xx_l2ctlr.c | 231 +++++++++++++++++++++++++ 5 files changed, 540 insertions(+) create mode 100644 arch/powerpc/include/asm/fsl_85xx_cache_sram.h create mode 100644 arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h create mode 100644 arch/powerpc/sysdev/fsl_85xx_cache_sram.c create mode 100644 arch/powerpc/sysdev/fsl_85xx_l2ctlr.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/fsl_85xx_cache_sram.h b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h new file mode 100644 index 000000000000..2af2bdc37b2e --- /dev/null +++ b/arch/powerpc/include/asm/fsl_85xx_cache_sram.h @@ -0,0 +1,48 @@ +/* + * Copyright 2009 Freescale Semiconductor, Inc. + * + * Cache SRAM handling for QorIQ platform + * + * Author: Vivek Mahajan + + * This file is derived from the original work done + * by Sylvain Munaut for the Bestcomm SRAM allocator. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__ +#define __ASM_POWERPC_FSL_85XX_CACHE_SRAM_H__ + +#include +#include + +/* + * Cache-SRAM + */ + +struct mpc85xx_cache_sram { + phys_addr_t base_phys; + void *base_virt; + unsigned int size; + rh_info_t *rh; + spinlock_t lock; +}; + +extern void mpc85xx_cache_sram_free(void *ptr); +extern void *mpc85xx_cache_sram_alloc(unsigned int size, + phys_addr_t *phys, unsigned int align); + +#endif /* __AMS_POWERPC_FSL_85XX_CACHE_SRAM_H__ */ diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index c20ad6de33ee..0bef9dacb64e 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_FSL_PMC) += fsl_pmc.o obj-$(CONFIG_FSL_LBC) += fsl_lbc.o obj-$(CONFIG_FSL_GTM) += fsl_gtm.o obj-$(CONFIG_MPC8xxx_GPIO) += mpc8xxx_gpio.o +obj-$(CONFIG_FSL_85XX_CACHE_SRAM) += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o obj-$(CONFIG_SIMPLE_GPIO) += simple_gpio.o obj-$(CONFIG_RAPIDIO) += fsl_rio.o obj-$(CONFIG_TSI108_BRIDGE) += tsi108_pci.o tsi108_dev.o diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h b/arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h new file mode 100644 index 000000000000..60c9c0bd5ba2 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_85xx_cache_ctlr.h @@ -0,0 +1,101 @@ +/* + * Copyright 2009-2010 Freescale Semiconductor, Inc + * + * QorIQ based Cache Controller Memory Mapped Registers + * + * Author: Vivek Mahajan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef __FSL_85XX_CACHE_CTLR_H__ +#define __FSL_85XX_CACHE_CTLR_H__ + +#define L2CR_L2FI 0x40000000 /* L2 flash invalidate */ +#define L2CR_L2IO 0x00200000 /* L2 instruction only */ +#define L2CR_SRAM_ZERO 0x00000000 /* L2SRAM zero size */ +#define L2CR_SRAM_FULL 0x00010000 /* L2SRAM full size */ +#define L2CR_SRAM_HALF 0x00020000 /* L2SRAM half size */ +#define L2CR_SRAM_TWO_HALFS 0x00030000 /* L2SRAM two half sizes */ +#define L2CR_SRAM_QUART 0x00040000 /* L2SRAM one quarter size */ +#define L2CR_SRAM_TWO_QUARTS 0x00050000 /* L2SRAM two quarter size */ +#define L2CR_SRAM_EIGHTH 0x00060000 /* L2SRAM one eighth size */ +#define L2CR_SRAM_TWO_EIGHTH 0x00070000 /* L2SRAM two eighth size */ + +#define L2SRAM_OPTIMAL_SZ_SHIFT 0x00000003 /* Optimum size for L2SRAM */ + +#define L2SRAM_BAR_MSK_LO18 0xFFFFC000 /* Lower 18 bits */ +#define L2SRAM_BARE_MSK_HI4 0x0000000F /* Upper 4 bits */ + +enum cache_sram_lock_ways { + LOCK_WAYS_ZERO, + LOCK_WAYS_EIGHTH, + LOCK_WAYS_TWO_EIGHTH, + LOCK_WAYS_HALF = 4, + LOCK_WAYS_FULL = 8, +}; + +struct mpc85xx_l2ctlr { + u32 ctl; /* 0x000 - L2 control */ + u8 res1[0xC]; + u32 ewar0; /* 0x010 - External write address 0 */ + u32 ewarea0; /* 0x014 - External write address extended 0 */ + u32 ewcr0; /* 0x018 - External write ctrl */ + u8 res2[4]; + u32 ewar1; /* 0x020 - External write address 1 */ + u32 ewarea1; /* 0x024 - External write address extended 1 */ + u32 ewcr1; /* 0x028 - External write ctrl 1 */ + u8 res3[4]; + u32 ewar2; /* 0x030 - External write address 2 */ + u32 ewarea2; /* 0x034 - External write address extended 2 */ + u32 ewcr2; /* 0x038 - External write ctrl 2 */ + u8 res4[4]; + u32 ewar3; /* 0x040 - External write address 3 */ + u32 ewarea3; /* 0x044 - External write address extended 3 */ + u32 ewcr3; /* 0x048 - External write ctrl 3 */ + u8 res5[0xB4]; + u32 srbar0; /* 0x100 - SRAM base address 0 */ + u32 srbarea0; /* 0x104 - SRAM base addr reg ext address 0 */ + u32 srbar1; /* 0x108 - SRAM base address 1 */ + u32 srbarea1; /* 0x10C - SRAM base addr reg ext address 1 */ + u8 res6[0xCF0]; + u32 errinjhi; /* 0xE00 - Error injection mask high */ + u32 errinjlo; /* 0xE04 - Error injection mask low */ + u32 errinjctl; /* 0xE08 - Error injection tag/ecc control */ + u8 res7[0x14]; + u32 captdatahi; /* 0xE20 - Error data high capture */ + u32 captdatalo; /* 0xE24 - Error data low capture */ + u32 captecc; /* 0xE28 - Error syndrome */ + u8 res8[0x14]; + u32 errdet; /* 0xE40 - Error detect */ + u32 errdis; /* 0xE44 - Error disable */ + u32 errinten; /* 0xE48 - Error interrupt enable */ + u32 errattr; /* 0xE4c - Error attribute capture */ + u32 erradrrl; /* 0xE50 - Error address capture low */ + u32 erradrrh; /* 0xE54 - Error address capture high */ + u32 errctl; /* 0xE58 - Error control */ + u8 res9[0x1A4]; +}; + +struct sram_parameters { + unsigned int sram_size; + uint64_t sram_offset; +}; + +extern int instantiate_cache_sram(struct platform_device *dev, + struct sram_parameters sram_params); +extern void remove_cache_sram(struct platform_device *dev); + +#endif /* __FSL_85XX_CACHE_CTLR_H__ */ diff --git a/arch/powerpc/sysdev/fsl_85xx_cache_sram.c b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c new file mode 100644 index 000000000000..54fb1922fe30 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_85xx_cache_sram.c @@ -0,0 +1,159 @@ +/* + * Copyright 2009-2010 Freescale Semiconductor, Inc. 
+ * + * Simple memory allocator abstraction for QorIQ (P1/P2) based Cache-SRAM + * + * Author: Vivek Mahajan + * + * This file is derived from the original work done + * by Sylvain Munaut for the Bestcomm SRAM allocator. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include + +#include "fsl_85xx_cache_ctlr.h" + +struct mpc85xx_cache_sram *cache_sram; + +void *mpc85xx_cache_sram_alloc(unsigned int size, + phys_addr_t *phys, unsigned int align) +{ + unsigned long offset; + unsigned long flags; + + if (unlikely(cache_sram == NULL)) + return NULL; + + if (!size || (size > cache_sram->size) || (align > cache_sram->size)) { + pr_err("%s(): size(=%x) or align(=%x) zero or too big\n", + __func__, size, align); + return NULL; + } + + if ((align & (align - 1)) || align <= 1) { + pr_err("%s(): align(=%x) must be power of two and >1\n", + __func__, align); + return NULL; + } + + spin_lock_irqsave(&cache_sram->lock, flags); + offset = rh_alloc_align(cache_sram->rh, size, align, NULL); + spin_unlock_irqrestore(&cache_sram->lock, flags); + + if (IS_ERR_VALUE(offset)) + return NULL; + + *phys = cache_sram->base_phys + offset; + + return (unsigned char *)cache_sram->base_virt + offset; +} +EXPORT_SYMBOL(mpc85xx_cache_sram_alloc); + +void mpc85xx_cache_sram_free(void *ptr) +{ + unsigned long flags; + BUG_ON(!ptr); + + spin_lock_irqsave(&cache_sram->lock, flags); + rh_free(cache_sram->rh, ptr - cache_sram->base_virt); + spin_unlock_irqrestore(&cache_sram->lock, flags); +} +EXPORT_SYMBOL(mpc85xx_cache_sram_free); + +int __init instantiate_cache_sram(struct platform_device *dev, + struct sram_parameters sram_params) +{ + int ret = 0; + + if (cache_sram) { + dev_err(&dev->dev, "Already initialized cache-sram\n"); + return -EBUSY; + } + + cache_sram = kzalloc(sizeof(struct mpc85xx_cache_sram), GFP_KERNEL); + if (!cache_sram) { + dev_err(&dev->dev, "Out of memory for cache_sram structure\n"); + return -ENOMEM; + } + + cache_sram->base_phys = sram_params.sram_offset; + cache_sram->size = sram_params.sram_size; + + if (!request_mem_region(cache_sram->base_phys, cache_sram->size, + "fsl_85xx_cache_sram")) { + dev_err(&dev->dev, "%s: request memory failed\n", + dev->dev.of_node->full_name); + ret = -ENXIO; + goto out_free; + } + + cache_sram->base_virt = ioremap_flags(cache_sram->base_phys, + cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL); + if (!cache_sram->base_virt) { + dev_err(&dev->dev, "%s: ioremap_flags failed\n", + dev->dev.of_node->full_name); + ret = -ENOMEM; + goto out_release; + } + + cache_sram->rh = rh_create(sizeof(unsigned int)); + if (IS_ERR(cache_sram->rh)) { + dev_err(&dev->dev, "%s: Unable to create remote heap\n", + dev->dev.of_node->full_name); + ret = PTR_ERR(cache_sram->rh); + goto out_unmap; + } + + rh_attach_region(cache_sram->rh, 0, cache_sram->size); + 
spin_lock_init(&cache_sram->lock); + + dev_info(&dev->dev, "[base:0x%llx, size:0x%x] configured and loaded\n", + (unsigned long long)cache_sram->base_phys, cache_sram->size); + + return 0; + +out_unmap: + iounmap(cache_sram->base_virt); + +out_release: + release_mem_region(cache_sram->base_phys, cache_sram->size); + +out_free: + kfree(cache_sram); + return ret; +} + +void remove_cache_sram(struct platform_device *dev) +{ + BUG_ON(!cache_sram); + + rh_detach_region(cache_sram->rh, 0, cache_sram->size); + rh_destroy(cache_sram->rh); + + iounmap(cache_sram->base_virt); + release_mem_region(cache_sram->base_phys, cache_sram->size); + + kfree(cache_sram); + cache_sram = NULL; + + dev_info(&dev->dev, "MPC85xx Cache-SRAM driver unloaded\n"); +} diff --git a/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c new file mode 100644 index 000000000000..cc8d6556d799 --- /dev/null +++ b/arch/powerpc/sysdev/fsl_85xx_l2ctlr.c @@ -0,0 +1,231 @@ +/* + * Copyright 2009-2010 Freescale Semiconductor, Inc. + * + * QorIQ (P1/P2) L2 controller init for Cache-SRAM instantiation + * + * Author: Vivek Mahajan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include + +#include "fsl_85xx_cache_ctlr.h" + +static char *sram_size; +static char *sram_offset; +struct mpc85xx_l2ctlr __iomem *l2ctlr; + +static long get_cache_sram_size(void) +{ + unsigned long val; + + if (!sram_size || (strict_strtoul(sram_size, 0, &val) < 0)) + return -EINVAL; + + return val; +} + +static long get_cache_sram_offset(void) +{ + unsigned long val; + + if (!sram_offset || (strict_strtoul(sram_offset, 0, &val) < 0)) + return -EINVAL; + + return val; +} + +static int __init get_size_from_cmdline(char *str) +{ + if (!str) + return 0; + + sram_size = str; + return 1; +} + +static int __init get_offset_from_cmdline(char *str) +{ + if (!str) + return 0; + + sram_offset = str; + return 1; +} + +__setup("cache-sram-size=", get_size_from_cmdline); +__setup("cache-sram-offset=", get_offset_from_cmdline); + +static int __devinit mpc85xx_l2ctlr_of_probe(struct platform_device *dev, + const struct of_device_id *match) +{ + long rval; + unsigned int rem; + unsigned char ways; + const unsigned int *prop; + unsigned int l2cache_size; + struct sram_parameters sram_params; + + if (!dev->dev.of_node) { + dev_err(&dev->dev, "Device's OF-node is NULL\n"); + return -EINVAL; + } + + prop = of_get_property(dev->dev.of_node, "cache-size", NULL); + if (!prop) { + dev_err(&dev->dev, "Missing L2 cache-size\n"); + return -EINVAL; + } + l2cache_size = *prop; + + sram_params.sram_size = get_cache_sram_size(); + if (sram_params.sram_size <= 0) { + dev_err(&dev->dev, + "Entire L2 as cache, Aborting Cache-SRAM stuff\n"); + return -EINVAL; + } + + sram_params.sram_offset = get_cache_sram_offset(); + if (sram_params.sram_offset <= 0) { + dev_err(&dev->dev, + "Entire L2 as cache, provide a valid sram offset\n"); + return -EINVAL; + } + + + rem = l2cache_size % sram_params.sram_size; + ways = LOCK_WAYS_FULL * sram_params.sram_size / l2cache_size; + if (rem || (ways & (ways - 1))) { + dev_err(&dev->dev, "Illegal cache-sram-size in command line\n"); + return -EINVAL; + } + + l2ctlr = of_iomap(dev->dev.of_node, 0); + if (!l2ctlr) { + dev_err(&dev->dev, "Can't map L2 controller\n"); + return -EINVAL; + } + + /* + * Write bits[0-17] to srbar0 + */ + out_be32(&l2ctlr->srbar0, + sram_params.sram_offset & L2SRAM_BAR_MSK_LO18); + + /* + * Write bits[18-21] to srbare0 + */ +#ifdef CONFIG_PHYS_64BIT + out_be32(&l2ctlr->srbarea0, + (sram_params.sram_offset >> 32) & L2SRAM_BARE_MSK_HI4); +#endif + + clrsetbits_be32(&l2ctlr->ctl, L2CR_L2E, L2CR_L2FI); + + switch (ways) { + case LOCK_WAYS_EIGHTH: + setbits32(&l2ctlr->ctl, + L2CR_L2E | L2CR_L2FI | L2CR_SRAM_EIGHTH); + break; + + case LOCK_WAYS_TWO_EIGHTH: + setbits32(&l2ctlr->ctl, + L2CR_L2E | L2CR_L2FI | L2CR_SRAM_QUART); + break; + + case LOCK_WAYS_HALF: + setbits32(&l2ctlr->ctl, + L2CR_L2E | L2CR_L2FI | L2CR_SRAM_HALF); + break; + + case LOCK_WAYS_FULL: + default: + setbits32(&l2ctlr->ctl, + L2CR_L2E | L2CR_L2FI | L2CR_SRAM_FULL); + break; + } + eieio(); + + rval = instantiate_cache_sram(dev, sram_params); + if (rval < 0) { + dev_err(&dev->dev, "Can't instantiate Cache-SRAM\n"); + iounmap(l2ctlr); + return -EINVAL; + } + + return 0; +} + +static int __devexit mpc85xx_l2ctlr_of_remove(struct platform_device *dev) +{ + BUG_ON(!l2ctlr); + + iounmap(l2ctlr); + remove_cache_sram(dev); + dev_info(&dev->dev, "MPC85xx L2 controller unloaded\n"); + + return 0; +} + +static struct of_device_id mpc85xx_l2ctlr_of_match[] = { + { + .compatible = "fsl,p2020-l2-cache-controller", + }, + { + .compatible = "fsl,p2010-l2-cache-controller", + }, + { + 
.compatible = "fsl,p1020-l2-cache-controller", + }, + { + .compatible = "fsl,p1011-l2-cache-controller", + }, + { + .compatible = "fsl,p1013-l2-cache-controller", + }, + { + .compatible = "fsl,p1022-l2-cache-controller", + }, + {}, +}; + +static struct of_platform_driver mpc85xx_l2ctlr_of_platform_driver = { + .driver = { + .name = "fsl-l2ctlr", + .owner = THIS_MODULE, + .of_match_table = mpc85xx_l2ctlr_of_match, + }, + .probe = mpc85xx_l2ctlr_of_probe, + .remove = __devexit_p(mpc85xx_l2ctlr_of_remove), +}; + +static __init int mpc85xx_l2ctlr_of_init(void) +{ + return of_register_platform_driver(&mpc85xx_l2ctlr_of_platform_driver); +} + +static void __exit mpc85xx_l2ctlr_of_exit(void) +{ + of_unregister_platform_driver(&mpc85xx_l2ctlr_of_platform_driver); +} + +subsys_initcall(mpc85xx_l2ctlr_of_init); +module_exit(mpc85xx_l2ctlr_of_exit); + +MODULE_DESCRIPTION("Freescale MPC85xx L2 controller init"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 4490c06b581ad7d6392bb398960ef86dfd203a91 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 8 Oct 2010 08:32:11 -0500 Subject: powerpc/fsl-booke: Add support for FSL 64-bit e5500 core The new e5500 core is similar to the e500mc core but adds 64-bit support. We support running it in 32-bit mode as it is identical to the e500mc. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/Makefile | 4 +++- arch/powerpc/kernel/cpu_setup_fsl_booke.S | 15 +++++++++++++++ arch/powerpc/kernel/cputable.c | 28 +++++++++++++++++++++++++++- arch/powerpc/kernel/traps.c | 5 +++++ arch/powerpc/platforms/85xx/Kconfig | 5 ++++- arch/powerpc/platforms/Kconfig.cputype | 8 +++++++- 6 files changed, 61 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1dda70129141..4ed076a4db24 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -55,7 +55,9 @@ obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_E500) += idle_e500.o +endif obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o @@ -67,7 +69,7 @@ endif obj64-$(CONFIG_HIBERNATION) += swsusp_asm64.o obj-$(CONFIG_MODULES) += module.o module_$(CONFIG_WORD_SIZE).o obj-$(CONFIG_44x) += cpu_setup_44x.o -obj-$(CONFIG_FSL_BOOKE) += cpu_setup_fsl_booke.o dbell.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += cpu_setup_fsl_booke.o dbell.o obj-$(CONFIG_PPC_BOOK3E_64) += dbell.o extra-y := head_$(CONFIG_WORD_SIZE).o diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S index 0adb50ad8031..894e64fa481e 100644 --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S @@ -51,6 +51,7 @@ _GLOBAL(__e500_dcache_setup) isync blr +#ifdef CONFIG_PPC32 _GLOBAL(__setup_cpu_e200) /* enable dedicated debug exception handling resources (Debug APU) */ mfspr r3,SPRN_HID0 @@ -72,3 +73,17 @@ _GLOBAL(__setup_cpu_e500mc) bl __setup_e500mc_ivors mtlr r4 blr +#endif +/* Right now, restore and setup are the same thing */ +_GLOBAL(__restore_cpu_e5500) +_GLOBAL(__setup_cpu_e5500) + mflr r4 + bl __e500_icache_setup + bl __e500_dcache_setup +#ifdef CONFIG_PPC_BOOK3E_64 + bl .__setup_base_ivors +#else + bl __setup_e500mc_ivors +#endif + mtlr r4 + blr diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 1f9123f412ec..cd5519133f86 100644 --- 
a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -66,6 +66,10 @@ extern void __restore_cpu_ppc970(void); extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_power7(void); #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_E500) +extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); +extern void __restore_cpu_e5500(void); +#endif /* CONFIG_E500 */ /* This table only contains "desktop" CPUs, it need to be filled with embedded * ones as well... @@ -1891,7 +1895,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc5554", } #endif /* CONFIG_E200 */ +#endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 +#ifdef CONFIG_PPC32 { /* e500 */ .pvr_mask = 0xffff0000, .pvr_value = 0x80200000, @@ -1946,6 +1952,26 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500mc, .platform = "ppce500mc", }, +#endif /* CONFIG_PPC32 */ + { /* e5500 */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x80240000, + .cpu_name = "e5500", + .cpu_features = CPU_FTRS_E500MC, + .cpu_user_features = COMMON_USER_BOOKE, + .mmu_features = MMU_FTR_TYPE_FSL_E | MMU_FTR_BIG_PHYS | + MMU_FTR_USE_TLBILX, + .icache_bsize = 64, + .dcache_bsize = 64, + .num_pmcs = 4, + .oprofile_cpu_type = "ppc/e500mc", + .oprofile_type = PPC_OPROFILE_FSL_EMB, + .cpu_setup = __setup_cpu_e5500, + .cpu_restore = __restore_cpu_e5500, + .machine_check = machine_check_e500mc, + .platform = "ppce5500", + }, +#ifdef CONFIG_PPC32 { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -1960,8 +1986,8 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = "powerpc", } -#endif /* CONFIG_E500 */ #endif /* CONFIG_PPC32 */ +#endif /* CONFIG_E500 */ #ifdef CONFIG_PPC_BOOK3E_64 { /* This is a default entry to get going, to be replaced by diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a45a63c3a0c7..1b2cdc8eec90 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -538,6 +538,11 @@ int machine_check_e500(struct pt_regs *regs) return 0; } + +int machine_check_generic(struct pt_regs *regs) +{ + return 0; +} #elif defined(CONFIG_E200) int machine_check_e200(struct pt_regs *regs) { diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 068a6e3b3d76..4bac9e00fccc 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -11,6 +11,8 @@ menuconfig FSL_SOC_BOOKE if FSL_SOC_BOOKE +if PPC32 + config MPC8540_ADS bool "Freescale MPC8540 ADS" select DEFAULT_UIMAGE @@ -167,7 +169,6 @@ config P3041_DS config P4080_DS bool "Freescale P4080 DS" select DEFAULT_UIMAGE - select PPC_FSL_BOOK3E select PPC_E500MC select PHYS_64BIT select SWIOTLB @@ -176,6 +177,8 @@ config P4080_DS help This option enables support for the P4080 DS board +endif # PPC32 + endif # FSL_SOC_BOOKE config TQM85xx diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index d361f8119b1e..111138c55f9c 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -125,6 +125,7 @@ config 8xx config E500 select FSL_EMB_PERFMON + select PPC_FSL_BOOK3E bool config PPC_E500MC @@ -166,9 +167,14 @@ config BOOKE config FSL_BOOKE bool - depends on E200 || E500 + depends on (E200 || E500) && PPC32 default y +# this is for common code between PPC32 & PPC64 FSL BOOKE +config PPC_FSL_BOOK3E + bool + select FSL_EMB_PERFMON + default y if 
FSL_BOOKE config PTE_64BIT bool -- cgit v1.2.3
From 988cf86d4f0da4150e808300c145ba87c0aad02f Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 8 Oct 2010 02:13:25 -0500 Subject: powerpc/fsl-booke: Add support for FSL Arch v1.0 MMU in setup_page_sizes Update setup_page_sizes() to support an FSL-style MMU v1.0 implementation. On such a processor, we don't have the TLB0PS or EPTCFG registers (and access to these registers may cause exceptions). We need to parse the older format of TLBnCFG for page size support. Additionally, since we are an FSL implementation, assume we have two TLB arrays and that the second array contains the variable-size pages. Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/mmu-book3e.h | 15 +++++++++++++ arch/powerpc/mm/tlb_nohash.c | 42 ++++++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 3 deletions(-) (limited to 'arch/powerpc')
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 87a1d787c5b6..8eaed81ea642 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -114,6 +114,17 @@ #define MAS7_RPN 0xFFFFFFFF +/* Bit definitions for MMUCFG */ +#define MMUCFG_MAVN 0x00000003 /* MMU Architecture Version Number */ +#define MMUCFG_MAVN_V1 0x00000000 /* v1.0 */ +#define MMUCFG_MAVN_V2 0x00000001 /* v2.0 */ +#define MMUCFG_NTLBS 0x0000000c /* Number of TLBs */ +#define MMUCFG_PIDSIZE 0x000007c0 /* PID Reg Size */ +#define MMUCFG_TWC 0x00008000 /* TLB Write Conditional (v2.0) */ +#define MMUCFG_LRAT 0x00010000 /* LRAT Supported (v2.0) */ +#define MMUCFG_RASIZE 0x00fe0000 /* Real Addr Size */ +#define MMUCFG_LPIDSIZE 0x0f000000 /* LPID Reg Size */ + /* Bit definitions for MMUCSR0 */ #define MMUCSR0_TLB1FI 0x00000002 /* TLB1 Flash invalidate */ #define MMUCSR0_TLB0FI 0x00000004 /* TLB0 Flash invalidate */ @@ -133,6 +144,10 @@ #define TLBnCFG_GTWE 0x00010000 /* Guest can write */ #define TLBnCFG_IND 0x00020000 /* IND entries supported */ #define TLBnCFG_PT 0x00040000 /* Can load from page table */ +#define TLBnCFG_MINSIZE 0x00f00000 /* Minimum Page Size (v1.0) */ +#define TLBnCFG_MINSIZE_SHIFT 20 +#define TLBnCFG_MAXSIZE 0x000f0000 /* Maximum Page Size (v1.0) */ +#define TLBnCFG_MAXSIZE_SHIFT 16 #define TLBnCFG_ASSOC 0xff000000 /* Associativity */ /* TLBnPS encoding */
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index fe391e942521..665189920762 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -349,11 +349,47 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address) static void setup_page_sizes(void) { - unsigned int tlb0cfg = mfspr(SPRN_TLB0CFG); - unsigned int tlb0ps = mfspr(SPRN_TLB0PS); - unsigned int eptcfg = mfspr(SPRN_EPTCFG); + unsigned int tlb0cfg; + unsigned int tlb0ps; + unsigned int eptcfg; int i, psize; +#ifdef CONFIG_PPC_FSL_BOOK3E + unsigned int mmucfg = mfspr(SPRN_MMUCFG); + + if (((mmucfg & MMUCFG_MAVN) == MMUCFG_MAVN_V1) && + (mmu_has_feature(MMU_FTR_TYPE_FSL_E))) { + unsigned int tlb1cfg = mfspr(SPRN_TLB1CFG); + unsigned int min_pg, max_pg; + + min_pg = (tlb1cfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; + max_pg = (tlb1cfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; + + for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { + struct mmu_psize_def *def; + unsigned int shift; + + def = &mmu_psize_defs[psize]; + shift = def->shift; + + if (shift == 0) + continue; + + /* adjust to be in terms of 4^shift Kb */ + shift = (shift - 10) >> 1; + + if ((shift >= min_pg) && (shift <= max_pg)) + def->flags |= MMU_PAGE_SIZE_DIRECT; + } + + goto no_indirect; + } +#endif + + tlb0cfg = mfspr(SPRN_TLB0CFG); + tlb0ps = mfspr(SPRN_TLB0PS); + eptcfg = mfspr(SPRN_EPTCFG); + /* Look for supported direct sizes */ for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) { struct mmu_psize_def *def = &mmu_psize_defs[psize]; -- cgit v1.2.3
From 55fd766b5fad8240b7a6e994b5779a46d28f73d4 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 16 Oct 2009 18:48:40 -0500 Subject: powerpc/fsl-booke64: Use TLB CAMs to cover linear mapping on FSL 64-bit chips Freescale parts typically have a TLB array for large mappings that we can bolt the linear mapping into. We utilize the code that already exists on PPC32 on the 64-bit side to set up the linear mapping so it is covered by bolted TLB entries. We utilize a quarter of the variable-size TLB array for this purpose. Additionally, we limit the amount of memory to what we can cover via bolted entries so we don't get secondary faults in the TLB miss handlers. We should fix this limitation in the future. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/asm-offsets.c | 4 ++-- arch/powerpc/mm/Makefile | 2 +- arch/powerpc/mm/fsl_booke_mmu.c | 12 +++++++----- arch/powerpc/mm/mmu_decl.h | 5 ++++- arch/powerpc/mm/tlb_nohash.c | 14 ++++++++++++++ arch/powerpc/mm/tlb_nohash_low.S | 2 +- 6 files changed, 29 insertions(+), 10 deletions(-) (limited to 'arch/powerpc')
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index c63494090854..c3e01945ad4f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -61,7 +61,7 @@ #endif #endif -#if defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_PPC_FSL_BOOK3E) #include "../mm/mmu_decl.h" #endif @@ -470,7 +470,7 @@ int main(void) DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif -#ifdef CONFIG_FSL_BOOKE +#ifdef CONFIG_PPC_FSL_BOOK3E DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); DEFINE(TLBCAM_MAS0, offsetof(struct tlbcam, MAS0)); DEFINE(TLBCAM_MAS1, offsetof(struct tlbcam, MAS1));
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 53102f306880..bdca46e08382 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -23,7 +23,7 @@ obj-$(CONFIG_PPC_STD_MMU) += hash_low_$(CONFIG_WORD_SIZE).o \ mmu_context_hash$(CONFIG_WORD_SIZE).o obj-$(CONFIG_40x) += 40x_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o -obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o +obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o obj-$(CONFIG_PPC_MM_SLICES) += slice.o ifeq ($(CONFIG_HUGETLB_PAGE),y)
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 1b4354db51bb..67bc8a7c7e0b 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -56,11 +56,6 @@ unsigned int tlbcam_index; - -#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) -#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" -#endif - #define NUM_TLBCAMS (64) struct tlbcam TLBCAM[NUM_TLBCAMS]; @@ -185,6 +180,12 @@ unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx) return amount_mapped; } +#ifdef CONFIG_PPC32 + +#if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) +#error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" +#endif + unsigned long __init mmu_mapin_ram(unsigned long top) { return tlbcam_addrs[tlbcam_index - 1].limit - PAGE_OFFSET + 1; @@ -216,3 +217,4 @@ void __init adjust_total_lowmem(void) __initial_memory_limit_addr =
memstart_addr + __max_low_memory; } +#endif diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 63b84a0d3b10..dd0a2589591d 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -140,10 +140,13 @@ extern void wii_memory_fixups(void); extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); -#elif defined(CONFIG_FSL_BOOKE) +#elif defined(CONFIG_PPC_FSL_BOOK3E) +extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx); +#ifdef CONFIG_PPC32 extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); extern void adjust_total_lowmem(void); +#endif extern void loadcam_entry(unsigned int index); struct tlbcam { diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index 665189920762..61fe32a256da 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -541,6 +541,20 @@ static void __early_init_mmu(int boot_cpu) */ linear_map_top = memblock_end_of_DRAM(); +#ifdef CONFIG_PPC_FSL_BOOK3E + if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + unsigned int num_cams; + + /* use a quarter of the TLBCAM for bolted linear map */ + num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; + linear_map_top = map_mem_in_cams(linear_map_top, num_cams); + + /* limit memory so we dont have linear faults */ + memblock_enforce_memory_limit(linear_map_top); + memblock_analyze(); + } +#endif + /* A sync won't hurt us after mucking around with * the MMU configuration */ diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index b9d9fed8f36e..af405eefe48d 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -367,7 +367,7 @@ _GLOBAL(set_context) #error Unsupported processor type ! #endif -#if defined(CONFIG_FSL_BOOKE) +#if defined(CONFIG_PPC_FSL_BOOK3E) /* * extern void loadcam_entry(unsigned int index) * -- cgit v1.2.3 From 95400415c7b3f3dd43034c6c860897ac397ebe1b Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 7 Oct 2010 14:05:47 -0500 Subject: powerpc/fsl-booke: Add p5020 DS board support The P5020DS is in the same family of boards as the P4080 DS and thus shares the corenet_ds code. 
Signed-off-by: Kumar Gala --- arch/powerpc/platforms/85xx/Kconfig | 12 ++++++ arch/powerpc/platforms/85xx/Makefile | 1 + arch/powerpc/platforms/85xx/p5020_ds.c | 69 ++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 arch/powerpc/platforms/85xx/p5020_ds.c (limited to 'arch/powerpc')
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 4bac9e00fccc..b6976e1726e4 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -179,6 +179,18 @@ config P4080_DS endif # PPC32 +config P5020_DS + bool "Freescale P5020 DS" + select DEFAULT_UIMAGE + select E500 + select PPC_E500MC + select PHYS_64BIT + select SWIOTLB + select MPC8xxx_GPIO + select HAS_RAPIDIO + help + This option enables support for the P5020 DS board + endif # FSL_SOC_BOOKE config TQM85xx
diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index c3ac07189284..dd70db77d63e 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_MPC85xx_RDB) += mpc85xx_rdb.o obj-$(CONFIG_P1022_DS) += p1022_ds.o obj-$(CONFIG_P3041_DS) += p3041_ds.o corenet_ds.o obj-$(CONFIG_P4080_DS) += p4080_ds.o corenet_ds.o +obj-$(CONFIG_P5020_DS) += p5020_ds.o corenet_ds.o obj-$(CONFIG_STX_GP3) += stx_gp3.o obj-$(CONFIG_TQM85xx) += tqm85xx.o obj-$(CONFIG_SBC8560) += sbc8560.o
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c new file mode 100644 index 000000000000..7467b712ee00 --- /dev/null +++ b/arch/powerpc/platforms/85xx/p5020_ds.c @@ -0,0 +1,69 @@ +/* + * P5020 DS Setup + * + * Maintained by Kumar Gala (see MAINTAINERS for contact information) + * + * Copyright 2009-2010 Freescale Semiconductor Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "corenet_ds.h" + +/* + * Called very early, device-tree isn't unflattened + */ +static int __init p5020_ds_probe(void) +{ + unsigned long root = of_get_flat_dt_root(); + + return of_flat_dt_is_compatible(root, "fsl,P5020DS"); +} + +define_machine(p5020_ds) { + .name = "P5020 DS", + .probe = p5020_ds_probe, + .setup_arch = corenet_ds_setup_arch, + .init_IRQ = corenet_ds_pic_init, +#ifdef CONFIG_PCI + .pcibios_fixup_bus = fsl_pcibios_fixup_bus, +#endif +/* coreint doesn't play nice with lazy EE, use legacy mpic for now */ +#ifdef CONFIG_PPC64 + .get_irq = mpic_get_irq, +#else + .get_irq = mpic_get_coreint_irq, +#endif + .restart = fsl_rstcr_restart, + .calibrate_decr = generic_calibrate_decr, + .progress = udbg_progress, +}; + +machine_device_initcall(p5020_ds, corenet_ds_publish_devices); + +#ifdef CONFIG_SWIOTLB +machine_arch_initcall(p5020_ds, swiotlb_setup_bus_notifier); +#endif -- cgit v1.2.3
From 6249a26a4cfe945c0840f222e3669deb9bd41425 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 3 Jun 2010 03:23:21 -0500 Subject: powerpc/fsl-booke: Add e55xx (64-bit) smp defconfig The p5020 SoC from Freescale is the first 64-bit Book-E processor and utilizes two e5500 cores. Adding a defconfig that enables a basic kernel for e5500-based processors.
Also added the p5020 / e5500 support to the ppc64e defconfig. Signed-off-by: Kumar Gala --- arch/powerpc/configs/e55xx_smp_defconfig | 84 ++++++++++++++++++++++++++++++++ arch/powerpc/configs/ppc64e_defconfig | 4 +- 2 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/configs/e55xx_smp_defconfig (limited to 'arch/powerpc') diff --git a/arch/powerpc/configs/e55xx_smp_defconfig b/arch/powerpc/configs/e55xx_smp_defconfig new file mode 100644 index 000000000000..94d120ef99cf --- /dev/null +++ b/arch/powerpc/configs/e55xx_smp_defconfig @@ -0,0 +1,84 @@ +CONFIG_PPC64=y +CONFIG_PPC_BOOK3E_64=y +# CONFIG_VIRT_CPU_ACCOUNTING is not set +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +CONFIG_EXPERIMENTAL=y +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=14 +CONFIG_SYSFS_DEPRECATED_V2=y +CONFIG_BLK_DEV_INITRD=y +# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set +CONFIG_EMBEDDED=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KALLSYMS_EXTRA_PASS=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULE_FORCE_UNLOAD=y +CONFIG_MODVERSIONS=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_P5020_DS=y +# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_BINFMT_MISC=m +CONFIG_SPARSE_IRQ=y +# CONFIG_PCI is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_PROC_DEVICETREE=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=131072 +CONFIG_EEPROM_LEGACY=y +CONFIG_INPUT_FF_MEMLESS=m +# CONFIG_INPUT_MOUSEDEV is not set +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +CONFIG_SERIO_LIBPS2=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_RSA=y +CONFIG_I2C=y +# CONFIG_HWMON is not set +CONFIG_VIDEO_OUTPUT_CONTROL=y +# CONFIG_HID_SUPPORT is not set +# CONFIG_USB_SUPPORT is not set +CONFIG_DMADEVICES=y +CONFIG_FSL_DMA=y +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_MAC_PARTITION=y +CONFIG_NLS=y +CONFIG_NLS_UTF8=m +CONFIG_CRC_T10DIF=y +CONFIG_CRC_ITU_T=m +CONFIG_LIBCRC32C=m +CONFIG_FRAME_WARN=1024 +CONFIG_DEBUG_FS=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_DEBUG_BUGVERBOSE is not set +CONFIG_DEBUG_INFO=y +# CONFIG_RCU_CPU_STALL_DETECTOR is not set +CONFIG_SYSCTL_SYSCALL_CHECK=y +CONFIG_VIRQ_DEBUG=y +CONFIG_CRYPTO=y +CONFIG_CRYPTO_CBC=y +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_HMAC=y +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_SHA1=m +CONFIG_CRYPTO_DES=y +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRYPTO_DEV_TALITOS=y diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig index 04ae0740b6d0..7bd1763877ba 100644 --- a/arch/powerpc/configs/ppc64e_defconfig +++ b/arch/powerpc/configs/ppc64e_defconfig @@ -18,6 +18,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_P5020_DS=y CONFIG_CPU_FREQ=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_USERSPACE=y @@ -256,7 +257,6 @@ CONFIG_HID_ZEROPLUS=y CONFIG_USB=y CONFIG_USB_DEVICEFS=y CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_TT_NEWSCHED=y # CONFIG_USB_EHCI_HCD_PPC_OF is not set CONFIG_USB_OHCI_HCD=y CONFIG_USB_STORAGE=m @@ -290,7 +290,6 @@ CONFIG_JFS_POSIX_ACL=y CONFIG_JFS_SECURITY=y CONFIG_XFS_FS=m CONFIG_XFS_POSIX_ACL=y -CONFIG_INOTIFY=y CONFIG_AUTOFS4_FS=m CONFIG_ISO9660_FS=y 
CONFIG_UDF_FS=m @@ -384,7 +383,6 @@ CONFIG_CRYPTO_TGR192=m CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_ANUBIS=m -CONFIG_CRYPTO_ARC4=m CONFIG_CRYPTO_BLOWFISH=m CONFIG_CRYPTO_CAST6=m CONFIG_CRYPTO_KHAZAD=m -- cgit v1.2.3 From abd12fe4d1249f6c2c4b34d5ced82f179e6b5d30 Mon Sep 17 00:00:00 2001 From: Shaohui Xie Date: Thu, 14 Oct 2010 10:04:02 +0800 Subject: fsl_rio: Add comments for sRIO registers. Add some comments to make sRIO registers map better readable. Signed-off-by: Shaohui Xie Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_rio.c | 65 ++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 25 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 3017532319c8..412763672d23 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -117,44 +117,59 @@ struct rio_atmu_regs { }; struct rio_msg_regs { - u32 omr; - u32 osr; + u32 omr; /* 0xD_3000 - Outbound message 0 mode register */ + u32 osr; /* 0xD_3004 - Outbound message 0 status register */ u32 pad1; - u32 odqdpar; + u32 odqdpar; /* 0xD_300C - Outbound message 0 descriptor queue + dequeue pointer address register */ u32 pad2; - u32 osar; - u32 odpr; - u32 odatr; - u32 odcr; + u32 osar; /* 0xD_3014 - Outbound message 0 source address + register */ + u32 odpr; /* 0xD_3018 - Outbound message 0 destination port + register */ + u32 odatr; /* 0xD_301C - Outbound message 0 destination attributes + Register*/ + u32 odcr; /* 0xD_3020 - Outbound message 0 double-word count + register */ u32 pad3; - u32 odqepar; + u32 odqepar; /* 0xD_3028 - Outbound message 0 descriptor queue + enqueue pointer address register */ u32 pad4[13]; - u32 imr; - u32 isr; + u32 imr; /* 0xD_3060 - Inbound message 0 mode register */ + u32 isr; /* 0xD_3064 - Inbound message 0 status register */ u32 pad5; - u32 ifqdpar; + u32 ifqdpar; /* 0xD_306C - Inbound message 0 frame queue dequeue + pointer address register*/ u32 pad6; - u32 ifqepar; + u32 ifqepar; /* 0xD_3074 - Inbound message 0 frame queue enqueue + pointer address register */ u32 pad7[226]; - u32 odmr; - u32 odsr; + u32 odmr; /* 0xD_3400 - Outbound doorbell mode register */ + u32 odsr; /* 0xD_3404 - Outbound doorbell status register */ u32 res0[4]; - u32 oddpr; - u32 oddatr; + u32 oddpr; /* 0xD_3418 - Outbound doorbell destination port + register */ + u32 oddatr; /* 0xD_341c - Outbound doorbell destination attributes + register */ u32 res1[3]; - u32 odretcr; + u32 odretcr; /* 0xD_342C - Outbound doorbell retry error threshold + configuration register */ u32 res2[12]; - u32 dmr; - u32 dsr; + u32 dmr; /* 0xD_3460 - Inbound doorbell mode register */ + u32 dsr; /* 0xD_3464 - Inbound doorbell status register */ u32 pad8; - u32 dqdpar; + u32 dqdpar; /* 0xD_346C - Inbound doorbell queue dequeue Pointer + address register */ u32 pad9; - u32 dqepar; + u32 dqepar; /* 0xD_3474 - Inbound doorbell Queue enqueue pointer + address register */ u32 pad10[26]; - u32 pwmr; - u32 pwsr; - u32 epwqbar; - u32 pwqbar; + u32 pwmr; /* 0xD_34E0 - Inbound port-write mode register */ + u32 pwsr; /* 0xD_34E4 - Inbound port-write status register */ + u32 epwqbar; /* 0xD_34E8 - Extended Port-Write Queue Base Address + register */ + u32 pwqbar; /* 0xD_34EC - Inbound port-write queue base address + register */ }; struct rio_tx_desc { -- cgit v1.2.3 From 2989b722c9c4694cf6c5aa378f9c8a14e106d320 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Tue, 28 Sep 2010 09:03:49 -0400 Subject: powerpc/44x: Update ppc44x_defconfig Make 
sure the new bluestone board is selected for the multiplatform defconfig. Also build logfs and squashfs as modules. Signed-off-by: Josh Boyer --- arch/powerpc/configs/ppc44x_defconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc')
diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index cd446fba3fae..2fa05f7be4cb 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -12,6 +12,7 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set CONFIG_BAMBOO=y +CONFIG_BLUESTONE=y CONFIG_SAM440EP=y CONFIG_SEQUOIA=y CONFIG_TAISHAN=y @@ -97,14 +98,17 @@ CONFIG_USB_STORAGE=m CONFIG_EXT2_FS=y CONFIG_EXT3_FS=m # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set -CONFIG_INOTIFY=y CONFIG_VFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_UBIFS_FS=m CONFIG_UBIFS_FS_XATTR=y +CONFIG_LOGFS=m CONFIG_CRAMFS=y +CONFIG_SQUASHFS=m +CONFIG_SQUASHFS_XATTR=y +CONFIG_SQUASHFS_LZO=y CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y @@ -116,11 +120,8 @@ CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_RCU_CPU_STALL_DETECTOR is not set CONFIG_SYSCTL_SYSCALL_CHECK=y -CONFIG_CRYPTO_CBC=y CONFIG_CRYPTO_ECB=y CONFIG_CRYPTO_PCBC=y -CONFIG_CRYPTO_MD5=y -CONFIG_CRYPTO_DES=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_VIRTUALIZATION=y -- cgit v1.2.3
From 6038f373a3dc1f1c26496e60b6c40b164716f07e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 15 Aug 2010 18:52:59 +0200 Subject: llseek: automatically add .llseek fop All file_operations should get a .llseek operation so we can make nonseekable_open the default for future file operations without a .llseek pointer. The three cases that we can automatically detect are no_llseek, seq_lseek and default_llseek. For cases where we can automatically prove that the file offset is always ignored, we use noop_llseek, which maintains the current behavior of not returning an error from a seek. New drivers should normally not use noop_llseek but instead use no_llseek and call nonseekable_open at open time. Existing drivers can be converted to do the same when the maintainer knows for certain that no user code relies on calling seek on the device file. The generated code is often incorrectly indented and right now contains comments that clarify for each added line why a specific variant was chosen. In the version that gets submitted upstream, the comments will be gone and I will manually fix the indentation, because there does not seem to be a way to do that using coccinelle. Some amount of new code is currently sitting in linux-next that should get the same modifications, which I will do at the end of the merge window. Many thanks to Julia Lawall for helping me learn to write a semantic patch that does all this. ===== begin semantic patch ===== // This adds an llseek= method to all file operations, // as a preparation for making no_llseek the default. // // The rules are // - use no_llseek explicitly if we do nonseekable_open // - use seq_lseek for sequential files // - use default_llseek if we know we access f_pos // - use noop_llseek if we know we don't access f_pos, // but we still want to allow users to call lseek // @ open1 exists @ identifier nested_open; @@ nested_open(...) { <+... nonseekable_open(...) ...+> } @ open exists@ identifier open_f; identifier i, f; identifier open1.nested_open; @@ int open_f(struct inode *i, struct file *f) { <+... ( nonseekable_open(...)
| nested_open(...) ) ...+> } @ read disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ read_no_fpos disable optional_qualifier exists @ identifier read_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t read_f(struct file *f, char *p, size_t s, loff_t *off) { ... when != off } @ write @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; expression E; identifier func; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { <+... ( *off = E | *off += E | func(..., off, ...) | E = *off ) ...+> } @ write_no_fpos @ identifier write_f; identifier f, p, s, off; type ssize_t, size_t, loff_t; @@ ssize_t write_f(struct file *f, const char *p, size_t s, loff_t *off) { ... when != off } @ fops0 @ identifier fops; @@ struct file_operations fops = { ... }; @ has_llseek depends on fops0 @ identifier fops0.fops; identifier llseek_f; @@ struct file_operations fops = { ... .llseek = llseek_f, ... }; @ has_read depends on fops0 @ identifier fops0.fops; identifier read_f; @@ struct file_operations fops = { ... .read = read_f, ... }; @ has_write depends on fops0 @ identifier fops0.fops; identifier write_f; @@ struct file_operations fops = { ... .write = write_f, ... }; @ has_open depends on fops0 @ identifier fops0.fops; identifier open_f; @@ struct file_operations fops = { ... .open = open_f, ... }; // use no_llseek if we call nonseekable_open //////////////////////////////////////////// @ nonseekable1 depends on !has_llseek && has_open @ identifier fops0.fops; identifier nso ~= "nonseekable_open"; @@ struct file_operations fops = { ... .open = nso, ... +.llseek = no_llseek, /* nonseekable */ }; @ nonseekable2 depends on !has_llseek @ identifier fops0.fops; identifier open.open_f; @@ struct file_operations fops = { ... .open = open_f, ... +.llseek = no_llseek, /* open uses nonseekable */ }; // use seq_lseek for sequential files ///////////////////////////////////// @ seq depends on !has_llseek @ identifier fops0.fops; identifier sr ~= "seq_read"; @@ struct file_operations fops = { ... .read = sr, ... +.llseek = seq_lseek, /* we have seq_read */ }; // use default_llseek if there is a readdir /////////////////////////////////////////// @ fops1 depends on !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier readdir_e; @@ // any other fop is used that changes pos struct file_operations fops = { ... .readdir = readdir_e, ... +.llseek = default_llseek, /* readdir is present */ }; // use default_llseek if at least one of read/write touches f_pos ///////////////////////////////////////////////////////////////// @ fops2 depends on !fops1 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read.read_f; @@ // read fops use offset struct file_operations fops = { ... .read = read_f, ... +.llseek = default_llseek, /* read accesses f_pos */ }; @ fops3 depends on !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, ... 
+ .llseek = default_llseek, /* write accesses f_pos */ }; // Use noop_llseek if neither read nor write accesses f_pos /////////////////////////////////////////////////////////// @ fops4 depends on !fops1 && !fops2 && !fops3 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; identifier write_no_fpos.write_f; @@ // write fops use offset struct file_operations fops = { ... .write = write_f, .read = read_f, ... +.llseek = noop_llseek, /* read and write both use no f_pos */ }; @ depends on has_write && !has_read && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier write_no_fpos.write_f; @@ struct file_operations fops = { ... .write = write_f, ... +.llseek = noop_llseek, /* write uses no f_pos */ }; @ depends on has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; identifier read_no_fpos.read_f; @@ struct file_operations fops = { ... .read = read_f, ... +.llseek = noop_llseek, /* read uses no f_pos */ }; @ depends on !has_read && !has_write && !fops1 && !fops2 && !has_llseek && !nonseekable1 && !nonseekable2 && !seq @ identifier fops0.fops; @@ struct file_operations fops = { ... +.llseek = noop_llseek, /* no read or write fn */ }; ===== End semantic patch ===== Signed-off-by: Arnd Bergmann Cc: Julia Lawall Cc: Christoph Hellwig --- arch/powerpc/kernel/lparcfg.c | 1 + arch/powerpc/kernel/rtas_flash.c | 3 +++ arch/powerpc/kernel/rtasd.c | 1 + arch/powerpc/platforms/iseries/mf.c | 1 + arch/powerpc/platforms/pseries/reconfig.c | 3 ++- arch/powerpc/platforms/pseries/scanlog.c | 1 + 6 files changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index 50362b6ef6e9..b1dd962e247e 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -780,6 +780,7 @@ static const struct file_operations lparcfg_fops = { .write = lparcfg_write, .open = lparcfg_open, .release = single_release, + .llseek = seq_lseek, }; static int __init lparcfg_init(void) diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 67a84d8f118d..2b442e6c21e6 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -716,6 +716,7 @@ static const struct file_operations rtas_flash_operations = { .write = rtas_flash_write, .open = rtas_excl_open, .release = rtas_flash_release, + .llseek = default_llseek, }; static const struct file_operations manage_flash_operations = { @@ -724,6 +725,7 @@ static const struct file_operations manage_flash_operations = { .write = manage_flash_write, .open = rtas_excl_open, .release = rtas_excl_release, + .llseek = default_llseek, }; static const struct file_operations validate_flash_operations = { @@ -732,6 +734,7 @@ static const struct file_operations validate_flash_operations = { .write = validate_flash_write, .open = rtas_excl_open, .release = validate_flash_release, + .llseek = default_llseek, }; static int __init rtas_flash_init(void) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 638883e23e3a..0438f819fe6b 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -354,6 +354,7 @@ static const struct file_operations proc_rtas_log_operations = { .poll = rtas_log_poll, .open = rtas_log_open, .release = rtas_log_release, + .llseek = noop_llseek, }; static int enable_surveillance(int timeout) diff --git 
a/arch/powerpc/platforms/iseries/mf.c b/arch/powerpc/platforms/iseries/mf.c index 33e5fc7334fc..42d0a886de05 100644 --- a/arch/powerpc/platforms/iseries/mf.c +++ b/arch/powerpc/platforms/iseries/mf.c @@ -1249,6 +1249,7 @@ out: static const struct file_operations proc_vmlinux_operations = { .write = proc_mf_change_vmlinux, + .llseek = default_llseek, }; static int __init mf_proc_init(void)
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 57ddbb43b33a..1de2cbb92303 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -539,7 +539,8 @@ out: } static const struct file_operations ofdt_fops = { - .write = ofdt_write + .write = ofdt_write, + .llseek = noop_llseek, }; /* create /proc/powerpc/ofdt write-only by root */
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 80e9e7652a4d..554457294a2b 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c @@ -170,6 +170,7 @@ const struct file_operations scanlog_fops = { .write = scanlog_write, .open = scanlog_open, .release = scanlog_release, + .llseek = noop_llseek, }; static int __init scanlog_init(void) -- cgit v1.2.3
From 50a23e6eec6f20d55a3a920e47adb455bff6046e Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Sat, 16 Oct 2010 10:36:23 -0700 Subject: Update broken web addresses in arch directory. The patch below updates broken web addresses in the arch directory. Signed-off-by: Justin P. Mattock Signed-off-by: Maciej W. Rozycki Cc: Finn Thain Cc: Randy Dunlap Reviewed-by: Finn Thain Signed-off-by: Jiri Kosina --- arch/powerpc/include/asm/hydra.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc')
diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h index 1ad4eed07fbe..5b0c98bd46ab 100644 --- a/arch/powerpc/include/asm/hydra.h +++ b/arch/powerpc/include/asm/hydra.h @@ -10,7 +10,7 @@ * * © Copyright 1995 Apple Computer, Inc. All rights reserved. * - * It's available online from http://chrp.apple.com/MacTech.pdf. + * It's available online from http://www.cpu.lu/~mlan/ftp/MacTech.pdf * You can obtain paper copies of this book from computer bookstores or by * writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San * Francisco, CA 94104. Reference ISBN 1-55860-393-X. -- cgit v1.2.3
From e360adbe29241a0194e10e20595360dd7b98a2b3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 14 Oct 2010 14:01:34 +0800 Subject: irq_work: Add generic hardirq context callbacks Provide a mechanism that allows running code in IRQ context. It is most useful for NMI code that needs to interact with the rest of the system -- like waking up a task to drain buffers. Perf currently has such a mechanism, so extract that and provide it as a generic feature, independent of perf so that others may also benefit. The IRQ context callback is generated through self-IPIs where possible, or on architectures like powerpc the decrementer (the built-in timer facility) is set to generate an interrupt immediately. Architectures that don't have anything like this make do with a callback from the timer tick. These architectures can call irq_work_run() at the tail of any IRQ handlers that might enqueue such work (like the perf IRQ handler) to avoid undue latencies in processing the work.
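[Editor's illustration, not part of the original commit: a minimal sketch of how a consumer might use the new interface, assuming the init_irq_work()/irq_work_queue() entry points this patch introduces; the callback, its body, and the NMI hook are hypothetical names.]

#include <linux/irq_work.h>

/* Called in hard IRQ context shortly after being queued. */
static void drain_wakeup(struct irq_work *work)
{
	/* IRQ-safe follow-up work goes here, e.g. waking a drainer task. */
}

static struct irq_work drain_work;

static void example_setup(void)
{
	init_irq_work(&drain_work, drain_wakeup);
}

/* From NMI (or otherwise restricted) context: */
static void example_nmi_handler(void)
{
	irq_work_queue(&drain_work);
}

On powerpc, the queueing path ends in set_irq_work_pending() in the hunk below, which sets the decrementer to 1 so the callback runs from the next timer interrupt.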
Signed-off-by: Peter Zijlstra Acked-by: Kyle McMartin Acked-by: Martin Schwidefsky [ various fixes ] Signed-off-by: Huang Ying LKML-Reference: <1287036094.7768.291.camel@yhuang-dev> Signed-off-by: Ingo Molnar --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/kernel/time.c | 42 ++++++++++++++++++++--------------------- 3 files changed, 23 insertions(+), 22 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 631e5a0fb6ab..4b1e521d966f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -138,6 +138,7 @@ config PPC select HAVE_OPROFILE select HAVE_SYSCALL_WRAPPERS if PPC64 select GENERIC_ATOMIC64 if PPC32 + select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 1ff6662f7faf..9b287fdd8ea3 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -129,7 +129,7 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_event_pending; /* PM interrupt while soft-disabled */ + u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 8533b3b83f5d..54888eb10c3b 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include @@ -493,60 +493,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_PERF_EVENTS +#ifdef CONFIG_IRQ_WORK /* * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... 
*/ #ifdef CONFIG_PPC64 -static inline unsigned long test_perf_event_pending(void) +static inline unsigned long test_irq_work_pending(void) { unsigned long x; asm volatile("lbz %0,%1(13)" : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_event_pending))); + : "i" (offsetof(struct paca_struct, irq_work_pending))); return x; } -static inline void set_perf_event_pending_flag(void) +static inline void set_irq_work_pending_flag(void) { asm volatile("stb %0,%1(13)" : : "r" (1), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } -static inline void clear_perf_event_pending(void) +static inline void clear_irq_work_pending(void) { asm volatile("stb %0,%1(13)" : : "r" (0), - "i" (offsetof(struct paca_struct, perf_event_pending))); + "i" (offsetof(struct paca_struct, irq_work_pending))); } #else /* 32-bit */ -DEFINE_PER_CPU(u8, perf_event_pending); +DEFINE_PER_CPU(u8, irq_work_pending); -#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 -#define test_perf_event_pending() __get_cpu_var(perf_event_pending) -#define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 +#define set_irq_work_pending_flag() __get_cpu_var(irq_work_pending) = 1 +#define test_irq_work_pending() __get_cpu_var(irq_work_pending) +#define clear_irq_work_pending() __get_cpu_var(irq_work_pending) = 0 #endif /* 32 vs 64 bit */ -void set_perf_event_pending(void) +void set_irq_work_pending(void) { preempt_disable(); - set_perf_event_pending_flag(); + set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } -#else /* CONFIG_PERF_EVENTS */ +#else /* CONFIG_IRQ_WORK */ -#define test_perf_event_pending() 0 -#define clear_perf_event_pending() +#define test_irq_work_pending() 0 +#define clear_irq_work_pending() -#endif /* CONFIG_PERF_EVENTS */ +#endif /* CONFIG_IRQ_WORK */ /* * For iSeries shared processors, we have to let the hypervisor @@ -587,9 +587,9 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); + if (test_irq_work_pending()) { + clear_irq_work_pending(); + irq_work_run(); } #ifdef CONFIG_PPC_ISERIES -- cgit v1.2.3
From e1e10a265d28273ab8c70be19d43dcbdeead6c5a Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Mon, 4 Oct 2010 17:03:17 -0700 Subject: sched: Consolidate account_system_vtime extern declaration Just a minor cleanup patch that makes things easier for the following patches. No functionality change in this patch.
Signed-off-by: Venkatesh Pallipadi Signed-off-by: Peter Zijlstra LKML-Reference: <1286237003-12406-3-git-send-email-venki@google.com> Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/system.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/system.h b/arch/powerpc/include/asm/system.h index 6c294acac848..9c3d160670b4 100644 --- a/arch/powerpc/include/asm/system.h +++ b/arch/powerpc/include/asm/system.h @@ -542,10 +542,6 @@ extern void reloc_got2(unsigned long); #define PTRRELOC(x) ((typeof(x)) add_reloc_offset((unsigned long)(x))) -#ifdef CONFIG_VIRT_CPU_ACCOUNTING -extern void account_system_vtime(struct task_struct *); -#endif - extern struct dentry *powerpc_debugfs_root; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 57fa7214330be2e292ddb1402834ff0b221ef29a Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 19 Oct 2010 16:55:35 +1100 Subject: perf, powerpc: Fix power_pmu_event_init to not use event->ctx Commit c3f00c70 ("perf: Separate find_get_context() from event initialization") changed the generic perf_event code to call perf_event_alloc, which calls the arch-specific event_init code, before looking up the context for the new event. Unfortunately, power_pmu_event_init uses event->ctx->task to see whether the new event is a per-task event or a system-wide event, and thus crashes since event->ctx is NULL at the point where power_pmu_event_init gets called. (The reason it needs to know whether it is a per-task event is because there are some hardware events on Power systems which only count when the processor is not idle, and there are some fixed-function counters which count such events. For example, the "run cycles" event counts cycles when the processor is not idle. If the user asks to count cycles, we can use "run cycles" if this is a per-task event, since the processor is running when the task is running, by definition. We can't use "run cycles" if the user asks for "cycles" on a system-wide counter.) Fortunately the information we need is in the event->attach_state field, so we just use that instead. Signed-off-by: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Peter Zijlstra Cc: Frederic Weisbecker LKML-Reference: <20101019055535.GA10398@drongo> Signed-off-by: Ingo Molnar Reported-by: Alexey Kardashevskiy Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 9cb4924b6c07..3129c855933c 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1092,7 +1092,7 @@ static int power_pmu_event_init(struct perf_event *event) * XXX we should check if the task is an idle task. */ flags = 0; - if (event->ctx->task) + if (event->attach_state & PERF_ATTACH_TASK) flags |= PPMU_ONLY_COUNT_RUN; /* -- cgit v1.2.3 From 7096d0422153ffcc2264eef652fc3a7bca3e6d3c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 20 Oct 2010 11:45:13 -0600 Subject: of/device: Rework to use common platform_device_alloc() for allocating devices The current code allocates and manages platform_devices created from the device tree manually. It also uses an unsafe shortcut for allocating the platform_device and the resource table at the same time. (which I added in the last rework; sorry). This patch refactors the code to use platform_device_alloc() for allocating new devices. 
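[Editor's illustration, not part of the original commit: a hedged sketch of the platform_device_alloc()/platform_device_add() pattern this rework moves to, mirroring the ibmebus hunk below; the device name, function name, and single resource are assumptions made for the example.]

#include <linux/platform_device.h>

static int example_register(struct resource *res)
{
	struct platform_device *pdev;
	int ret;

	pdev = platform_device_alloc("example-dev", 0);
	if (!pdev)
		return -ENOMEM;

	ret = platform_device_add_resources(pdev, res, 1);
	if (!ret)
		ret = platform_device_add(pdev);
	if (ret)
		platform_device_put(pdev);	/* drops the last reference; the core frees pdev */
	return ret;
}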
The rework reduces the amount of custom code implemented by of_platform, eliminates the unsafe alloc trick, and has the side benefit of letting the platform_bus code manage freeing the device data and resources when the device is freed. Signed-off-by: Grant Likely Cc: Benjamin Herrenschmidt Cc: Greg Kroah-Hartman Cc: "David S. Miller" Cc: Michal Simek --- arch/powerpc/kernel/ibmebus.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/powerpc')
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 9b626cfffce1..f62efdfd1769 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -162,13 +162,10 @@ static int ibmebus_create_device(struct device_node *dn) dev->dev.bus = &ibmebus_bus_type; dev->dev.archdata.dma_ops = &ibmebus_dma_ops; - ret = of_device_register(dev); - if (ret) { - of_device_free(dev); - return ret; - } - - return 0; + ret = of_device_add(dev); + if (ret) + platform_device_put(dev); + return ret; } static int ibmebus_create_devices(const struct of_device_id *matches) -- cgit v1.2.3
From 32c97689c46b272302053778f1a6c2facb0e220c Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 20 Oct 2010 11:45:14 -0600 Subject: of/flattree: Eliminate need to provide early_init_dt_scan_chosen_arch This patch refactors the early init parsing of the chosen node so that architectures aren't forced to provide an empty implementation of early_init_dt_scan_chosen_arch. Instead, if an architecture wants to do something different, it can either use a wrapper function around early_init_dt_scan_chosen(), or it can replace it altogether. This patch was written in preparation for adding device tree support to both x86 and MIPS. Signed-off-by: Grant Likely Tested-by: David Daney --- arch/powerpc/kernel/prom.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/powerpc')
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index fed9bf6187d1..e296aae63c60 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -363,10 +363,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node, return 0; } -void __init early_init_dt_scan_chosen_arch(unsigned long node) +int __init early_init_dt_scan_chosen_ppc(unsigned long node, const char *uname, + int depth, void *data) { unsigned long *lprop; + /* Use common scan routine to determine if this is the chosen node */ + if (early_init_dt_scan_chosen(node, uname, depth, data) == 0) + return 0; + #ifdef CONFIG_PPC64 /* check if iommu is forced on or off */ if (of_get_flat_dt_prop(node, "linux,iommu-off", NULL) != NULL) @@ -398,6 +403,9 @@ void __init early_init_dt_scan_chosen_arch(unsigned long node) if (lprop) crashk_res.end = crashk_res.start + *lprop - 1; #endif + + /* break now */ + return 1; } #ifdef CONFIG_PPC_PSERIES @@ -679,7 +687,7 @@ void __init early_init_devtree(void *params) * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... */ - of_scan_flat_dt(early_init_dt_scan_chosen, NULL); + of_scan_flat_dt(early_init_dt_scan_chosen_ppc, NULL); /* Scan memory nodes and rebuild MEMBLOCKs */ memblock_init(); -- cgit v1.2.3
From 126512e3f274802ca65ebeca8660237f0361ad48 Mon Sep 17 00:00:00 2001 From: Anatolij Gustschin Date: Tue, 28 Sep 2010 20:55:20 +0200 Subject: USB: add platform glue driver for FSL USB DR controller Replace the FSL USB platform code with a simple platform driver that creates the FSL USB platform devices.
The driver creates platform devices based on the information from USB nodes in the flat device tree. This is the replacement for the old arch fsl_soc USB code removed by this patch. The driver uses the usual of-style binding; the available EHCI-HCD and UDC drivers can be bound to the created devices. The new of-style driver additionally instantiates a USB OTG platform device, as the appropriate USB OTG driver will be added soon. Signed-off-by: Anatolij Gustschin Cc: Kumar Gala Cc: Grant Likely Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/sysdev/fsl_soc.c | 163 ------------------------------------------ 1 file changed, 163 deletions(-) (limited to 'arch/powerpc')
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index b91f7acdda6f..49a51f134c51 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -209,169 +209,6 @@ static int __init of_add_fixed_phys(void) arch_initcall(of_add_fixed_phys); #endif /* CONFIG_FIXED_PHY */ -static enum fsl_usb2_phy_modes determine_usb_phy(const char *phy_type) -{ - if (!phy_type) - return FSL_USB2_PHY_NONE; - if (!strcasecmp(phy_type, "ulpi")) - return FSL_USB2_PHY_ULPI; - if (!strcasecmp(phy_type, "utmi")) - return FSL_USB2_PHY_UTMI; - if (!strcasecmp(phy_type, "utmi_wide")) - return FSL_USB2_PHY_UTMI_WIDE; - if (!strcasecmp(phy_type, "serial")) - return FSL_USB2_PHY_SERIAL; - - return FSL_USB2_PHY_NONE; -} - -static int __init fsl_usb_of_init(void) -{ - struct device_node *np; - unsigned int i = 0; - struct platform_device *usb_dev_mph = NULL, *usb_dev_dr_host = NULL, - *usb_dev_dr_client = NULL; - int ret; - - for_each_compatible_node(np, NULL, "fsl-usb2-mph") { - struct resource r[2]; - struct fsl_usb2_platform_data usb_data; - const unsigned char *prop = NULL; - - memset(&r, 0, sizeof(r)); - memset(&usb_data, 0, sizeof(usb_data)); - - ret = of_address_to_resource(np, 0, &r[0]); - if (ret) - goto err; - - of_irq_to_resource(np, 0, &r[1]); - - usb_dev_mph = - platform_device_register_simple("fsl-ehci", i, r, 2); - if (IS_ERR(usb_dev_mph)) { - ret = PTR_ERR(usb_dev_mph); - goto err; - } - - usb_dev_mph->dev.coherent_dma_mask = 0xffffffffUL; - usb_dev_mph->dev.dma_mask = &usb_dev_mph->dev.coherent_dma_mask; - - usb_data.operating_mode = FSL_USB2_MPH_HOST; - - prop = of_get_property(np, "port0", NULL); - if (prop) - usb_data.port_enables |= FSL_USB2_PORT0_ENABLED; - - prop = of_get_property(np, "port1", NULL); - if (prop) - usb_data.port_enables |= FSL_USB2_PORT1_ENABLED; - - prop = of_get_property(np, "phy_type", NULL); - usb_data.phy_mode = determine_usb_phy(prop); - - ret = - platform_device_add_data(usb_dev_mph, &usb_data, - sizeof(struct - fsl_usb2_platform_data)); - if (ret) - goto unreg_mph; - i++; - } - - for_each_compatible_node(np, NULL, "fsl-usb2-dr") { - struct resource r[2]; - struct fsl_usb2_platform_data usb_data; - const unsigned char *prop = NULL; - - if (!of_device_is_available(np)) - continue; - - memset(&r, 0, sizeof(r)); - memset(&usb_data, 0, sizeof(usb_data)); - - ret = of_address_to_resource(np, 0, &r[0]); - if (ret) - goto unreg_mph; - - of_irq_to_resource(np, 0, &r[1]); - - prop = of_get_property(np, "dr_mode", NULL); - - if (!prop || !strcmp(prop, "host")) { - usb_data.operating_mode = FSL_USB2_DR_HOST; - usb_dev_dr_host = platform_device_register_simple( - "fsl-ehci", i, r, 2); - if (IS_ERR(usb_dev_dr_host)) { - ret = PTR_ERR(usb_dev_dr_host); - goto err; - } - } else if (prop && !strcmp(prop, "peripheral")) { - usb_data.operating_mode = FSL_USB2_DR_DEVICE; - usb_dev_dr_client =
platform_device_register_simple( - "fsl-usb2-udc", i, r, 2); - if (IS_ERR(usb_dev_dr_client)) { - ret = PTR_ERR(usb_dev_dr_client); - goto err; - } - } else if (prop && !strcmp(prop, "otg")) { - usb_data.operating_mode = FSL_USB2_DR_OTG; - usb_dev_dr_host = platform_device_register_simple( - "fsl-ehci", i, r, 2); - if (IS_ERR(usb_dev_dr_host)) { - ret = PTR_ERR(usb_dev_dr_host); - goto err; - } - usb_dev_dr_client = platform_device_register_simple( - "fsl-usb2-udc", i, r, 2); - if (IS_ERR(usb_dev_dr_client)) { - ret = PTR_ERR(usb_dev_dr_client); - goto err; - } - } else { - ret = -EINVAL; - goto err; - } - - prop = of_get_property(np, "phy_type", NULL); - usb_data.phy_mode = determine_usb_phy(prop); - - if (usb_dev_dr_host) { - usb_dev_dr_host->dev.coherent_dma_mask = 0xffffffffUL; - usb_dev_dr_host->dev.dma_mask = &usb_dev_dr_host-> - dev.coherent_dma_mask; - if ((ret = platform_device_add_data(usb_dev_dr_host, - &usb_data, sizeof(struct - fsl_usb2_platform_data)))) - goto unreg_dr; - } - if (usb_dev_dr_client) { - usb_dev_dr_client->dev.coherent_dma_mask = 0xffffffffUL; - usb_dev_dr_client->dev.dma_mask = &usb_dev_dr_client-> - dev.coherent_dma_mask; - if ((ret = platform_device_add_data(usb_dev_dr_client, - &usb_data, sizeof(struct - fsl_usb2_platform_data)))) - goto unreg_dr; - } - i++; - } - return 0; - -unreg_dr: - if (usb_dev_dr_host) - platform_device_unregister(usb_dev_dr_host); - if (usb_dev_dr_client) - platform_device_unregister(usb_dev_dr_client); -unreg_mph: - if (usb_dev_mph) - platform_device_unregister(usb_dev_mph); -err: - return ret; -} - -arch_initcall(fsl_usb_of_init); - #if defined(CONFIG_FSL_SOC_BOOKE) || defined(CONFIG_PPC_86xx) static __be32 __iomem *rstcr; -- cgit v1.2.3 From 96bc451a153297bf1f99ef2d633d512ea349ae7a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:42 +0200 Subject: KVM: PPC: Introduce shared page For transparent variable sharing between the hypervisor and guest, I introduce a shared page. This shared page will contain all the registers the guest can read and write safely without exiting guest context. This patch only implements the stubs required for the basic structure of the shared page. The actual register moving follows. 
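[Editor's illustration, not part of the original commit: once a register actually lives in the shared page — the msr field is only added by the following patch in this series — host code can update guest-visible state with a plain store and no guest exit. The helper name is hypothetical.]

static inline void example_set_shared_msr(struct kvm_vcpu *vcpu, u64 msr)
{
	/* Plain store into the page shared with the guest; no trap required. */
	vcpu->arch.shared->msr = msr;
}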
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/asm/kvm_para.h | 5 +++++ arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kvm/44x.c | 7 +++++++ arch/powerpc/kvm/book3s.c | 9 ++++++++- arch/powerpc/kvm/e500.c | 7 +++++++ 6 files changed, 30 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index b0b23c007d6e..53edacdf6940 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #define KVM_MAX_VCPUS 1 @@ -290,6 +291,7 @@ struct kvm_vcpu_arch { struct tasklet_struct tasklet; u64 dec_jiffies; unsigned long pending_exceptions; + struct kvm_vcpu_arch_shared *shared; #ifdef CONFIG_PPC_BOOK3S struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 2d48f6a63d0b..1485ba87a52a 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -20,6 +20,11 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ +#include + +struct kvm_vcpu_arch_shared { +}; + #ifdef __KERNEL__ static inline int kvm_para_available(void) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1c0607ddccc0..60e7db4c13af 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -400,6 +400,7 @@ int main(void) DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); + DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); /* book3s */ #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index 73c0a3f64ed1..e7b1f3fca5dc 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -123,8 +123,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_vcpu; + return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu_44x); out: @@ -135,6 +141,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + free_page((unsigned long)vcpu->arch.shared); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu_44x); } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a3cef30d1d42..b3385dd6f28d 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1242,6 +1242,10 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_shadow_vcpu; + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_vcpu; + vcpu->arch.host_retip = kvm_return_point; vcpu->arch.host_msr = mfmsr(); #ifdef CONFIG_PPC_BOOK3S_64 @@ -1268,10 +1272,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) err = kvmppc_mmu_init(vcpu); if (err < 0) - goto free_shadow_vcpu; + goto uninit_vcpu; return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_shadow_vcpu: kfree(vcpu_book3s->shadow_vcpu); free_vcpu: @@ -1284,6 +1290,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + 
free_page((unsigned long)vcpu->arch.shared); kvm_vcpu_uninit(vcpu); kfree(vcpu_book3s->shadow_vcpu); vfree(vcpu_book3s); diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index e8a00b0c4449..71750f2dd5d3 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -117,8 +117,14 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto uninit_vcpu; + vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); + if (!vcpu->arch.shared) + goto uninit_tlb; + return vcpu; +uninit_tlb: + kvmppc_e500_tlb_uninit(vcpu_e500); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_vcpu: @@ -131,6 +137,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + free_page((unsigned long)vcpu->arch.shared); kvmppc_e500_tlb_uninit(vcpu_e500); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vcpu_e500); -- cgit v1.2.3 From 666e7252a15b7fc4a116e65deaf6da5e4ce660e3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:43 +0200 Subject: KVM: PPC: Convert MSR to shared page One of the most obvious registers to share with the guest directly is the MSR. The MSR contains the "interrupts enabled" flag which the guest has to toggle in critical sections. So in order to bring the overhead of interrupt en- and disabling down, let's put msr into the shared page. Keep in mind that even though you can fully read its contents, writing to it doesn't always update all state. There are a few safe fields that don't require hypervisor interaction. See the documentation for a list of MSR bits that are safe to be set from inside the guest. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/include/asm/kvm_para.h | 1 + arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kvm/44x_tlb.c | 8 ++-- arch/powerpc/kvm/book3s.c | 65 +++++++++++++++++--------------- arch/powerpc/kvm/book3s_32_mmu.c | 12 +++--- arch/powerpc/kvm/book3s_32_mmu_host.c | 4 +- arch/powerpc/kvm/book3s_64_mmu.c | 12 +++--- arch/powerpc/kvm/book3s_64_mmu_host.c | 4 +- arch/powerpc/kvm/book3s_emulate.c | 9 +++-- arch/powerpc/kvm/book3s_paired_singles.c | 7 ++-- arch/powerpc/kvm/booke.c | 20 +++++----- arch/powerpc/kvm/booke.h | 6 +-- arch/powerpc/kvm/booke_emulate.c | 6 +-- arch/powerpc/kvm/booke_interrupts.S | 3 +- arch/powerpc/kvm/e500_tlb.c | 12 +++--- arch/powerpc/kvm/e500_tlb.h | 2 +- arch/powerpc/kvm/powerpc.c | 3 +- 18 files changed, 93 insertions(+), 84 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 53edacdf6940..ba20f90655f3 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -211,7 +211,6 @@ struct kvm_vcpu_arch { u32 cr; #endif - ulong msr; #ifdef CONFIG_PPC_BOOK3S ulong shadow_msr; ulong hflags; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 1485ba87a52a..a17dc5229d99 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -23,6 +23,7 @@ #include struct kvm_vcpu_arch_shared { + __u64 msr; }; #ifdef __KERNEL__ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 60e7db4c13af..1221bcdff52f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -394,13 +394,13 @@ int main(void) DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct 
kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); - DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); DEFINE(VCPU_SHADOW_PID, offsetof(struct kvm_vcpu, arch.shadow_pid)); DEFINE(VCPU_SHARED, offsetof(struct kvm_vcpu, arch.shared)); + DEFINE(VCPU_SHARED_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); /* book3s */ #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 9b9b5cdea840..9f71b8d6eb0d 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -221,14 +221,14 @@ gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); } @@ -354,7 +354,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.msr & MSR_PR); + vcpu->arch.shared->msr & MSR_PR); stlbe.tid = !(asid & 0xff); /* Keep track of the reference so we can properly release it later. */ @@ -423,7 +423,7 @@ static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, /* Does it match current guest AS? */ /* XXX what about IS != DS? 
*/ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) return 0; gpa = get_tlb_raddr(tlbe); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index b3385dd6f28d..2efe69240e1b 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -115,31 +115,31 @@ static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { - vcpu->arch.shadow_msr = vcpu->arch.msr; + ulong smsr = vcpu->arch.shared->msr; + /* Guest MSR values */ - vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | - MSR_BE | MSR_DE; + smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE; /* Process MSR values */ - vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | - MSR_EE; + smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE; /* External providers the guest reserved */ - vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext); + smsr |= (vcpu->arch.shared->msr & vcpu->arch.guest_owned_ext); /* 64-bit Process MSR values */ #ifdef CONFIG_PPC_BOOK3S_64 - vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV; + smsr |= MSR_ISF | MSR_HV; #endif + vcpu->arch.shadow_msr = smsr; } void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) { - ulong old_msr = vcpu->arch.msr; + ulong old_msr = vcpu->arch.shared->msr; #ifdef EXIT_DEBUG printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr); #endif msr &= to_book3s(vcpu)->msr_mask; - vcpu->arch.msr = msr; + vcpu->arch.shared->msr = msr; kvmppc_recalc_shadow_msr(vcpu); if (msr & (MSR_WE|MSR_POW)) { @@ -149,21 +149,21 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) } } - if ((vcpu->arch.msr & (MSR_PR|MSR_IR|MSR_DR)) != + if ((vcpu->arch.shared->msr & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); } /* Preload FPU if it's enabled */ - if (vcpu->arch.msr & MSR_FP) + if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); } void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { vcpu->arch.srr0 = kvmppc_get_pc(vcpu); - vcpu->arch.srr1 = vcpu->arch.msr | flags; + vcpu->arch.srr1 = vcpu->arch.shared->msr | flags; kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); vcpu->arch.mmu.reset_msr(vcpu); } @@ -254,11 +254,11 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) switch (priority) { case BOOK3S_IRQPRIO_DECREMENTER: - deliver = vcpu->arch.msr & MSR_EE; + deliver = vcpu->arch.shared->msr & MSR_EE; vec = BOOK3S_INTERRUPT_DECREMENTER; break; case BOOK3S_IRQPRIO_EXTERNAL: - deliver = vcpu->arch.msr & MSR_EE; + deliver = vcpu->arch.shared->msr & MSR_EE; vec = BOOK3S_INTERRUPT_EXTERNAL; break; case BOOK3S_IRQPRIO_SYSTEM_RESET: @@ -437,7 +437,7 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, struct kvmppc_pte *pte) { - int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR)); + int relocated = (vcpu->arch.shared->msr & (data ? MSR_DR : MSR_IR)); int r; if (relocated) { @@ -545,8 +545,8 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, int page_found = 0; struct kvmppc_pte pte; bool is_mmio = false; - bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; - bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; + bool dr = (vcpu->arch.shared->msr & MSR_DR) ? 
true : false; + bool ir = (vcpu->arch.shared->msr & MSR_IR) ? true : false; u64 vsid; relocated = data ? dr : ir; @@ -563,7 +563,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.vpage = eaddr >> 12; } - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; @@ -571,7 +571,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); - if ((vcpu->arch.msr & (MSR_DR|MSR_IR)) == MSR_DR) + if ((vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) == MSR_DR) pte.vpage |= ((u64)VSID_REAL_DR << (SID_SHIFT - 12)); else pte.vpage |= ((u64)VSID_REAL_IR << (SID_SHIFT - 12)); @@ -596,14 +596,16 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Page not found in guest PTE entries */ vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; - vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); + vcpu->arch.shared->msr |= + (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EPERM) { /* Storage protection */ vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; - vcpu->arch.msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); + vcpu->arch.shared->msr |= + (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EINVAL) { /* Page not found in guest SLB */ @@ -695,9 +697,11 @@ static int kvmppc_read_inst(struct kvm_vcpu *vcpu) ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); if (ret == -ENOENT) { - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); + ulong msr = vcpu->arch.shared->msr; + + msr = kvmppc_set_field(msr, 33, 33, 1); + msr = kvmppc_set_field(msr, 34, 36, 0); + vcpu->arch.shared->msr = kvmppc_set_field(msr, 42, 47, 0); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); return EMULATE_AGAIN; } @@ -736,7 +740,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) return RESUME_GUEST; - if (!(vcpu->arch.msr & msr)) { + if (!(vcpu->arch.shared->msr & msr)) { kvmppc_book3s_queue_irqprio(vcpu, exit_nr); return RESUME_GUEST; } @@ -804,7 +808,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if ((exit_nr != 0x900) && (exit_nr != 0x500)) printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), - vcpu->arch.msr); + vcpu->arch.shared->msr); #endif kvm_resched(vcpu); switch (exit_nr) { @@ -836,7 +840,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } else { - vcpu->arch.msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; + vcpu->arch.shared->msr |= + to_svcpu(vcpu)->shadow_srr1 & 0x58000000; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; @@ -904,7 +909,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, program_interrupt: flags = 
to_svcpu(vcpu)->shadow_srr1 & 0x1f0000ull; - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { #ifdef EXIT_DEBUG printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); #endif @@ -1052,7 +1057,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->ctr = kvmppc_get_ctr(vcpu); regs->lr = kvmppc_get_lr(vcpu); regs->xer = kvmppc_get_xer(vcpu); - regs->msr = vcpu->arch.msr; + regs->msr = vcpu->arch.shared->msr; regs->srr0 = vcpu->arch.srr0; regs->srr1 = vcpu->arch.srr1; regs->pid = vcpu->arch.pid; @@ -1353,7 +1358,7 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) local_irq_enable(); /* Preload FPU if it's enabled */ - if (vcpu->arch.msr & MSR_FP) + if (vcpu->arch.shared->msr & MSR_FP) kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); ret = __kvmppc_vcpu_entry(kvm_run, vcpu); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 3292d76101d2..449bce5f021a 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -133,7 +133,7 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, else bat = &vcpu_book3s->ibat[i]; - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { if (!bat->vp) continue; } else { @@ -214,8 +214,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF); pp = pteg[i+1] & 3; - if ((sre->Kp && (vcpu->arch.msr & MSR_PR)) || - (sre->Ks && !(vcpu->arch.msr & MSR_PR))) + if ((sre->Kp && (vcpu->arch.shared->msr & MSR_PR)) || + (sre->Ks && !(vcpu->arch.shared->msr & MSR_PR))) pp |= 4; pte->may_write = false; @@ -334,7 +334,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, struct kvmppc_sr *sr; u64 gvsid = esid; - if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { sr = find_sr(to_book3s(vcpu), ea); if (sr->valid) gvsid = sr->vsid; @@ -343,7 +343,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, /* In case we only have one of MSR_IR or MSR_DR set, let's put that in the real-mode context (and hope RM doesn't access high memory) */ - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: *vsid = VSID_REAL | esid; break; @@ -363,7 +363,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, BUG(); } - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) *vsid |= VSID_PR; return 0; diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 0b51ef872c1e..67b8c38d932f 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -86,7 +86,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) struct kvmppc_sid_map *map; u16 sid_map_mask; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); @@ -253,7 +253,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) u16 sid_map_mask; static int backwards_map = 0; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; /* We might get collisions that trap in preceding order, so let's diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 
4025ea26b3c1..58aa8409dae0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -180,9 +180,9 @@ do_second: goto no_page_found; } - if ((vcpu->arch.msr & MSR_PR) && slbe->Kp) + if ((vcpu->arch.shared->msr & MSR_PR) && slbe->Kp) key = 4; - else if (!(vcpu->arch.msr & MSR_PR) && slbe->Ks) + else if (!(vcpu->arch.shared->msr & MSR_PR) && slbe->Ks) key = 4; for (i=0; i<16; i+=2) { @@ -381,7 +381,7 @@ static void kvmppc_mmu_book3s_64_slbia(struct kvm_vcpu *vcpu) for (i = 1; i < vcpu_book3s->slb_nr; i++) vcpu_book3s->slb[i].valid = false; - if (vcpu->arch.msr & MSR_IR) { + if (vcpu->arch.shared->msr & MSR_IR) { kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); } @@ -446,13 +446,13 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, struct kvmppc_slb *slb; u64 gvsid = esid; - if (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); if (slb) gvsid = slb->vsid; } - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + switch (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { case 0: *vsid = VSID_REAL | esid; break; @@ -473,7 +473,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, break; } - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) *vsid |= VSID_PR; return 0; diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 384179a5002b..71c1f9027abb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -66,7 +66,7 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) struct kvmppc_sid_map *map; u16 sid_map_mask; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); @@ -191,7 +191,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) u16 sid_map_mask; static int backwards_map = 0; - if (vcpu->arch.msr & MSR_PR) + if (vcpu->arch.shared->msr & MSR_PR) gvsid |= VSID_PR; /* We might get collisions that trap in preceding order, so let's diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index c85f906038ce..35d3c16b2938 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -86,14 +86,15 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case 31: switch (get_xop(inst)) { case OP_31_XOP_MFMSR: - kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr); + kvmppc_set_gpr(vcpu, get_rt(inst), + vcpu->arch.shared->msr); break; case OP_31_XOP_MTMSRD: { ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst)); if (inst & 0x10000) { - vcpu->arch.msr &= ~(MSR_RI | MSR_EE); - vcpu->arch.msr |= rs & (MSR_RI | MSR_EE); + vcpu->arch.shared->msr &= ~(MSR_RI | MSR_EE); + vcpu->arch.shared->msr |= rs & (MSR_RI | MSR_EE); } else kvmppc_set_msr(vcpu, rs); break; @@ -204,7 +205,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, ra = kvmppc_get_gpr(vcpu, get_ra(inst)); addr = (ra + rb) & ~31ULL; - if (!(vcpu->arch.msr & MSR_SF)) + if (!(vcpu->arch.shared->msr & MSR_SF)) addr &= 0xffffffff; vaddr = addr; diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 474f2e24050a..626e6efaa79f 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -165,9 +165,10 @@ static inline void 
kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) { u64 dsisr; + struct kvm_vcpu_arch_shared *shared = vcpu->arch.shared; - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0); - vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); + shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0); + shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0); vcpu->arch.dear = eaddr; /* Page Fault */ dsisr = kvmppc_set_field(0, 33, 33, 1); @@ -658,7 +659,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; - if (!(vcpu->arch.msr & MSR_FP)) { + if (!(vcpu->arch.shared->msr & MSR_FP)) { kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); return EMULATE_AGAIN; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8d4e35f5372c..4ec9d49a1cb9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -62,7 +62,7 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) { int i; - printk("pc: %08lx msr: %08lx\n", vcpu->arch.pc, vcpu->arch.msr); + printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr); printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); @@ -169,34 +169,34 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, break; case BOOKE_IRQPRIO_CRITICAL: case BOOKE_IRQPRIO_WATCHDOG: - allowed = vcpu->arch.msr & MSR_CE; + allowed = vcpu->arch.shared->msr & MSR_CE; msr_mask = MSR_ME; break; case BOOKE_IRQPRIO_MACHINE_CHECK: - allowed = vcpu->arch.msr & MSR_ME; + allowed = vcpu->arch.shared->msr & MSR_ME; msr_mask = 0; break; case BOOKE_IRQPRIO_EXTERNAL: case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: - allowed = vcpu->arch.msr & MSR_EE; + allowed = vcpu->arch.shared->msr & MSR_EE; msr_mask = MSR_CE|MSR_ME|MSR_DE; break; case BOOKE_IRQPRIO_DEBUG: - allowed = vcpu->arch.msr & MSR_DE; + allowed = vcpu->arch.shared->msr & MSR_DE; msr_mask = MSR_ME; break; } if (allowed) { vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.srr1 = vcpu->arch.shared->msr; vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) vcpu->arch.esr = vcpu->arch.queued_esr; if (update_dear == true) vcpu->arch.dear = vcpu->arch.queued_dear; - kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask); + kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); clear_bit(priority, &vcpu->arch.pending_exceptions); } @@ -265,7 +265,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_PROGRAM: - if (vcpu->arch.msr & MSR_PR) { + if (vcpu->arch.shared->msr & MSR_PR) { /* Program traps generated by user-level software must be handled * by the guest kernel. 
*/ kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr); @@ -467,7 +467,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { vcpu->arch.pc = 0; - vcpu->arch.msr = 0; + vcpu->arch.shared->msr = 0; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ vcpu->arch.shadow_pid = 1; @@ -490,7 +490,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->ctr = vcpu->arch.ctr; regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); - regs->msr = vcpu->arch.msr; + regs->msr = vcpu->arch.shared->msr; regs->srr0 = vcpu->arch.srr0; regs->srr1 = vcpu->arch.srr1; regs->pid = vcpu->arch.pid; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index d59bcca1f9d8..88258acc98fa 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -54,12 +54,12 @@ extern unsigned long kvmppc_booke_handlers; * changing. */ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr) { - if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR)) + if ((new_msr & MSR_PR) != (vcpu->arch.shared->msr & MSR_PR)) kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR); - vcpu->arch.msr = new_msr; + vcpu->arch.shared->msr = new_msr; - if (vcpu->arch.msr & MSR_WE) { + if (vcpu->arch.shared->msr & MSR_WE) { kvm_vcpu_block(vcpu); kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS); }; diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index cbc790ee1928..b115203ac118 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -62,7 +62,7 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case OP_31_XOP_MFMSR: rt = get_rt(inst); - kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->msr); kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); break; @@ -74,13 +74,13 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case OP_31_XOP_WRTEE: rs = get_rs(inst); - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) | (kvmppc_get_gpr(vcpu, rs) & MSR_EE); kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); break; case OP_31_XOP_WRTEEI: - vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + vcpu->arch.shared->msr = (vcpu->arch.shared->msr & ~MSR_EE) | (inst & MSR_EE); kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS); break; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 380a78cf484d..049846911ce4 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -415,7 +415,8 @@ lightweight_exit: lwz r8, VCPU_GPR(r8)(r4) lwz r3, VCPU_PC(r4) mtsrr0 r3 - lwz r3, VCPU_MSR(r4) + lwz r3, VCPU_SHARED(r4) + lwz r3, VCPU_SHARED_MSR(r3) oris r3, r3, KVMPPC_MSR_MASK@h ori r3, r3, KVMPPC_MSR_MASK@l mtsrr1 r3 diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 21011e12caeb..092a390876f3 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -314,10 +314,10 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, | MAS1_TID(get_tlb_tid(gtlbe)) | MAS1_TS | MAS1_VALID; stlbe->mas2 = (gvaddr & MAS2_EPN) | e500_shadow_mas2_attrib(gtlbe->mas2, - vcpu_e500->vcpu.arch.msr & MSR_PR); + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); stlbe->mas3 = (hpaddr & MAS3_RPN) | e500_shadow_mas3_attrib(gtlbe->mas3, - vcpu_e500->vcpu.arch.msr & MSR_PR); + vcpu_e500->vcpu.arch.shared->msr & MSR_PR); stlbe->mas7 = 
(hpaddr >> 32) & MAS7_RPN; trace_kvm_stlb_write(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, @@ -576,28 +576,28 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); } int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); return kvmppc_e500_tlb_search(vcpu, eaddr, get_cur_pid(vcpu), as); } void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) { - unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.pc, as); } void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) { - unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); kvmppc_e500_deliver_tlb_miss(vcpu, vcpu->arch.fault_dear, as); } diff --git a/arch/powerpc/kvm/e500_tlb.h b/arch/powerpc/kvm/e500_tlb.h index d28e3010a5e2..458946b4775d 100644 --- a/arch/powerpc/kvm/e500_tlb.h +++ b/arch/powerpc/kvm/e500_tlb.h @@ -171,7 +171,7 @@ static inline int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, /* Does it match current guest AS? */ /* XXX what about IS != DS? */ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) return 0; gpa = get_tlb_raddr(tlbe); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72a4ad86ee91..22f6fa2982f2 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -38,7 +38,8 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !(v->arch.msr & MSR_WE) || !!(v->arch.pending_exceptions); + return !(v->arch.shared->msr & MSR_WE) || + !!(v->arch.pending_exceptions); } -- cgit v1.2.3 From d562de48de68b60b3d2522e7d8273d7112034ee6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:44 +0200 Subject: KVM: PPC: Convert DSISR to shared page The DSISR register contains information about a data page fault. It is fully read/write from inside the guest context, and writes to it require no hypervisor interaction. This patch converts all users of the current field to the shared page.
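For illustration only (not part of this patch): once DSISR lives in the shared page, a paravirtualized guest can read it with a plain load instead of a trapping mfspr. A minimal sketch, assuming the guest has already mapped its kvm_vcpu_arch_shared page somewhere; the helper name below is made up:

static inline __u32 kvm_guest_read_dsisr(struct kvm_vcpu_arch_shared *shared)
{
	/* Plain memory load from the shared page; no exit to the hypervisor. */
	return shared->dsisr;
}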
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s.h | 1 - arch/powerpc/include/asm/kvm_para.h | 1 + arch/powerpc/kvm/book3s.c | 11 ++++++----- arch/powerpc/kvm/book3s_emulate.c | 6 +++--- arch/powerpc/kvm/book3s_paired_singles.c | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8274a2d43925..b5b196166455 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -85,7 +85,6 @@ struct kvmppc_vcpu_book3s { u64 hid[6]; u64 gqr[8]; int slb_nr; - u32 dsisr; u64 sdr1; u64 hior; u64 msr_mask; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index a17dc5229d99..9f7565b1de65 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -24,6 +24,7 @@ struct kvm_vcpu_arch_shared { __u64 msr; + __u32 dsisr; }; #ifdef __KERNEL__ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 2efe69240e1b..eb401b6d4d8c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -595,15 +595,16 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (page_found == -ENOENT) { /* Page not found in guest PTE entries */ vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; + vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; vcpu->arch.shared->msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EPERM) { /* Storage protection */ vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; - to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT; + vcpu->arch.shared->dsisr = + to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; + vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; vcpu->arch.shared->msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); @@ -867,7 +868,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); } else { vcpu->arch.dear = dar; - to_book3s(vcpu)->dsisr = to_svcpu(vcpu)->fault_dsisr; + vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); r = RESUME_GUEST; @@ -994,7 +995,7 @@ program_interrupt: } case BOOK3S_INTERRUPT_ALIGNMENT: if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, + vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, kvmppc_get_last_inst(vcpu)); vcpu->arch.dear = kvmppc_alignment_dar(vcpu, kvmppc_get_last_inst(vcpu)); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 35d3c16b2938..9982ff163af0 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -221,7 +221,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, else if (r == -EPERM) dsisr |= DSISR_PROTFAULT; - to_book3s(vcpu)->dsisr = dsisr; + vcpu->arch.shared->dsisr = dsisr; to_svcpu(vcpu)->fault_dsisr = dsisr; kvmppc_book3s_queue_irqprio(vcpu, @@ -327,7 +327,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) to_book3s(vcpu)->sdr1 = spr_val; break; case SPRN_DSISR: - to_book3s(vcpu)->dsisr = spr_val; + vcpu->arch.shared->dsisr = spr_val; break; 
case SPRN_DAR: vcpu->arch.dear = spr_val; @@ -440,7 +440,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); break; case SPRN_DSISR: - kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr); break; case SPRN_DAR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break; diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 626e6efaa79f..749dfbd04738 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -173,7 +173,7 @@ static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) /* Page Fault */ dsisr = kvmppc_set_field(0, 33, 33, 1); if (is_store) - to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); + shared->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); } -- cgit v1.2.3 From 5e030186dfc4e4e47c84d2557b17e4aa06c76f96 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:45 +0200 Subject: KVM: PPC: Convert DAR to shared page The DAR register contains the address a data page fault occurred at. This register behaves pretty much like a simple data storage register that gets written to on data faults. There is no hypervisor interaction required on read or write. This patch converts all users of the current field to the shared page. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 1 - arch/powerpc/include/asm/kvm_para.h | 1 + arch/powerpc/kvm/book3s.c | 14 +++++++------- arch/powerpc/kvm/book3s_emulate.c | 6 +++--- arch/powerpc/kvm/book3s_paired_singles.c | 2 +- arch/powerpc/kvm/booke.c | 2 +- arch/powerpc/kvm/booke_emulate.c | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index ba20f90655f3..c852408eac38 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -231,7 +231,6 @@ struct kvm_vcpu_arch { ulong csrr1; ulong dsrr0; ulong dsrr1; - ulong dear; ulong esr; u32 dec; u32 decar; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 9f7565b1de65..ec72a1c8c045 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -23,6 +23,7 @@ #include struct kvm_vcpu_arch_shared { + __u64 dar; __u64 msr; __u32 dsisr; }; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index eb401b6d4d8c..4d46f8b13cc6 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -594,14 +594,14 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (page_found == -ENOENT) { /* Page not found in guest PTE entries */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; vcpu->arch.shared->msr |= (to_svcpu(vcpu)->shadow_srr1 & 0x00000000f8000000ULL); kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EPERM) { /* Storage protection */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr & ~DSISR_NOHPTE; vcpu->arch.shared->dsisr |= DSISR_PROTFAULT; @@ -610,7 +610,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_book3s_queue_irqprio(vcpu, vec); } else if (page_found == -EINVAL) { /* Page not found in guest SLB */ - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { @@ -867,17 +867,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, if (to_svcpu(vcpu)->fault_dsisr & DSISR_NOHPTE) { r = kvmppc_handle_pagefault(run, vcpu, dar, exit_nr); } else { - vcpu->arch.dear = dar; + vcpu->arch.shared->dar = dar; vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFUL); + kvmppc_mmu_pte_flush(vcpu, dar, ~0xFFFUL); r = RESUME_GUEST; } break; } case BOOK3S_INTERRUPT_DATA_SEGMENT: if (kvmppc_mmu_map_segment(vcpu, kvmppc_get_fault_dar(vcpu)) < 0) { - vcpu->arch.dear = kvmppc_get_fault_dar(vcpu); + vcpu->arch.shared->dar = kvmppc_get_fault_dar(vcpu); kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_SEGMENT); } @@ -997,7 +997,7 @@ program_interrupt: if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { vcpu->arch.shared->dsisr = kvmppc_alignment_dsisr(vcpu, kvmppc_get_last_inst(vcpu)); - vcpu->arch.dear = kvmppc_alignment_dar(vcpu, + vcpu->arch.shared->dar = kvmppc_alignment_dar(vcpu, kvmppc_get_last_inst(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, exit_nr); } diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 9982ff163af0..c1478642f856 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -212,7 +212,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, r = kvmppc_st(vcpu, &addr, 32, zeros, true); if ((r == -ENOENT) || (r == -EPERM)) { *advance = 0; - vcpu->arch.dear = vaddr; + vcpu->arch.shared->dar = vaddr; to_svcpu(vcpu)->fault_dar = vaddr; dsisr = DSISR_ISSTORE; @@ -330,7 +330,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) vcpu->arch.shared->dsisr = spr_val; break; case SPRN_DAR: - vcpu->arch.dear = spr_val; + vcpu->arch.shared->dar = spr_val; break; case SPRN_HIOR: to_book3s(vcpu)->hior = spr_val; @@ -443,7 +443,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dsisr); break; case SPRN_DAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; case SPRN_HIOR: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior); diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 749dfbd04738..807576f148ce 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -169,7 +169,7 @@ static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) shared->msr = kvmppc_set_field(shared->msr, 33, 36, 0); shared->msr = kvmppc_set_field(shared->msr, 42, 47, 0); - vcpu->arch.dear = eaddr; + shared->dar = eaddr; /* Page Fault */ dsisr = kvmppc_set_field(0, 33, 33, 1); if (is_store) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4ec9d49a1cb9..4aab6d2ce133 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -195,7 +195,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, if (update_esr == true) vcpu->arch.esr = vcpu->arch.queued_esr; if (update_dear == true) - vcpu->arch.dear = vcpu->arch.queued_dear; + vcpu->arch.shared->dar = 
vcpu->arch.queued_dear; kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); clear_bit(priority, &vcpu->arch.pending_exceptions); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index b115203ac118..51ef4539ed51 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -105,7 +105,7 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) switch (sprn) { case SPRN_DEAR: - vcpu->arch.dear = spr_val; break; + vcpu->arch.shared->dar = spr_val; break; case SPRN_ESR: vcpu->arch.esr = spr_val; break; case SPRN_DBCR0: @@ -200,7 +200,7 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_IVPR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break; case SPRN_DEAR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->dar); break; case SPRN_ESR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break; case SPRN_DBCR0: -- cgit v1.2.3 From de7906c36ca1e22a3e3600e95c6a4e2c1e4e2e9c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:46 +0200 Subject: KVM: PPC: Convert SRR0 and SRR1 to shared page The SRR0 and SRR1 registers contain cached values of the PC and MSR respectively. They get written to by the hypervisor when an interrupt occurs or directly by the kernel. They are also used to tell the rfi(d) instruction where to jump to. Because they only get touched on well-defined events, they are very simple to share with the guest. Hypervisor and guest both have full r/w access. This patch converts all users of the current field to the shared page. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 -- arch/powerpc/include/asm/kvm_para.h | 2 ++ arch/powerpc/kvm/book3s.c | 12 ++++++------ arch/powerpc/kvm/book3s_emulate.c | 4 ++-- arch/powerpc/kvm/booke.c | 15 ++++++++------- arch/powerpc/kvm/booke_emulate.c | 4 ++-- arch/powerpc/kvm/emulate.c | 12 ++++++++---- 7 files changed, 28 insertions(+), 23 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index c852408eac38..5255d754f9a9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -225,8 +225,6 @@ struct kvm_vcpu_arch { ulong sprg5; ulong sprg6; ulong sprg7; - ulong srr0; - ulong srr1; ulong csrr0; ulong csrr1; ulong dsrr0; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index ec72a1c8c045..d7fc6c2c9730 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -23,6 +23,8 @@ #include struct kvm_vcpu_arch_shared { + __u64 srr0; + __u64 srr1; __u64 dar; __u64 msr; __u32 dsisr; }; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 4d46f8b13cc6..afa0dd4a27f4 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -162,8 +162,8 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { - vcpu->arch.srr0 = kvmppc_get_pc(vcpu); - vcpu->arch.srr1 = vcpu->arch.shared->msr | flags; + vcpu->arch.shared->srr0 = kvmppc_get_pc(vcpu); + vcpu->arch.shared->srr1 = vcpu->arch.shared->msr | flags; kvmppc_set_pc(vcpu, to_book3s(vcpu)->hior + vec); vcpu->arch.mmu.reset_msr(vcpu); } @@ -1059,8 +1059,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->lr = kvmppc_get_lr(vcpu); regs->xer = kvmppc_get_xer(vcpu); regs->msr =
vcpu->arch.shared->msr; - regs->srr0 = vcpu->arch.srr0; - regs->srr1 = vcpu->arch.srr1; + regs->srr0 = vcpu->arch.shared->srr0; + regs->srr1 = vcpu->arch.shared->srr1; regs->pid = vcpu->arch.pid; regs->sprg0 = vcpu->arch.sprg0; regs->sprg1 = vcpu->arch.sprg1; @@ -1086,8 +1086,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_lr(vcpu, regs->lr); kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.srr0 = regs->srr0; - vcpu->arch.srr1 = regs->srr1; + vcpu->arch.shared->srr0 = regs->srr0; + vcpu->arch.shared->srr1 = regs->srr1; vcpu->arch.sprg0 = regs->sprg0; vcpu->arch.sprg1 = regs->sprg1; vcpu->arch.sprg2 = regs->sprg2; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index c1478642f856..f333cb445349 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -73,8 +73,8 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, switch (get_xop(inst)) { case OP_19_XOP_RFID: case OP_19_XOP_RFI: - kvmppc_set_pc(vcpu, vcpu->arch.srr0); - kvmppc_set_msr(vcpu, vcpu->arch.srr1); + kvmppc_set_pc(vcpu, vcpu->arch.shared->srr0); + kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); *advance = 0; break; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 4aab6d2ce133..793df28b628d 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -64,7 +64,8 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) printk("pc: %08lx msr: %08llx\n", vcpu->arch.pc, vcpu->arch.shared->msr); printk("lr: %08lx ctr: %08lx\n", vcpu->arch.lr, vcpu->arch.ctr); - printk("srr0: %08lx srr1: %08lx\n", vcpu->arch.srr0, vcpu->arch.srr1); + printk("srr0: %08llx srr1: %08llx\n", vcpu->arch.shared->srr0, + vcpu->arch.shared->srr1); printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); @@ -189,8 +190,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, } if (allowed) { - vcpu->arch.srr0 = vcpu->arch.pc; - vcpu->arch.srr1 = vcpu->arch.shared->msr; + vcpu->arch.shared->srr0 = vcpu->arch.pc; + vcpu->arch.shared->srr1 = vcpu->arch.shared->msr; vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) vcpu->arch.esr = vcpu->arch.queued_esr; @@ -491,8 +492,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.shared->msr; - regs->srr0 = vcpu->arch.srr0; - regs->srr1 = vcpu->arch.srr1; + regs->srr0 = vcpu->arch.shared->srr0; + regs->srr1 = vcpu->arch.shared->srr1; regs->pid = vcpu->arch.pid; regs->sprg0 = vcpu->arch.sprg0; regs->sprg1 = vcpu->arch.sprg1; @@ -518,8 +519,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lr = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.srr0 = regs->srr0; - vcpu->arch.srr1 = regs->srr1; + vcpu->arch.shared->srr0 = regs->srr0; + vcpu->arch.shared->srr1 = regs->srr1; vcpu->arch.sprg0 = regs->sprg0; vcpu->arch.sprg1 = regs->sprg1; vcpu->arch.sprg2 = regs->sprg2; diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 51ef4539ed51..1260f5f24c0c 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -31,8 +31,8 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) { - vcpu->arch.pc = vcpu->arch.srr0; - kvmppc_set_msr(vcpu, vcpu->arch.srr1); + vcpu->arch.pc = vcpu->arch.shared->srr0; + kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } 
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 4568ec386c2a..ad0fa4ff4ea0 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -242,9 +242,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) switch (sprn) { case SPRN_SRR0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr0); + break; case SPRN_SRR1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->srr1); + break; case SPRN_PVR: kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break; case SPRN_PIR: @@ -320,9 +322,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) rs = get_rs(inst); switch (sprn) { case SPRN_SRR0: - vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->srr0 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SRR1: - vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->srr1 = kvmppc_get_gpr(vcpu, rs); + break; /* XXX We need to context-switch the timebase for * watchdog and FIT. */ -- cgit v1.2.3 From a73a9599e03eef1324d5aeecaebc1b339d2e1664 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:47 +0200 Subject: KVM: PPC: Convert SPRG[0-3] to shared page When in kernel mode there are 4 additional registers available that are simple data storage. Instead of exiting to the hypervisor to read and write those, we can just share them with the guest using the page. This patch converts all users of the current field to the shared page. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 4 ---- arch/powerpc/include/asm/kvm_para.h | 4 ++++ arch/powerpc/kvm/book3s.c | 16 ++++++++-------- arch/powerpc/kvm/booke.c | 16 ++++++++-------- arch/powerpc/kvm/emulate.c | 24 ++++++++++++++++-------- 5 files changed, 36 insertions(+), 28 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5255d754f9a9..221cf85e9a6e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -217,10 +217,6 @@ struct kvm_vcpu_arch { ulong guest_owned_ext; #endif u32 mmucr; - ulong sprg0; - ulong sprg1; - ulong sprg2; - ulong sprg3; ulong sprg4; ulong sprg5; ulong sprg6; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index d7fc6c2c9730..e402999ba193 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -23,6 +23,10 @@ #include struct kvm_vcpu_arch_shared { + __u64 sprg0; + __u64 sprg1; + __u64 sprg2; + __u64 sprg3; __u64 srr0; __u64 srr1; __u64 dar; __u64 msr; __u32 dsisr; }; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index afa0dd4a27f4..cfd7fe5c3a62 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1062,10 +1062,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->srr0 = vcpu->arch.shared->srr0; regs->srr1 = vcpu->arch.shared->srr1; regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.sprg0; - regs->sprg1 = vcpu->arch.sprg1; - regs->sprg2 = vcpu->arch.sprg2; - regs->sprg3 = vcpu->arch.sprg3; + regs->sprg0 = vcpu->arch.shared->sprg0; + regs->sprg1 = vcpu->arch.shared->sprg1; + regs->sprg2 = vcpu->arch.shared->sprg2; + regs->sprg3 = vcpu->arch.shared->sprg3; regs->sprg5 = vcpu->arch.sprg4; regs->sprg6 = vcpu->arch.sprg5; regs->sprg7 =
vcpu->arch.sprg6; @@ -1088,10 +1088,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; - vcpu->arch.sprg0 = regs->sprg0; - vcpu->arch.sprg1 = regs->sprg1; - vcpu->arch.sprg2 = regs->sprg2; - vcpu->arch.sprg3 = regs->sprg3; + vcpu->arch.shared->sprg0 = regs->sprg0; + vcpu->arch.shared->sprg1 = regs->sprg1; + vcpu->arch.shared->sprg2 = regs->sprg2; + vcpu->arch.shared->sprg3 = regs->sprg3; vcpu->arch.sprg5 = regs->sprg4; vcpu->arch.sprg6 = regs->sprg5; vcpu->arch.sprg7 = regs->sprg6; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 793df28b628d..b2c8c423c4d5 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -495,10 +495,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->srr0 = vcpu->arch.shared->srr0; regs->srr1 = vcpu->arch.shared->srr1; regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.sprg0; - regs->sprg1 = vcpu->arch.sprg1; - regs->sprg2 = vcpu->arch.sprg2; - regs->sprg3 = vcpu->arch.sprg3; + regs->sprg0 = vcpu->arch.shared->sprg0; + regs->sprg1 = vcpu->arch.shared->sprg1; + regs->sprg2 = vcpu->arch.shared->sprg2; + regs->sprg3 = vcpu->arch.shared->sprg3; regs->sprg5 = vcpu->arch.sprg4; regs->sprg6 = vcpu->arch.sprg5; regs->sprg7 = vcpu->arch.sprg6; @@ -521,10 +521,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_msr(vcpu, regs->msr); vcpu->arch.shared->srr0 = regs->srr0; vcpu->arch.shared->srr1 = regs->srr1; - vcpu->arch.sprg0 = regs->sprg0; - vcpu->arch.sprg1 = regs->sprg1; - vcpu->arch.sprg2 = regs->sprg2; - vcpu->arch.sprg3 = regs->sprg3; + vcpu->arch.shared->sprg0 = regs->sprg0; + vcpu->arch.shared->sprg1 = regs->sprg1; + vcpu->arch.shared->sprg2 = regs->sprg2; + vcpu->arch.shared->sprg3 = regs->sprg3; vcpu->arch.sprg5 = regs->sprg4; vcpu->arch.sprg6 = regs->sprg5; vcpu->arch.sprg7 = regs->sprg6; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index ad0fa4ff4ea0..454869b5e91e 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -263,13 +263,17 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, rt, get_tb()); break; case SPRN_SPRG0: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg0); + break; case SPRN_SPRG1: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg1); + break; case SPRN_SPRG2: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg2); + break; case SPRN_SPRG3: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break; + kvmppc_set_gpr(vcpu, rt, vcpu->arch.shared->sprg3); + break; /* Note: SPRG4-7 are user-readable, so we don't get * a trap. 
*/ @@ -341,13 +345,17 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case SPRN_SPRG0: - vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg0 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG1: - vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg1 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG2: - vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg2 = kvmppc_get_gpr(vcpu, rs); + break; case SPRN_SPRG3: - vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break; + vcpu->arch.shared->sprg3 = kvmppc_get_gpr(vcpu, rs); + break; default: emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs); -- cgit v1.2.3 From 2a342ed57756ad5d8af5456959433884367e5ab2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:48 +0200 Subject: KVM: PPC: Implement hypervisor interface To communicate with KVM directly we need to plumb some sort of interface between the guest and KVM. Usually those interfaces use hypercalls. This hypercall implementation is described in the last patch of the series in a special documentation file. Please read that for further information. This patch implements stubs to handle KVM PPC hypercalls on the host and guest side alike. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 114 +++++++++++++++++++++++++++++++++++- arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kernel/Makefile | 2 + arch/powerpc/kernel/kvm.c | 68 +++++++++++++++++++++ arch/powerpc/kvm/book3s.c | 9 ++- arch/powerpc/kvm/booke.c | 10 +++- arch/powerpc/kvm/powerpc.c | 32 ++++++++++ 7 files changed, 232 insertions(+), 4 deletions(-) create mode 100644 arch/powerpc/kernel/kvm.c (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index e402999ba193..556fd59ee0f1 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -21,6 +21,7 @@ #define __POWERPC_KVM_PARA_H__ #include +#include struct kvm_vcpu_arch_shared { __u64 sprg0; @@ -34,16 +35,127 @@ struct kvm_vcpu_arch_shared { __u32 dsisr; }; +#define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" 
*/ +#define HC_VENDOR_KVM (42 << 16) +#define HC_EV_SUCCESS 0 +#define HC_EV_UNIMPLEMENTED 12 + #ifdef __KERNEL__ +#ifdef CONFIG_KVM_GUEST + +static inline int kvm_para_available(void) +{ + struct device_node *hyper_node; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return 0; + + if (!of_device_is_compatible(hyper_node, "linux,kvm")) + return 0; + + return 1; +} + +extern unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr); + +#else + static inline int kvm_para_available(void) { return 0; } +static unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + return HC_EV_UNIMPLEMENTED; +} + +#endif + +static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2) +{ + unsigned long in[8]; + unsigned long out[8]; + unsigned long r; + + r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM); + *r2 = out[0]; + + return r; +} + +static inline long kvm_hypercall0(unsigned int nr) +{ + unsigned long in[8]; + unsigned long out[8]; + + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall1(unsigned int nr, unsigned long p1) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall2(unsigned int nr, unsigned long p1, + unsigned long p2) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall3(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + +static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, + unsigned long p2, unsigned long p3, + unsigned long p4) +{ + unsigned long in[8]; + unsigned long out[8]; + + in[0] = p1; + in[1] = p2; + in[2] = p3; + in[3] = p4; + return kvm_hypercall(in, out, nr | HC_VENDOR_KVM); +} + + static inline unsigned int kvm_arch_para_features(void) { - return 0; + unsigned long r; + + if (!kvm_para_available()) + return 0; + + if(kvm_hypercall0_1(KVM_HC_FEATURES, &r)) + return 0; + + return r; } #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 18d139ec2d22..ecb3bc74c344 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -107,6 +107,7 @@ extern int kvmppc_booke_init(void); extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); +extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); /* * Cuts out inst bits with ordering according to spec. diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 1dda70129141..3a6955dc7191 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -127,6 +127,8 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif +obj-$(CONFIG_KVM_GUEST) += kvm.o + # Disable GCOV in odd or sensitive code GCOV_PROFILE_prom_init.o := n GCOV_PROFILE_ftrace.o := n diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c new file mode 100644 index 000000000000..4f85505e4653 --- /dev/null +++ b/arch/powerpc/kernel/kvm.c @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2010 SUSE Linux Products GmbH. All rights reserved. 
+ * + * Authors: + * Alexander Graf + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +unsigned long kvm_hypercall(unsigned long *in, + unsigned long *out, + unsigned long nr) +{ + unsigned long register r0 asm("r0"); + unsigned long register r3 asm("r3") = in[0]; + unsigned long register r4 asm("r4") = in[1]; + unsigned long register r5 asm("r5") = in[2]; + unsigned long register r6 asm("r6") = in[3]; + unsigned long register r7 asm("r7") = in[4]; + unsigned long register r8 asm("r8") = in[5]; + unsigned long register r9 asm("r9") = in[6]; + unsigned long register r10 asm("r10") = in[7]; + unsigned long register r11 asm("r11") = nr; + unsigned long register r12 asm("r12"); + + asm volatile("bl kvm_hypercall_start" + : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), + "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), + "=r"(r12) + : "r"(r3), "r"(r4), "r"(r5), "r"(r6), "r"(r7), "r"(r8), + "r"(r9), "r"(r10), "r"(r11) + : "memory", "cc", "xer", "ctr", "lr"); + + out[0] = r4; + out[1] = r5; + out[2] = r6; + out[3] = r7; + out[4] = r8; + out[5] = r9; + out[6] = r10; + out[7] = r11; + + return r3; +} +EXPORT_SYMBOL_GPL(kvm_hypercall); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index cfd7fe5c3a62..5cb5f0d9381f 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -947,10 +947,10 @@ program_interrupt: break; } case BOOK3S_INTERRUPT_SYSCALL: - // XXX make user settable if (vcpu->arch.osi_enabled && (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { + /* MOL hypercalls */ u64 *gprs = run->osi.gprs; int i; @@ -959,8 +959,13 @@ program_interrupt: gprs[i] = kvmppc_get_gpr(vcpu, i); vcpu->arch.osi_needed = 1; r = RESUME_HOST_NV; - + } else if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; } else { + /* Guest syscalls */ vcpu->stat.syscall_exits++; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); r = RESUME_GUEST; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index b2c8c423c4d5..13e0747178e3 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -338,7 +338,15 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, break; case BOOKE_INTERRUPT_SYSCALL: - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + if (!(vcpu->arch.shared->msr & MSR_PR) && + (((u32)kvmppc_get_gpr(vcpu, 0)) == KVM_SC_MAGIC_R0)) { + /* KVM PV hypercalls */ + kvmppc_set_gpr(vcpu, 3, kvmppc_kvm_pv(vcpu)); + r = RESUME_GUEST; + } else { + /* Guest syscalls */ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_SYSCALL); + } kvmppc_account_exit(vcpu, SYSCALL_EXITS); r = RESUME_GUEST; break; diff --git a/arch/powerpc/kvm/powerpc.c 
b/arch/powerpc/kvm/powerpc.c index 22f6fa2982f2..a4cf4b47e232 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -42,6 +42,38 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) !!(v->arch.pending_exceptions); } +int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) +{ + int nr = kvmppc_get_gpr(vcpu, 11); + int r; + unsigned long __maybe_unused param1 = kvmppc_get_gpr(vcpu, 3); + unsigned long __maybe_unused param2 = kvmppc_get_gpr(vcpu, 4); + unsigned long __maybe_unused param3 = kvmppc_get_gpr(vcpu, 5); + unsigned long __maybe_unused param4 = kvmppc_get_gpr(vcpu, 6); + unsigned long r2 = 0; + + if (!(vcpu->arch.shared->msr & MSR_SF)) { + /* 32 bit mode */ + param1 &= 0xffffffff; + param2 &= 0xffffffff; + param3 &= 0xffffffff; + param4 &= 0xffffffff; + } + + switch (nr) { + case HC_VENDOR_KVM | KVM_HC_FEATURES: + r = HC_EV_SUCCESS; + + /* Second return value is in r4 */ + kvmppc_set_gpr(vcpu, 4, r2); + break; + default: + r = HC_EV_UNIMPLEMENTED; + break; + } + + return r; +} int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From 5c6cedf488a1144ac4f683f3ea1a642533d1dcd2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:49 +0200 Subject: KVM: PPC: Add PV guest critical sections When running in hooked code we need a way to disable interrupts without clobbering any interrupts or exiting out to the hypervisor. To achieve this, we have an additional critical field in the shared page. If that field is equal to the r1 register of the guest, it tells the hypervisor that we're in such a critical section and thus may not receive any interrupts. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 1 + arch/powerpc/kvm/book3s.c | 18 ++++++++++++++++-- arch/powerpc/kvm/booke.c | 15 +++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 556fd59ee0f1..4577e7b6dff1 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -24,6 +24,7 @@ #include struct kvm_vcpu_arch_shared { + __u64 critical; /* Guest may not get interrupts if == r1 */ __u64 sprg0; __u64 sprg1; __u64 sprg2; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5cb5f0d9381f..d6227ff0ceae 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -251,14 +251,28 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) int deliver = 1; int vec = 0; ulong flags = 0ULL; + ulong crit_raw = vcpu->arch.shared->critical; + ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); + bool crit; + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... 
and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); switch (priority) { case BOOK3S_IRQPRIO_DECREMENTER: - deliver = vcpu->arch.shared->msr & MSR_EE; + deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_DECREMENTER; break; case BOOK3S_IRQPRIO_EXTERNAL: - deliver = vcpu->arch.shared->msr & MSR_EE; + deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_EXTERNAL; break; case BOOK3S_IRQPRIO_SYSTEM_RESET: diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 13e0747178e3..104d0ee8c8aa 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -147,6 +147,20 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, int allowed = 0; ulong uninitialized_var(msr_mask); bool update_esr = false, update_dear = false; + ulong crit_raw = vcpu->arch.shared->critical; + ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); + bool crit; + + /* Truncate crit indicators in 32 bit mode */ + if (!(vcpu->arch.shared->msr & MSR_SF)) { + crit_raw &= 0xffffffff; + crit_r1 &= 0xffffffff; + } + + /* Critical section when crit == r1 */ + crit = (crit_raw == crit_r1); + /* ... and we're in supervisor mode */ + crit = crit && !(vcpu->arch.shared->msr & MSR_PR); switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: @@ -181,6 +195,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_DECREMENTER: case BOOKE_IRQPRIO_FIT: allowed = vcpu->arch.shared->msr & MSR_EE; + allowed = allowed && !crit; msr_mask = MSR_CE|MSR_ME|MSR_DE; break; case BOOKE_IRQPRIO_DEBUG: -- cgit v1.2.3 From fad93fe1d452960eb838109222cc949eb77f2859 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:50 +0200 Subject: KVM: PPC: Add PV guest scratch registers While running in hooked code we need to store register contents out because we must not clobber any registers. So let's add some fields to the shared page we can just happily write to. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 4577e7b6dff1..5be00c9533d2 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -24,6 +24,9 @@ #include struct kvm_vcpu_arch_shared { + __u64 scratch1; + __u64 scratch2; + __u64 scratch3; __u64 critical; /* Guest may not get interrupts if == r1 */ __u64 sprg0; __u64 sprg1; -- cgit v1.2.3 From 90bba358873dc96a6746f0df453a0a8ca3d6b86e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:51 +0200 Subject: KVM: PPC: Tell guest about pending interrupts When the guest turns on interrupts again, it needs to know if we have an interrupt pending for it. Because if so, it should rather get out of guest context and get the interrupt. So we introduce a new field in the shared page that we use to tell the guest that there's a pending interrupt lying around. 
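A compilable guest-side sketch of how this flag gets consumed, for orientation: the structure layout is reduced, and the names fake_page, shared and trap_to_host() are inventions of this example rather than code from the series. The real consumers are the patched mtmsr/wrteei paths later in this log.

    #include <stdint.h>

    #define MSR_EE 0x8000UL                  /* external-interrupt enable */

    struct shared_view {
            uint64_t msr;
            uint32_t int_pending;
    };

    static struct shared_view fake_page;     /* stands in for the magic page */
    static struct shared_view *shared = &fake_page;

    /* Any privileged instruction would do to bounce into the host. */
    static void trap_to_host(void) { }

    static void pv_irq_enable(void)
    {
            shared->msr |= MSR_EE;           /* cheap: a plain store */
            if (shared->int_pending)         /* host queued an irq while EE=0 */
                    trap_to_host();          /* let it be delivered now */
    }

    int main(void)
    {
            pv_irq_enable();
            return 0;
    }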
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 1 + arch/powerpc/kvm/book3s.c | 7 +++++++ arch/powerpc/kvm/booke.c | 7 +++++++ 3 files changed, 15 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 5be00c9533d2..0653b0d238b4 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -37,6 +37,7 @@ struct kvm_vcpu_arch_shared { __u64 dar; __u64 msr; __u32 dsisr; + __u32 int_pending; /* Tells the guest if we have an interrupt */ }; #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d6227ff0ceae..06229fec5c9f 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -337,6 +337,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long old_pending = vcpu->arch.pending_exceptions; unsigned int priority; #ifdef EXIT_DEBUG @@ -356,6 +357,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) BITS_PER_BYTE * sizeof(*pending), priority + 1); } + + /* Tell the guest about our interrupt status */ + if (*pending) + vcpu->arch.shared->int_pending = 1; + else if (old_pending) + vcpu->arch.shared->int_pending = 0; } void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 104d0ee8c8aa..c604277011a6 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -224,6 +224,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned long old_pending = vcpu->arch.pending_exceptions; unsigned int priority; priority = __ffs(*pending); @@ -235,6 +236,12 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) BITS_PER_BYTE * sizeof(*pending), priority + 1); } + + /* Tell the guest about our interrupt status */ + if (*pending) + vcpu->arch.shared->int_pending = 1; + else if (old_pending) + vcpu->arch.shared->int_pending = 0; } /** -- cgit v1.2.3 From 28e83b4fa7f8bd114940fa933ac8cbe80969eba2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:52 +0200 Subject: KVM: PPC: Make PAM a define On PowerPC it's very normal to not support all of the physical RAM in real mode. To check if we're matching on the shared page or not, we need to know the limits so we can restrain ourselves to that range. So let's make it a define instead of open-coding it. And while at it, let's also increase it. 
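A quick standalone check of what the new mask does, namely keep the low 60 bits of a real address and drop the top nibble; the sample address is made up:

    #include <stdio.h>
    #include <stdint.h>

    #define KVM_PAM 0x0fffffffffffffffULL

    int main(void)
    {
            uint64_t raddr = 0x4000000012340000ULL;  /* made-up tagged address */

            printf("raw:    %016llx\n", (unsigned long long)raddr);
            printf("masked: %016llx\n", (unsigned long long)(raddr & KVM_PAM));
            /* masked: 0000000012340000 -- the access stays below 1 << 60 */
            return 0;
    }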
Signed-off-by: Alexander Graf v2 -> v3: - RMO -> PAM (non-magic page) Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 3 +++ arch/powerpc/kvm/book3s.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 221cf85e9a6e..1674da8134cb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -48,6 +48,9 @@ #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) +/* Physical Address Mask - allowed range of real mode RAM access */ +#define KVM_PAM 0x0fffffffffffffffULL + struct kvm; struct kvm_run; struct kvm_vcpu; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 06229fec5c9f..0ed5376df82c 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -465,7 +465,7 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data); } else { pte->eaddr = eaddr; - pte->raddr = eaddr & 0xffffffff; + pte->raddr = eaddr & KVM_PAM; pte->vpage = VSID_REAL | eaddr >> 12; pte->may_read = true; pte->may_write = true; @@ -579,7 +579,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.may_execute = true; pte.may_read = true; pte.may_write = true; - pte.raddr = eaddr & 0xffffffff; + pte.raddr = eaddr & KVM_PAM; pte.eaddr = eaddr; pte.vpage = eaddr >> 12; } -- cgit v1.2.3 From beb03f14da9ceff76ff08cbb8af064b52dc21f7e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:53 +0200 Subject: KVM: PPC: First magic page steps We will be introducing a method to project the shared page in guest context. As soon as we're talking about this coupling, the shared page is called the magic page. This patch introduces simple defines, so the follow-up patches are easier to read. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1674da8134cb..e1da77579e65 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -287,6 +287,8 @@ struct kvm_vcpu_arch { u64 dec_jiffies; unsigned long pending_exceptions; struct kvm_vcpu_arch_shared *shared; + unsigned long magic_page_pa; /* phys addr to map the magic page to */ + unsigned long magic_page_ea; /* effect. addr to map the magic page to */ #ifdef CONFIG_PPC_BOOK3S struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; -- cgit v1.2.3 From e8508940a88691ad3d1c46608cd968eb4be9cbc5 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:54 +0200 Subject: KVM: PPC: Magic Page Book3s support We need to override EA as well as PA lookups for the magic page. When the guest tells us to project it, the magic page overrides any guest mappings. In order to reflect that, we need to hook into all the MMU layers of KVM to force map the magic page if necessary.
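The override test that the MMU hooks in the following patches keep repeating reads naturally as a single predicate. A standalone sketch, assuming 4k pages, with the constants mirrored from the patches:

    #include <stdbool.h>
    #include <stdint.h>

    #define KVM_PAM    0x0fffffffffffffffULL
    #define PAGE_SHIFT 12
    #define PAGE_MASK  (~((1ULL << PAGE_SHIFT) - 1))

    /* Does this guest frame number alias the magic page? */
    static bool gfn_is_magic(uint64_t gfn, uint64_t magic_page_pa)
    {
            if (!magic_page_pa)     /* guest never requested the mapping */
                    return false;

            return ((gfn << PAGE_SHIFT) & KVM_PAM) ==
                   ((magic_page_pa & PAGE_MASK) & KVM_PAM);
    }

    int main(void)
    {
            return gfn_is_magic(0xfffff, 0xfffff000ULL) ? 0 : 1;
    }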
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s.h | 1 + arch/powerpc/kvm/book3s.c | 35 ++++++++++++++++++++++++++++++++--- arch/powerpc/kvm/book3s_32_mmu.c | 16 ++++++++++++++++ arch/powerpc/kvm/book3s_32_mmu_host.c | 2 +- arch/powerpc/kvm/book3s_64_mmu.c | 30 +++++++++++++++++++++++++++++- arch/powerpc/kvm/book3s_64_mmu_host.c | 9 ++------- 6 files changed, 81 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index b5b196166455..00cf8b07e502 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -130,6 +130,7 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); extern u32 kvmppc_trampoline_lowmem; extern u32 kvmppc_trampoline_enter; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 0ed5376df82c..eee97b5a7400 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -419,6 +419,25 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) } } +pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn) +{ + ulong mp_pa = vcpu->arch.magic_page_pa; + + /* Magic page override */ + if (unlikely(mp_pa) && + unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == + ((mp_pa & PAGE_MASK) & KVM_PAM))) { + ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + pfn_t pfn; + + pfn = (pfn_t)virt_to_phys((void*)shared_page) >> PAGE_SHIFT; + get_page(pfn_to_page(pfn)); + return pfn; + } + + return gfn_to_pfn(vcpu->kvm, gfn); +} + /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To * make Book3s_32 Linux work on Book3s_64, we have to make sure we trap dcbz to * emulate 32 bytes dcbz length. 
@@ -554,6 +573,13 @@ mmio: static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) { + ulong mp_pa = vcpu->arch.magic_page_pa; + + if (unlikely(mp_pa) && + unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + return 1; + } + return kvm_is_visible_gfn(vcpu->kvm, gfn); } @@ -1257,6 +1283,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) struct kvmppc_vcpu_book3s *vcpu_book3s; struct kvm_vcpu *vcpu; int err = -ENOMEM; + unsigned long p; vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); if (!vcpu_book3s) @@ -1274,8 +1301,10 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_shadow_vcpu; - vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!vcpu->arch.shared) + p = __get_free_page(GFP_KERNEL|__GFP_ZERO); + /* the real shared page fills the last 4k of our page */ + vcpu->arch.shared = (void*)(p + PAGE_SIZE - 4096); + if (!p) goto uninit_vcpu; vcpu->arch.host_retip = kvm_return_point; @@ -1322,7 +1351,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - free_page((unsigned long)vcpu->arch.shared); + free_page((unsigned long)vcpu->arch.shared & PAGE_MASK); kvm_vcpu_uninit(vcpu); kfree(vcpu_book3s->shadow_vcpu); vfree(vcpu_book3s); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 449bce5f021a..a7d121adc842 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -281,8 +281,24 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data) { int r; + ulong mp_ea = vcpu->arch.magic_page_ea; pte->eaddr = eaddr; + + /* Magic page override */ + if (unlikely(mp_ea) && + unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); + pte->raddr = vcpu->arch.magic_page_pa | (pte->raddr & 0xfff); + pte->raddr &= KVM_PAM; + pte->may_execute = true; + pte->may_read = true; + pte->may_write = true; + + return 0; + } + r = kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, pte, data); if (r < 0) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, true); diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 67b8c38d932f..05e8c9eb0e16 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -147,7 +147,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) struct hpte_cache *pte; /* Get host physical address for gpa */ - hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); if (kvm_is_error_hva(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c index 58aa8409dae0..d7889ef3211e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu.c +++ b/arch/powerpc/kvm/book3s_64_mmu.c @@ -163,6 +163,22 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, bool found = false; bool perm_err = false; int second = 0; + ulong mp_ea = vcpu->arch.magic_page_ea; + + /* Magic page override */ + if (unlikely(mp_ea) && + unlikely((eaddr & ~0xfffULL) == (mp_ea & ~0xfffULL)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + gpte->eaddr = eaddr; + gpte->vpage = kvmppc_mmu_book3s_64_ea_to_vp(vcpu, eaddr, data); + gpte->raddr = 
vcpu->arch.magic_page_pa | (gpte->raddr & 0xfff); + gpte->raddr &= KVM_PAM; + gpte->may_execute = true; + gpte->may_read = true; + gpte->may_write = true; + + return 0; + } slbe = kvmppc_mmu_book3s_64_find_slbe(vcpu_book3s, eaddr); if (!slbe) @@ -445,6 +461,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, ulong ea = esid << SID_SHIFT; struct kvmppc_slb *slb; u64 gvsid = esid; + ulong mp_ea = vcpu->arch.magic_page_ea; if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { slb = kvmppc_mmu_book3s_64_find_slbe(to_book3s(vcpu), ea); @@ -464,7 +481,7 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, break; case MSR_DR|MSR_IR: if (!slb) - return -ENOENT; + goto no_slb; *vsid = gvsid; break; @@ -477,6 +494,17 @@ static int kvmppc_mmu_book3s_64_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, *vsid |= VSID_PR; return 0; + +no_slb: + /* Catch magic page case */ + if (unlikely(mp_ea) && + unlikely(esid == (mp_ea >> SID_SHIFT)) && + !(vcpu->arch.shared->msr & MSR_PR)) { + *vsid = VSID_REAL | esid; + return 0; + } + + return -EINVAL; } static bool kvmppc_mmu_book3s_64_is_dcbz32(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 71c1f9027abb..6cdd19a82bda 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -101,18 +101,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) struct kvmppc_sid_map *map; /* Get host physical address for gpa */ - hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT); + hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); if (kvm_is_error_hva(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); return -EINVAL; } hpaddr <<= PAGE_SHIFT; -#if PAGE_SHIFT == 12 -#elif PAGE_SHIFT == 16 - hpaddr |= orig_pte->raddr & 0xf000; -#else -#error Unknown page size -#endif + hpaddr |= orig_pte->raddr & (~0xfffULL & ~PAGE_MASK); /* and write the mapping ea -> hpa into the pt */ vcpu->arch.mmu.esid_to_vsid(vcpu, orig_pte->eaddr >> SID_SHIFT, &vsid); -- cgit v1.2.3 From 5fc87407b55f5799418f4dc5931232c2bc06d077 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:55 +0200 Subject: KVM: PPC: Expose magic page support to guest Now that we have the shared page in place and the MMU code knows about the magic page, we can expose that capability to the guest! 
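On the guest side, the canonical consumer of this feature bit looks roughly like the code below. kvm_para_has_feature() matches the generic helper the guest code later in this log relies on; kvm_arch_para_features() is stubbed with a canned value here so the sketch compiles on its own:

    #include <stdbool.h>

    #define KVM_FEATURE_MAGIC_PAGE 1

    /* Stub: the real version issues KVM_HC_FEATURES via kvm_hypercall0_1(). */
    static unsigned int kvm_arch_para_features(void)
    {
            return 1u << KVM_FEATURE_MAGIC_PAGE;
    }

    static bool kvm_para_has_feature(unsigned int feature)
    {
            return !!(kvm_arch_para_features() & (1u << feature));
    }

    int main(void)
    {
            if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) {
                    /* safe to ask the host to map the magic page */
            }
            return 0;
    }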
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_para.h | 2 ++ arch/powerpc/kvm/powerpc.c | 11 +++++++++++ 2 files changed, 13 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 0653b0d238b4..7438ab360120 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -45,6 +45,8 @@ struct kvm_vcpu_arch_shared { #define HC_EV_SUCCESS 0 #define HC_EV_UNIMPLEMENTED 12 +#define KVM_FEATURE_MAGIC_PAGE 1 + #ifdef __KERNEL__ #ifdef CONFIG_KVM_GUEST diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a4cf4b47e232..fecfe043458d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -61,8 +61,19 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) } switch (nr) { + case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE: + { + vcpu->arch.magic_page_pa = param1; + vcpu->arch.magic_page_ea = param2; + + r = HC_EV_SUCCESS; + break; + } case HC_VENDOR_KVM | KVM_HC_FEATURES: r = HC_EV_SUCCESS; +#if defined(CONFIG_PPC_BOOK3S) /* XXX Missing magic page on BookE */ + r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); +#endif /* Second return value is in r4 */ kvmppc_set_gpr(vcpu, 4, r2); -- cgit v1.2.3 From d17051cb8d223dffd6bb847b0565ef1654f8e0e1 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:57 +0200 Subject: KVM: PPC: Generic KVM PV guest support We have all the hypervisor pieces in place now, but the guest parts are still missing. This patch implements basic awareness of KVM when running Linux as guest. It doesn't do anything with it yet though. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/Makefile | 2 +- arch/powerpc/kernel/asm-offsets.c | 15 +++++++++++++++ arch/powerpc/kernel/kvm.c | 3 +++ arch/powerpc/kernel/kvm_emul.S | 36 ++++++++++++++++++++++++++++++++++++ arch/powerpc/platforms/Kconfig | 10 ++++++++++ 5 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kernel/kvm_emul.S (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 3a6955dc7191..be257b0aae36 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -127,7 +127,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif -obj-$(CONFIG_KVM_GUEST) += kvm.o +obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o # Disable GCOV in odd or sensitive code GCOV_PROFILE_prom_init.o := n diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1221bcdff52f..37486cafb69d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -465,6 +465,21 @@ int main(void) DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); #endif /* CONFIG_PPC_BOOK3S */ #endif + +#ifdef CONFIG_KVM_GUEST + DEFINE(KVM_MAGIC_SCRATCH1, offsetof(struct kvm_vcpu_arch_shared, + scratch1)); + DEFINE(KVM_MAGIC_SCRATCH2, offsetof(struct kvm_vcpu_arch_shared, + scratch2)); + DEFINE(KVM_MAGIC_SCRATCH3, offsetof(struct kvm_vcpu_arch_shared, + scratch3)); + DEFINE(KVM_MAGIC_INT, offsetof(struct kvm_vcpu_arch_shared, + int_pending)); + DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); + DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, + critical)); +#endif + #ifdef CONFIG_44x DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 4f85505e4653..a5ece71ecdd2 100644 --- 
a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -30,6 +30,9 @@ #include #include +#define KVM_MAGIC_PAGE (-4096L) +#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) + unsigned long kvm_hypercall(unsigned long *in, unsigned long *out, unsigned long nr) diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S new file mode 100644 index 000000000000..5cfa2aeeecb0 --- /dev/null +++ b/arch/powerpc/kernel/kvm_emul.S @@ -0,0 +1,36 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright SUSE Linux Products GmbH 2010 + * + * Authors: Alexander Graf + */ + +#include +#include +#include +#include +#include + +/* Hypercall entry point. Will be patched with device tree instructions. */ + +.global kvm_hypercall_start +kvm_hypercall_start: + li r3, -1 + nop + nop + nop + blr + +#define KVM_MAGIC_PAGE (-4096) diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 81c9208025fa..956154f32cfe 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -21,6 +21,16 @@ source "arch/powerpc/platforms/44x/Kconfig" source "arch/powerpc/platforms/40x/Kconfig" source "arch/powerpc/platforms/amigaone/Kconfig" +config KVM_GUEST + bool "KVM Guest support" + default y + ---help--- + This option enables various optimizations for running under the KVM + hypervisor. Overhead for the kernel when not running inside KVM should + be minimal. + + In case of doubt, say Y + config PPC_NATIVE bool depends on 6xx || PPC64 -- cgit v1.2.3 From 73a18109829e7696226a9fd4062d339e7c6ee130 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:58 +0200 Subject: KVM: PPC: KVM PV guest stubs We will soon start to replace instructions from the text section with other, paravirtualized versions. To ease the readability of those patches I split out the generic looping and magic page mapping code. This patch still only contains stubs. But at least it loops through the text section :).
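The loop the next patch adds can be modelled in a few lines: treat the text section as an array of 32-bit words and hand each one to a checker. In the kernel the bounds are _stext and _etext; the buffer here is invented for illustration (0x7c0000a6 is mfmsr with rt=0, per the defines added later in this series):

    #include <stdint.h>

    static uint32_t fake_text[] = {
            0x7c0000a6,     /* mfmsr r0 -- a candidate for patching */
            0x60000000,     /* nop */
    };

    static void check_ins(uint32_t *inst)
    {
            (void)inst;     /* patching decisions go here */
    }

    int main(void)
    {
            uint32_t *end = fake_text + sizeof(fake_text) / sizeof(fake_text[0]);

            for (uint32_t *p = fake_text; p < end; p++)
                    check_ins(p);
            return 0;
    }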
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index a5ece71ecdd2..e93366fbbd21 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -33,6 +33,62 @@ #define KVM_MAGIC_PAGE (-4096L) #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) +#define KVM_MASK_RT 0x03e00000 + +static bool kvm_patching_worked = true; + +static inline void kvm_patch_ins(u32 *inst, u32 new_inst) +{ + *inst = new_inst; + flush_icache_range((ulong)inst, (ulong)inst + 4); +} + +static void kvm_map_magic_page(void *data) +{ + kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, + KVM_MAGIC_PAGE, /* Physical Address */ + KVM_MAGIC_PAGE); /* Effective Address */ +} + +static void kvm_check_ins(u32 *inst) +{ + u32 _inst = *inst; + u32 inst_no_rt = _inst & ~KVM_MASK_RT; + u32 inst_rt = _inst & KVM_MASK_RT; + + switch (inst_no_rt) { + } + + switch (_inst) { + } +} + +static void kvm_use_magic_page(void) +{ + u32 *p; + u32 *start, *end; + u32 tmp; + + /* Tell the host to map the magic page to -4096 on all CPUs */ + on_each_cpu(kvm_map_magic_page, NULL, 1); + + /* Quick self-test to see if the mapping works */ + if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { + kvm_patching_worked = false; + return; + } + + /* Now loop through all code and find instructions */ + start = (void*)_stext; + end = (void*)_etext; + + for (p = start; p < end; p++) + kvm_check_ins(p); + + printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", + kvm_patching_worked ? "worked" : "failed"); +} + unsigned long kvm_hypercall(unsigned long *in, unsigned long *out, unsigned long nr) @@ -69,3 +125,42 @@ unsigned long kvm_hypercall(unsigned long *in, return r3; } EXPORT_SYMBOL_GPL(kvm_hypercall); + +static int kvm_para_setup(void) +{ + extern u32 kvm_hypercall_start; + struct device_node *hyper_node; + u32 *insts; + int len, i; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return -1; + + insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len); + if (len % 4) + return -1; + if (len > (4 * 4)) + return -1; + + for (i = 0; i < (len / 4); i++) + kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]); + + return 0; +} + +static int __init kvm_guest_init(void) +{ + if (!kvm_para_available()) + return 0; + + if (kvm_para_setup()) + return 0; + + if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) + kvm_use_magic_page(); + + return 0; +} + +postcore_initcall(kvm_guest_init); -- cgit v1.2.3 From d1293c927568f5b5b8dd3fa263a98683cf8556dc Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:47:59 +0200 Subject: KVM: PPC: PV instructions to loads and stores Some instructions can simply be replaced by load and store instructions to or from the magic page. This patch replaces often called instructions that fall into the above category. 
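The rewriting rests on one piece of encoding arithmetic: mask the RT field out of the trapped instruction and OR it into a load template together with a 16-bit displacement relative to register 0. A standalone rendering, with the instruction constants taken from the hunk below and the field offset invented for the example:

    #include <stdio.h>
    #include <stdint.h>

    #define KVM_INST_LWZ         0x80000000u
    #define KVM_INST_MFSPR_DSISR 0x7c1202a6u   /* with RT = 0 */
    #define KVM_MASK_RT          0x03e00000u
    #define KVM_MAGIC_PAGE       (-4096L)

    int main(void)
    {
            uint32_t trapped = KVM_INST_MFSPR_DSISR | (5u << 21); /* mfspr r5, dsisr */
            uint32_t rt = trapped & KVM_MASK_RT;
            long addr = KVM_MAGIC_PAGE + 0x60;  /* offset of dsisr: illustrative */

            uint32_t patched = KVM_INST_LWZ | rt | (addr & 0x0000ffff);

            /* lwz r5, <negative offset>(0): reads DSISR straight from the page */
            printf("%08x -> %08x\n", (unsigned)trapped, (unsigned)patched);
            return 0;
    }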
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index e93366fbbd21..9ec572c4d2a5 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -33,7 +33,34 @@ #define KVM_MAGIC_PAGE (-4096L) #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) +#define KVM_INST_LWZ 0x80000000 +#define KVM_INST_STW 0x90000000 +#define KVM_INST_LD 0xe8000000 +#define KVM_INST_STD 0xf8000000 +#define KVM_INST_NOP 0x60000000 +#define KVM_INST_B 0x48000000 +#define KVM_INST_B_MASK 0x03ffffff +#define KVM_INST_B_MAX 0x01ffffff + #define KVM_MASK_RT 0x03e00000 +#define KVM_INST_MFMSR 0x7c0000a6 +#define KVM_INST_MFSPR_SPRG0 0x7c1042a6 +#define KVM_INST_MFSPR_SPRG1 0x7c1142a6 +#define KVM_INST_MFSPR_SPRG2 0x7c1242a6 +#define KVM_INST_MFSPR_SPRG3 0x7c1342a6 +#define KVM_INST_MFSPR_SRR0 0x7c1a02a6 +#define KVM_INST_MFSPR_SRR1 0x7c1b02a6 +#define KVM_INST_MFSPR_DAR 0x7c1302a6 +#define KVM_INST_MFSPR_DSISR 0x7c1202a6 + +#define KVM_INST_MTSPR_SPRG0 0x7c1043a6 +#define KVM_INST_MTSPR_SPRG1 0x7c1143a6 +#define KVM_INST_MTSPR_SPRG2 0x7c1243a6 +#define KVM_INST_MTSPR_SPRG3 0x7c1343a6 +#define KVM_INST_MTSPR_SRR0 0x7c1a03a6 +#define KVM_INST_MTSPR_SRR1 0x7c1b03a6 +#define KVM_INST_MTSPR_DAR 0x7c1303a6 +#define KVM_INST_MTSPR_DSISR 0x7c1203a6 static bool kvm_patching_worked = true; @@ -43,6 +70,34 @@ static inline void kvm_patch_ins(u32 *inst, u32 new_inst) flush_icache_range((ulong)inst, (ulong)inst + 4); } +static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff)); +} + +static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_STW | rt | ((addr + 4) & 0x0000fffc)); +#endif +} + +static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt) +{ + kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc)); +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -57,6 +112,60 @@ static void kvm_check_ins(u32 *inst) u32 inst_rt = _inst & KVM_MASK_RT; switch (inst_no_rt) { + /* Loads */ + case KVM_INST_MFMSR: + kvm_patch_ins_ld(inst, magic_var(msr), inst_rt); + break; + case KVM_INST_MFSPR_SPRG0: + kvm_patch_ins_ld(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MFSPR_SPRG1: + kvm_patch_ins_ld(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MFSPR_SPRG2: + kvm_patch_ins_ld(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MFSPR_SPRG3: + kvm_patch_ins_ld(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MFSPR_SRR0: + kvm_patch_ins_ld(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MFSPR_SRR1: + kvm_patch_ins_ld(inst, magic_var(srr1), inst_rt); + break; + case KVM_INST_MFSPR_DAR: + kvm_patch_ins_ld(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MFSPR_DSISR: + kvm_patch_ins_lwz(inst, magic_var(dsisr), inst_rt); + break; + + /* Stores */ + case KVM_INST_MTSPR_SPRG0: + kvm_patch_ins_std(inst, magic_var(sprg0), inst_rt); + break; + case KVM_INST_MTSPR_SPRG1: 
+ kvm_patch_ins_std(inst, magic_var(sprg1), inst_rt); + break; + case KVM_INST_MTSPR_SPRG2: + kvm_patch_ins_std(inst, magic_var(sprg2), inst_rt); + break; + case KVM_INST_MTSPR_SPRG3: + kvm_patch_ins_std(inst, magic_var(sprg3), inst_rt); + break; + case KVM_INST_MTSPR_SRR0: + kvm_patch_ins_std(inst, magic_var(srr0), inst_rt); + break; + case KVM_INST_MTSPR_SRR1: + kvm_patch_ins_std(inst, magic_var(srr1), inst_rt); + break; + case KVM_INST_MTSPR_DAR: + kvm_patch_ins_std(inst, magic_var(dar), inst_rt); + break; + case KVM_INST_MTSPR_DSISR: + kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); + break; } switch (_inst) { -- cgit v1.2.3 From d1290b15e7f139e24150cc6e6d8e904214359e8a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:00 +0200 Subject: KVM: PPC: PV tlbsync to nop With our current MMU scheme we don't need to know about the tlbsync instruction. So we can just nop it out. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 9ec572c4d2a5..3258922cc22c 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -62,6 +62,8 @@ #define KVM_INST_MTSPR_DAR 0x7c1303a6 #define KVM_INST_MTSPR_DSISR 0x7c1203a6 +#define KVM_INST_TLBSYNC 0x7c00046c + static bool kvm_patching_worked = true; static inline void kvm_patch_ins(u32 *inst, u32 new_inst) @@ -98,6 +100,11 @@ static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt) kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc)); } +static void kvm_patch_ins_nop(u32 *inst) +{ + kvm_patch_ins(inst, KVM_INST_NOP); +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -166,6 +173,11 @@ static void kvm_check_ins(u32 *inst) case KVM_INST_MTSPR_DSISR: kvm_patch_ins_stw(inst, magic_var(dsisr), inst_rt); break; + + /* Nops */ + case KVM_INST_TLBSYNC: + kvm_patch_ins_nop(inst); + break; } switch (_inst) { -- cgit v1.2.3 From 2d4f567103ff5a931e773f2e356b4eb303115deb Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:01 +0200 Subject: KVM: PPC: Introduce kvm_tmp framework We will soon require more sophisticated methods to replace single instructions with multiple instructions. We do that by branching to a memory region where we write replacement code for the instruction to. This region needs to be within 32 MB of the patched instruction though, because that's the furthest we can jump with immediate branches. So we keep 1MB of free space around in bss. After we're done initing we can just tell the mm system that the unused pages are free, but until then we have enough space to fit all our code in. 
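The allocator this patch introduces is a plain bump pointer over a static buffer, with no per-chunk free. A toy version, sizes arbitrary:

    #include <stdio.h>

    static char tmp[1024 * 1024];
    static int tmp_index;

    static void *alloc_chunk(int len)
    {
            void *p;

            if (tmp_index + len > (int)sizeof(tmp))
                    return NULL;            /* out of patch space */

            p = &tmp[tmp_index];
            tmp_index += len;               /* never handed back individually */
            return p;
    }

    int main(void)
    {
            char *a = alloc_chunk(64);
            char *b = alloc_chunk(64);

            printf("%d\n", (int)(b - a));   /* 64 */
            return 0;
    }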
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 3258922cc22c..926f93fd722d 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -65,6 +65,8 @@ #define KVM_INST_TLBSYNC 0x7c00046c static bool kvm_patching_worked = true; +static char kvm_tmp[1024 * 1024]; +static int kvm_tmp_index; static inline void kvm_patch_ins(u32 *inst, u32 new_inst) { @@ -105,6 +107,23 @@ static void kvm_patch_ins_nop(u32 *inst) kvm_patch_ins(inst, KVM_INST_NOP); } +static u32 *kvm_alloc(int len) +{ + u32 *p; + + if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) { + printk(KERN_ERR "KVM: No more space (%d + %d)\n", + kvm_tmp_index, len); + kvm_patching_worked = false; + return NULL; + } + + p = (void*)&kvm_tmp[kvm_tmp_index]; + kvm_tmp_index += len; + + return p; +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -270,17 +289,36 @@ static int kvm_para_setup(void) return 0; } +static __init void kvm_free_tmp(void) +{ + unsigned long start, end; + + start = (ulong)&kvm_tmp[kvm_tmp_index + (PAGE_SIZE - 1)] & PAGE_MASK; + end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK; + + /* Free the tmp space we don't need */ + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + init_page_count(virt_to_page(start)); + free_page(start); + totalram_pages++; + } +} + static int __init kvm_guest_init(void) { if (!kvm_para_available()) - return 0; + goto free_tmp; if (kvm_para_setup()) - return 0; + goto free_tmp; if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) kvm_use_magic_page(); +free_tmp: + kvm_free_tmp(); + return 0; } -- cgit v1.2.3 From 71ee8e34fe26252b11668a95708783ec9c58cbda Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:02 +0200 Subject: KVM: PPC: Introduce branch patching helper We will need to patch several instruction streams over to a different code path, so we need a way to patch a single instruction with a branch somewhere else. This patch adds a helper to facilitate this patching. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 926f93fd722d..239a70d750a2 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -107,6 +107,20 @@ static void kvm_patch_ins_nop(u32 *inst) kvm_patch_ins(inst, KVM_INST_NOP); } +static void kvm_patch_ins_b(u32 *inst, int addr) +{ +#ifdef CONFIG_RELOCATABLE + /* On relocatable kernels interrupts handlers and our code + can be in different regions, so we don't patch them */ + + extern u32 __end_interrupts; + if ((ulong)inst < (ulong)&__end_interrupts) + return; +#endif + + kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK)); +} + static u32 *kvm_alloc(int len) { u32 *p; -- cgit v1.2.3 From 92234722ed631f472f1c4d79d35d8e5cf6910002 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:03 +0200 Subject: KVM: PPC: PV assembler helpers When we hook an instruction we need to make sure we don't clobber any of the registers at that point. So we write them out to scratch space in the magic page. To make sure we don't fall into a race with another piece of hooked code, we need to disable interrupts. 
To make the later patches and code in general easier to read, let's introduce a set of defines that save and restore r30, r31 and cr. Let's also define some helpers to read the lower 32 bits of a 64 bit field on 32 bit systems. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm_emul.S | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 5cfa2aeeecb0..1dac72dd6f6e 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -34,3 +34,33 @@ kvm_hypercall_start: blr #define KVM_MAGIC_PAGE (-4096) + +#ifdef CONFIG_64BIT +#define LL64(reg, offs, reg2) ld reg, (offs)(reg2) +#define STL64(reg, offs, reg2) std reg, (offs)(reg2) +#else +#define LL64(reg, offs, reg2) lwz reg, (offs + 4)(reg2) +#define STL64(reg, offs, reg2) stw reg, (offs + 4)(reg2) +#endif + +#define SCRATCH_SAVE \ + /* Enable critical section. We are critical if \ shared->critical == r1 */ \ + STL64(r1, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); \ + \ + /* Save state */ \ + PPC_STL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + PPC_STL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + mfcr r31; \ + stw r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); + +#define SCRATCH_RESTORE \ + /* Restore state */ \ + PPC_LL r31, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH1)(0); \ + lwz r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH3)(0); \ + mtcr r30; \ + PPC_LL r30, (KVM_MAGIC_PAGE + KVM_MAGIC_SCRATCH2)(0); \ + \ + /* Disable critical section. We are critical if \ shared->critical == r1 and r2 is always != r1 */ \ + STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); -- cgit v1.2.3 From 819a63dc792b0888edd3eda306a9e1e049dcbb1c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:04 +0200 Subject: KVM: PPC: PV mtmsrd L=1 The PowerPC ISA has a special instruction for mtmsr that only changes the EE and RI bits, namely the L=1 form. Since that one is reasonably often occurring and simple to implement, let's go with this first. Writing EE=0 is always just a store. Doing EE=1 also requires us to check for pending interrupts and if necessary exit back to the hypervisor.
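Reduced to C, the decision the emulation blob makes is small; the following standalone model mirrors the assembler added below, with MSR bit values as in powerpc's reg.h:

    #include <stdint.h>
    #include <stdbool.h>

    #define MSR_EE 0x8000ULL
    #define MSR_RI 0x0002ULL

    struct shared_view { uint64_t msr; uint32_t int_pending; };

    /* Returns true when we must bounce to the host for delivery. */
    static bool emulate_mtmsrd_l1(struct shared_view *s, uint64_t gpr)
    {
            s->msr = (s->msr & ~(MSR_EE | MSR_RI)) | (gpr & (MSR_EE | MSR_RI));

            return s->int_pending && (gpr & MSR_EE);
    }

    int main(void)
    {
            struct shared_view s = { .msr = 0, .int_pending = 1 };

            return emulate_mtmsrd_l1(&s, MSR_EE) ? 0 : 1;  /* 0: must trap */
    }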
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 45 +++++++++++++++++++++++++++++++++ arch/powerpc/kernel/kvm_emul.S | 56 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 239a70d750a2..717ab0dded25 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -63,6 +63,7 @@ #define KVM_INST_MTSPR_DSISR 0x7c1203a6 #define KVM_INST_TLBSYNC 0x7c00046c +#define KVM_INST_MTMSRD_L1 0x7c010164 static bool kvm_patching_worked = true; static char kvm_tmp[1024 * 1024]; @@ -138,6 +139,43 @@ static u32 *kvm_alloc(int len) return p; } +extern u32 kvm_emulate_mtmsrd_branch_offs; +extern u32 kvm_emulate_mtmsrd_reg_offs; +extern u32 kvm_emulate_mtmsrd_len; +extern u32 kvm_emulate_mtmsrd[]; + +static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsrd_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsrd_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4); + p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtmsrd_reg_offs] |= rt; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -211,6 +249,13 @@ static void kvm_check_ins(u32 *inst) case KVM_INST_TLBSYNC: kvm_patch_ins_nop(inst); break; + + /* Rewrites */ + case KVM_INST_MTMSRD_L1: + /* We use r30 and r31 during the hook */ + if (get_rt(inst_rt) < 30) + kvm_patch_ins_mtmsrd(inst, inst_rt); + break; } switch (_inst) { diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 1dac72dd6f6e..10dc4a6632fd 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -64,3 +64,59 @@ kvm_hypercall_start: /* Disable critical section. We are critical if \ shared->critical == r1 and r2 is always != r1 */ \ STL64(r2, KVM_MAGIC_PAGE + KVM_MAGIC_CRITICAL, 0); + +.global kvm_emulate_mtmsrd +kvm_emulate_mtmsrd: + + SCRATCH_SAVE + + /* Put MSR & ~(MSR_EE|MSR_RI) in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + lis r30, (~(MSR_EE | MSR_RI))@h + ori r30, r30, (~(MSR_EE | MSR_RI))@l + and r31, r31, r30 + + /* OR the register's (MSR_EE|MSR_RI) on MSR */ +kvm_emulate_mtmsrd_reg: + andi. r30, r0, (MSR_EE|MSR_RI) + or r31, r31, r30 + + /* Put MSR back into magic page */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_check + + /* Check if we may trigger an interrupt */ + andi. r30, r30, MSR_EE + beq no_check + + SCRATCH_RESTORE + + /* Nag hypervisor */ + tlbsync + + b kvm_emulate_mtmsrd_branch + +no_check: + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsrd_branch: + b . 
+kvm_emulate_mtmsrd_end: + +.global kvm_emulate_mtmsrd_branch_offs +kvm_emulate_mtmsrd_branch_offs: + .long (kvm_emulate_mtmsrd_branch - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_reg_offs +kvm_emulate_mtmsrd_reg_offs: + .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4 + +.global kvm_emulate_mtmsrd_len +kvm_emulate_mtmsrd_len: + .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 -- cgit v1.2.3 From 7810927760a0d16d7a41be4dab895fbbf9445bc0 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:05 +0200 Subject: KVM: PPC: PV mtmsrd L=0 and mtmsr There is also a form of mtmsr where all bits need to be addressed. While the PPC64 Linux kernel behaves reasonably well here, on PPC32 we do not have an L=1 form. It does mtmsr even for simple things like only changing EE. So we need to hook into that one as well and check for a mask of bits that we deem safe to change from within guest context. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 51 +++++++++++++++++++++++++ arch/powerpc/kernel/kvm_emul.S | 84 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 135 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 717ab0dded25..8ac57e2c52fa 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -63,7 +63,9 @@ #define KVM_INST_MTSPR_DSISR 0x7c1203a6 #define KVM_INST_TLBSYNC 0x7c00046c +#define KVM_INST_MTMSRD_L0 0x7c000164 #define KVM_INST_MTMSRD_L1 0x7c010164 +#define KVM_INST_MTMSR 0x7c000124 static bool kvm_patching_worked = true; static char kvm_tmp[1024 * 1024]; @@ -176,6 +178,49 @@ static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) kvm_patch_ins_b(inst, distance_start); } +extern u32 kvm_emulate_mtmsr_branch_offs; +extern u32 kvm_emulate_mtmsr_reg1_offs; +extern u32 kvm_emulate_mtmsr_reg2_offs; +extern u32 kvm_emulate_mtmsr_reg3_offs; +extern u32 kvm_emulate_mtmsr_orig_ins_offs; +extern u32 kvm_emulate_mtmsr_len; +extern u32 kvm_emulate_mtmsr[]; + +static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtmsr_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtmsr_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4); + p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtmsr_reg1_offs] |= rt; + p[kvm_emulate_mtmsr_reg2_offs] |= rt; + p[kvm_emulate_mtmsr_reg3_offs] |= rt; + p[kvm_emulate_mtmsr_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -256,6 +301,12 @@ static void kvm_check_ins(u32 *inst) if (get_rt(inst_rt) < 30) kvm_patch_ins_mtmsrd(inst, inst_rt); break; + case KVM_INST_MTMSR: + case KVM_INST_MTMSRD_L0: + /* We use r30 and r31 during the hook */ + if (get_rt(inst_rt) < 30) + kvm_patch_ins_mtmsr(inst, inst_rt); + break; } switch (_inst) { diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index 10dc4a6632fd..8cd22f47dd01 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -120,3 +120,87 @@ kvm_emulate_mtmsrd_reg_offs: .global kvm_emulate_mtmsrd_len kvm_emulate_mtmsrd_len: .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 + + +#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI) +#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS + +.global kvm_emulate_mtmsr +kvm_emulate_mtmsr: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Find the changed bits between old and new MSR */ +kvm_emulate_mtmsr_reg1: + xor r31, r0, r31 + + /* Check if we need to really do mtmsr */ + LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS) + and. r31, r31, r30 + + /* No critical bits changed? Maybe we can stay in the guest. */ + beq maybe_stay_in_guest + +do_mtmsr: + + SCRATCH_RESTORE + + /* Just fire off the mtmsr if it's critical */ +kvm_emulate_mtmsr_orig_ins: + mtmsr r0 + + b kvm_emulate_mtmsr_branch + +maybe_stay_in_guest: + + /* Check if we have to fetch an interrupt */ + lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) + cmpwi r31, 0 + beq+ no_mtmsr + + /* Check if we may trigger an interrupt */ +kvm_emulate_mtmsr_reg2: + andi. r31, r0, MSR_EE + beq no_mtmsr + + b do_mtmsr + +no_mtmsr: + + /* Put MSR into magic page because we don't call mtmsr */ +kvm_emulate_mtmsr_reg3: + STL64(r0, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtmsr_branch: + b . +kvm_emulate_mtmsr_end: + +.global kvm_emulate_mtmsr_branch_offs +kvm_emulate_mtmsr_branch_offs: + .long (kvm_emulate_mtmsr_branch - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg1_offs +kvm_emulate_mtmsr_reg1_offs: + .long (kvm_emulate_mtmsr_reg1 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg2_offs +kvm_emulate_mtmsr_reg2_offs: + .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_reg3_offs +kvm_emulate_mtmsr_reg3_offs: + .long (kvm_emulate_mtmsr_reg3 - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_orig_ins_offs +kvm_emulate_mtmsr_orig_ins_offs: + .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4 + +.global kvm_emulate_mtmsr_len +kvm_emulate_mtmsr_len: + .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 -- cgit v1.2.3 From 644bfa013fd589b0df2470a66bcd104318ef24cd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:06 +0200 Subject: KVM: PPC: PV wrteei On BookE the preferred way to write the EE bit is the wrteei instruction. It already encodes the EE bit in the instruction. So in order to get BookE some speedups as well, let's also PV'nize that instruction.
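Two bits of arithmetic carry the last two patches: the critical-bits test for mtmsr, and the fact that wrteei encodes its operand at exactly the MSR_EE bit position, which is what lets the patching code lift the EE value with (*inst & MSR_EE). A standalone check, with the constants copied from the hunks:

    #include <stdio.h>

    #define MSR_RI 0x0002u
    #define MSR_ME 0x1000u
    #define MSR_EE 0x8000u
    #define MSR_CE 0x20000u

    #define MSR_SAFE_BITS     (MSR_EE | MSR_CE | MSR_ME | MSR_RI)
    #define MSR_CRITICAL_BITS (~MSR_SAFE_BITS)

    #define KVM_INST_WRTEEI_0 0x7c000146u
    #define KVM_INST_WRTEEI_1 0x7c008146u

    int main(void)
    {
            /* Only EE toggled: no critical bit changes, stay in the guest. */
            printf("%d\n", ((0u ^ MSR_EE) & MSR_CRITICAL_BITS) != 0);  /* 0 */

            /* The two wrteei forms differ exactly in the MSR_EE position. */
            printf("%08x\n", KVM_INST_WRTEEI_0 ^ KVM_INST_WRTEEI_1);   /* 00008000 */
            return 0;
    }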
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/kvm.c | 50 ++++++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/kvm_emul.S | 41 ++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 8ac57e2c52fa..e93681753deb 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -67,6 +67,9 @@ #define KVM_INST_MTMSRD_L1 0x7c010164 #define KVM_INST_MTMSR 0x7c000124 +#define KVM_INST_WRTEEI_0 0x7c000146 +#define KVM_INST_WRTEEI_1 0x7c008146 + static bool kvm_patching_worked = true; static char kvm_tmp[1024 * 1024]; static int kvm_tmp_index; @@ -221,6 +224,47 @@ static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) kvm_patch_ins_b(inst, distance_start); } +#ifdef CONFIG_BOOKE + +extern u32 kvm_emulate_wrteei_branch_offs; +extern u32 kvm_emulate_wrteei_ee_offs; +extern u32 kvm_emulate_wrteei_len; +extern u32 kvm_emulate_wrteei[]; + +static void kvm_patch_ins_wrteei(u32 *inst) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_wrteei_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_wrteei_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_wrteei, kvm_emulate_wrteei_len * 4); + p[kvm_emulate_wrteei_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_wrteei_ee_offs] |= (*inst & MSR_EE); + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_wrteei_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + static void kvm_map_magic_page(void *data) { kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, @@ -310,6 +354,12 @@ static void kvm_check_ins(u32 *inst) } switch (_inst) { +#ifdef CONFIG_BOOKE + case KVM_INST_WRTEEI_0: + case KVM_INST_WRTEEI_1: + kvm_patch_ins_wrteei(inst); + break; +#endif } } diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 8cd22f47dd01..3199f65ede2c 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -204,3 +204,44 @@ kvm_emulate_mtmsr_orig_ins_offs: .global kvm_emulate_mtmsr_len kvm_emulate_mtmsr_len: .long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4 + + + +.global kvm_emulate_wrteei +kvm_emulate_wrteei: + + SCRATCH_SAVE + + /* Fetch old MSR in r31 */ + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + /* Remove MSR_EE from old MSR */ + li r30, 0 + ori r30, r30, MSR_EE + andc r31, r31, r30 + + /* OR new MSR_EE onto the old MSR */ +kvm_emulate_wrteei_ee: + ori r31, r31, 0 + + /* Write new MSR value back */ + STL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_wrteei_branch: + b . 
+kvm_emulate_wrteei_end: + +.global kvm_emulate_wrteei_branch_offs +kvm_emulate_wrteei_branch_offs: + .long (kvm_emulate_wrteei_branch - kvm_emulate_wrteei) / 4 + +.global kvm_emulate_wrteei_ee_offs +kvm_emulate_wrteei_ee_offs: + .long (kvm_emulate_wrteei_ee - kvm_emulate_wrteei) / 4 + +.global kvm_emulate_wrteei_len +kvm_emulate_wrteei_len: + .long (kvm_emulate_wrteei_end - kvm_emulate_wrteei) / 4 -- cgit v1.2.3 From 15711e9c927bfc08e66791cbf0ca7887c0880768 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 14:48:08 +0200 Subject: KVM: PPC: Add get_pvinfo interface to query hypercall instructions We need to tell the guest the opcodes that make up a hypercall through interfaces that are controlled by userspace. So we need to add a call for userspace to allow it to query those opcodes so it can pass them on. This is required because the hypercall opcodes can change based on the hypervisor conditions. If we're running in hardware accelerated hypervisor mode, a hypercall looks different from when we're running without hardware acceleration. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/powerpc.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index fecfe043458d..6a53a3f86dae 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -191,6 +191,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_ENABLE_CAP: case KVM_CAP_PPC_OSI: + case KVM_CAP_PPC_GET_PVINFO: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -578,16 +579,53 @@ out: return r; } +static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) +{ + u32 inst_lis = 0x3c000000; + u32 inst_ori = 0x60000000; + u32 inst_nop = 0x60000000; + u32 inst_sc = 0x44000002; + u32 inst_imm_mask = 0xffff; + + /* + * The hypercall to get into KVM from within guest context is as + * follows: + * + * lis r0, KVM_SC_MAGIC_R0@h + * ori r0, r0, KVM_SC_MAGIC_R0@l + * sc + * nop + */ + pvinfo->hcall[0] = inst_lis | ((KVM_SC_MAGIC_R0 >> 16) & inst_imm_mask); + pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask); + pvinfo->hcall[2] = inst_sc; + pvinfo->hcall[3] = inst_nop; + + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + void __user *argp = (void __user *)arg; long r; switch (ioctl) { + case KVM_PPC_GET_PVINFO: { + struct kvm_ppc_pvinfo pvinfo; + r = kvm_vm_ioctl_get_pvinfo(&pvinfo); + if (copy_to_user(argp, &pvinfo, sizeof(pvinfo))) { + r = -EFAULT; + goto out; + } + + break; + } default: r = -ENOTTY; } +out: return r; } -- cgit v1.2.3 From 5302104235f0e9f05781b92a4ab25d20e4537f56 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:16 +0200 Subject: KVM: PPC: Book3S_32 MMU debug compile fixes Due to previous changes, the Book3S_32 guest MMU code didn't compile properly when enabling debugging. This patch repairs the broken code paths, making it possible to define DEBUG_MMU and friends again.
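Coming back to the get_pvinfo call added above: from userspace the flow would look roughly like this. Error handling is trimmed, and the sketch assumes a <linux/kvm.h> that already carries KVM_PPC_GET_PVINFO and struct kvm_ppc_pvinfo, which live in the generic KVM headers rather than in the arch/powerpc diff shown here:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);
            int vm = ioctl(kvm, KVM_CREATE_VM, 0);
            struct kvm_ppc_pvinfo pvinfo;

            if (ioctl(vm, KVM_PPC_GET_PVINFO, &pvinfo) == 0)
                    printf("hcall: %08x %08x %08x %08x\n",
                           pvinfo.hcall[0], pvinfo.hcall[1],
                           pvinfo.hcall[2], pvinfo.hcall[3]);

            /* These four words feed the guest's /hypervisor
             * "hcall-instructions" device tree property. */
            return 0;
    }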
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_32_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index a7d121adc842..5bf4bf8c9e65 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -104,7 +104,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 pteg = (vcpu_book3s->sdr1 & 0xffff0000) | hash; dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n", - vcpu_book3s->vcpu.arch.pc, eaddr, vcpu_book3s->sdr1, pteg, + kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, sre->vsid); r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); @@ -269,7 +269,7 @@ no_page_found: dprintk_pte("KVM MMU: No PTE found (sdr1=0x%llx ptegp=0x%lx)\n", to_book3s(vcpu)->sdr1, ptegp); for (i=0; i<16; i+=2) { - dprintk_pte(" %02d: 0x%x - 0x%x (0x%llx)\n", + dprintk_pte(" %02d: 0x%x - 0x%x (0x%x)\n", i, pteg[i], pteg[i+1], ptem); } } -- cgit v1.2.3 From 2e0908afaf03675d22e40ce45a66b8d2070214ac Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:17 +0200 Subject: KVM: PPC: RCU'ify the Book3s MMU So far we've been running all code without locking of any sort. This wasn't really an issue because I didn't see any parallel access to the shadow MMU code coming. But then I started to implement dirty bitmapping to MOL which has the video code in its own thread, so suddenly we had the dirty bitmap code run in parallel to the shadow mmu code. And with that came trouble. So I went ahead and made the MMU modifying functions as parallelizable as I could think of. I hope I didn't screw up too much RCU logic :-). If you know your way around RCU and locking and what needs to be done when, please take a look at this patch. 
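For reviewers less familiar with the pattern, the locking idiom the patch below adopts is, in a condensed sketch with generic names (an illustration, not code from the patch): readers walk the hash chains under rcu_read_lock(), while writers unlink entries under a spinlock and defer the actual free to an RCU grace period.

#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct hlist_node link;
	struct rcu_head rcu_head;
};

static void item_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct item, rcu_head));
}

static void item_remove(spinlock_t *lock, struct item *i)
{
	spin_lock(lock);
	hlist_del_init_rcu(&i->link);	/* readers may still see the entry */
	spin_unlock(lock);
	call_rcu(&i->rcu_head, item_free_rcu);	/* freed after the grace period */
}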
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 2 + arch/powerpc/kvm/book3s_mmu_hpte.c | 78 ++++++++++++++++++++++++++++--------- 2 files changed, 61 insertions(+), 19 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e1da77579e65..fafc71aa3343 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -165,6 +165,7 @@ struct hpte_cache { struct hlist_node list_pte; struct hlist_node list_vpte; struct hlist_node list_vpte_long; + struct rcu_head rcu_head; u64 host_va; u64 pfn; ulong slot; @@ -295,6 +296,7 @@ struct kvm_vcpu_arch { struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; int hpte_cache_count; + spinlock_t mmu_lock; #endif }; diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 4868d4a7ebc5..b64389362446 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -60,68 +60,94 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { u64 index; + spin_lock(&vcpu->arch.mmu_lock); + /* Add to ePTE list */ index = kvmppc_mmu_hash_pte(pte->pte.eaddr); - hlist_add_head(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); + hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); /* Add to vPTE list */ index = kvmppc_mmu_hash_vpte(pte->pte.vpage); - hlist_add_head(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); + hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); /* Add to vPTE_long list */ index = kvmppc_mmu_hash_vpte_long(pte->pte.vpage); - hlist_add_head(&pte->list_vpte_long, - &vcpu->arch.hpte_hash_vpte_long[index]); + hlist_add_head_rcu(&pte->list_vpte_long, + &vcpu->arch.hpte_hash_vpte_long[index]); + + spin_unlock(&vcpu->arch.mmu_lock); +} + +static void free_pte_rcu(struct rcu_head *head) +{ + struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head); + kmem_cache_free(hpte_cache, pte); } static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { + /* pte already invalidated? 
*/ + if (hlist_unhashed(&pte->list_pte)) + return; + dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", pte->pte.eaddr, pte->pte.vpage, pte->host_va); /* Different for 32 and 64 bit */ kvmppc_mmu_invalidate_pte(vcpu, pte); + spin_lock(&vcpu->arch.mmu_lock); + + hlist_del_init_rcu(&pte->list_pte); + hlist_del_init_rcu(&pte->list_vpte); + hlist_del_init_rcu(&pte->list_vpte_long); + + spin_unlock(&vcpu->arch.mmu_lock); + if (pte->pte.may_write) kvm_release_pfn_dirty(pte->pfn); else kvm_release_pfn_clean(pte->pfn); - hlist_del(&pte->list_pte); - hlist_del(&pte->list_vpte); - hlist_del(&pte->list_vpte_long); - vcpu->arch.hpte_cache_count--; - kmem_cache_free(hpte_cache, pte); + call_rcu(&pte->rcu_head, free_pte_rcu); } static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) { struct hpte_cache *pte; - struct hlist_node *node, *tmp; + struct hlist_node *node; int i; + rcu_read_lock(); + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) invalidate_pte(vcpu, pte); } + + rcu_read_unlock(); } static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; /* Find the list of entries in the map */ list = &vcpu->arch.hpte_hash_pte[kvmppc_mmu_hash_pte(guest_ea)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_pte) + hlist_for_each_entry_rcu(pte, node, list, list_pte) if ((pte->pte.eaddr & ~0xfffUL) == guest_ea) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) @@ -156,33 +182,41 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) static void kvmppc_mmu_pte_vflush_short(struct kvm_vcpu *vcpu, u64 guest_vp) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xfffffffffULL; list = &vcpu->arch.hpte_hash_vpte[kvmppc_mmu_hash_vpte(guest_vp)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte) + hlist_for_each_entry_rcu(pte, node, list, list_vpte) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } /* Flush with mask 0xffffff000 */ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) { struct hlist_head *list; - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; u64 vp_mask = 0xffffff000ULL; list = &vcpu->arch.hpte_hash_vpte_long[ kvmppc_mmu_hash_vpte_long(guest_vp)]; + rcu_read_lock(); + /* Check the list for matching entries and invalidate */ - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) if ((pte->pte.vpage & vp_mask) == guest_vp) invalidate_pte(vcpu, pte); + + rcu_read_unlock(); } void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) @@ -206,21 +240,25 @@ void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) { - struct hlist_node *node, *tmp; + struct hlist_node *node; struct hpte_cache *pte; int i; dprintk_mmu("KVM: Flushing %d Shadow 
pPTEs: 0x%lx - 0x%lx\n", vcpu->arch.hpte_cache_count, pa_start, pa_end); + rcu_read_lock(); + for (i = 0; i < HPTEG_HASH_NUM_VPTE_LONG; i++) { struct hlist_head *list = &vcpu->arch.hpte_hash_vpte_long[i]; - hlist_for_each_entry_safe(pte, node, tmp, list, list_vpte_long) + hlist_for_each_entry_rcu(pte, node, list, list_vpte_long) if ((pte->pte.raddr >= pa_start) && (pte->pte.raddr < pa_end)) invalidate_pte(vcpu, pte); } + + rcu_read_unlock(); } struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu) @@ -259,6 +297,8 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, ARRAY_SIZE(vcpu->arch.hpte_hash_vpte_long)); + spin_lock_init(&vcpu->arch.mmu_lock); + return 0; } -- cgit v1.2.3 From 49451389ecc2b4336c305678c210b25fadd18994 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 29 Jul 2010 15:04:18 +0200 Subject: KVM: PPC: correctly check gfn_to_pfn() return value On failure gfn_to_pfn returns bad_page, so use the correct function to check for that. Signed-off-by: Gleb Natapov Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_32_mmu_host.c | 2 +- arch/powerpc/kvm/book3s_64_mmu_host.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 05e8c9eb0e16..343452cff9b2 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -148,7 +148,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) /* Get host physical address for gpa */ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpaddr)) { + if (is_error_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); return -EINVAL; diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 6cdd19a82bda..672b1495f265 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -102,7 +102,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte) /* Get host physical address for gpa */ hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpaddr)) { + if (is_error_pfn(hpaddr)) { printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr); return -EINVAL; } -- cgit v1.2.3 From 2d27fc5eac0205588cb59ae138062e5e96695276 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:19 +0200 Subject: KVM: PPC: Add book3s_32 tlbie flush acceleration On Book3s_32 the tlbie instruction flushes effective addresses by the mask 0x0ffff000. This is pretty hard to reflect with a hash that hashes ~0xfff, so to speed up that target we should also keep a special hash around for it.
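To make the trade-off concrete: previously a flush of ea & 0x0ffff000 had to be replayed for each of the 16 possible 256MB segment prefixes, while the dedicated hash added below keys on exactly the bits that tlbie compares, so a single bucket walk covers them all. An illustrative helper, mirroring the one in the diff below:

#include <linux/hash.h>
#include <linux/types.h>

/* Only ea & 0x0ffff000 feeds the hash, so e.g. 0x1045a000 and
 * 0x3045a000 land in the same bucket regardless of their segment. */
static u64 long_bucket_for(u64 eaddr)
{
	return hash_64((eaddr & 0x0ffff000) >> 12, HPTEG_HASH_BITS_PTE_LONG);
}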
Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_host.h | 4 ++++ arch/powerpc/kvm/book3s_mmu_hpte.c | 40 ++++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fafc71aa3343..bba3b9b72a39 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -42,9 +42,11 @@ #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 +#define HPTEG_HASH_BITS_PTE_LONG 12 #define HPTEG_HASH_BITS_VPTE 13 #define HPTEG_HASH_BITS_VPTE_LONG 5 #define HPTEG_HASH_NUM_PTE (1 << HPTEG_HASH_BITS_PTE) +#define HPTEG_HASH_NUM_PTE_LONG (1 << HPTEG_HASH_BITS_PTE_LONG) #define HPTEG_HASH_NUM_VPTE (1 << HPTEG_HASH_BITS_VPTE) #define HPTEG_HASH_NUM_VPTE_LONG (1 << HPTEG_HASH_BITS_VPTE_LONG) @@ -163,6 +165,7 @@ struct kvmppc_mmu { struct hpte_cache { struct hlist_node list_pte; + struct hlist_node list_pte_long; struct hlist_node list_vpte; struct hlist_node list_vpte_long; struct rcu_head rcu_head; @@ -293,6 +296,7 @@ struct kvm_vcpu_arch { #ifdef CONFIG_PPC_BOOK3S struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; + struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; struct hlist_head hpte_hash_vpte_long[HPTEG_HASH_NUM_VPTE_LONG]; int hpte_cache_count; diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index b64389362446..02c64ab99c97 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -45,6 +45,12 @@ static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) return hash_64(eaddr >> PTE_SIZE, HPTEG_HASH_BITS_PTE); } +static inline u64 kvmppc_mmu_hash_pte_long(u64 eaddr) +{ + return hash_64((eaddr & 0x0ffff000) >> PTE_SIZE, + HPTEG_HASH_BITS_PTE_LONG); +} + static inline u64 kvmppc_mmu_hash_vpte(u64 vpage) { return hash_64(vpage & 0xfffffffffULL, HPTEG_HASH_BITS_VPTE); @@ -66,6 +72,11 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) index = kvmppc_mmu_hash_pte(pte->pte.eaddr); hlist_add_head_rcu(&pte->list_pte, &vcpu->arch.hpte_hash_pte[index]); + /* Add to ePTE_long list */ + index = kvmppc_mmu_hash_pte_long(pte->pte.eaddr); + hlist_add_head_rcu(&pte->list_pte_long, + &vcpu->arch.hpte_hash_pte_long[index]); + /* Add to vPTE list */ index = kvmppc_mmu_hash_vpte(pte->pte.vpage); hlist_add_head_rcu(&pte->list_vpte, &vcpu->arch.hpte_hash_vpte[index]); @@ -99,6 +110,7 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) spin_lock(&vcpu->arch.mmu_lock); hlist_del_init_rcu(&pte->list_pte); + hlist_del_init_rcu(&pte->list_pte_long); hlist_del_init_rcu(&pte->list_vpte); hlist_del_init_rcu(&pte->list_vpte_long); @@ -150,10 +162,28 @@ static void kvmppc_mmu_pte_flush_page(struct kvm_vcpu *vcpu, ulong guest_ea) rcu_read_unlock(); } -void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) { - u64 i; + struct hlist_head *list; + struct hlist_node *node; + struct hpte_cache *pte; + + /* Find the list of entries in the map */ + list = &vcpu->arch.hpte_hash_pte_long[ + kvmppc_mmu_hash_pte_long(guest_ea)]; + rcu_read_lock(); + + /* Check the list for matching entries and invalidate */ + hlist_for_each_entry_rcu(pte, node, list, list_pte_long) + if ((pte->pte.eaddr & 0x0ffff000UL) == guest_ea) + invalidate_pte(vcpu, pte); + 
+ rcu_read_unlock(); +} + +void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) +{ dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", vcpu->arch.hpte_cache_count, guest_ea, ea_mask); @@ -164,9 +194,7 @@ void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) kvmppc_mmu_pte_flush_page(vcpu, guest_ea); break; case 0x0ffff000: - /* 32-bit flush w/o segment, go through all possible segments */ - for (i = 0; i < 0x100000000ULL; i += 0x10000000ULL) - kvmppc_mmu_pte_flush(vcpu, guest_ea | i, ~0xfffUL); + kvmppc_mmu_pte_flush_long(vcpu, guest_ea); break; case 0: /* Doing a complete flush -> start from scratch */ @@ -292,6 +320,8 @@ int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu) /* init hpte lookup hashes */ kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte, ARRAY_SIZE(vcpu->arch.hpte_hash_pte)); + kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_pte_long, + ARRAY_SIZE(vcpu->arch.hpte_hash_pte_long)); kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte, ARRAY_SIZE(vcpu->arch.hpte_hash_vpte)); kvmppc_mmu_hpte_init_hash(vcpu->arch.hpte_hash_vpte_long, -- cgit v1.2.3 From 0e677903878ef90e09a45507255c0b1e36166064 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:20 +0200 Subject: KVM: PPC: Use MSR_DR for external load_up Book3S_32 requires MSR_DR to be disabled during load_up_xxx while on Book3S_64 it's supposed to be enabled. I misread the code and disabled it in both cases, potentially breaking the PS3 which has a really small RMA. This patch makes KVM work on the PS3 again. Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_rmhandlers.S | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 506d5c316c96..229d3d662af9 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -202,8 +202,25 @@ _GLOBAL(kvmppc_rmcall) #if defined(CONFIG_PPC_BOOK3S_32) #define STACK_LR INT_FRAME_SIZE+4 + +/* load_up_xxx have to run with MSR_DR=0 on Book3S_32 */ +#define MSR_EXT_START \ + PPC_STL r20, _NIP(r1); \ + mfmsr r20; \ + LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ + andc r3,r20,r3; /* Disable DR,EE */ \ + mtmsr r3; \ + sync + +#define MSR_EXT_END \ + mtmsr r20; /* Enable DR,EE */ \ + sync; \ + PPC_LL r20, _NIP(r1) + #elif defined(CONFIG_PPC_BOOK3S_64) #define STACK_LR _LINK +#define MSR_EXT_START +#define MSR_EXT_END #endif /* @@ -215,19 +232,12 @@ _GLOBAL(kvmppc_load_up_ ## what); \ PPC_STLU r1, -INT_FRAME_SIZE(r1); \ mflr r3; \ PPC_STL r3, STACK_LR(r1); \ - PPC_STL r20, _NIP(r1); \ - mfmsr r20; \ - LOAD_REG_IMMEDIATE(r3, MSR_DR|MSR_EE); \ - andc r3,r20,r3; /* Disable DR,EE */ \ - mtmsr r3; \ - sync; \ + MSR_EXT_START; \ \ bl FUNC(load_up_ ## what); \ \ - mtmsr r20; /* Enable DR,EE */ \ - sync; \ + MSR_EXT_END; \ PPC_LL r3, STACK_LR(r1); \ - PPC_LL r20, _NIP(r1); \ mtlr r3; \ addi r1, r1, INT_FRAME_SIZE; \ blr -- cgit v1.2.3 From 2b05d71fefc3b83e686bead355c6d35e440c4261 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:21 +0200 Subject: KVM: PPC: Make long relocations be ulong On Book3S KVM we directly expose some asm pointers to C code as variables. These need to be relocated and thus break on relocatable kernels. To make sure we can at least build, let's mark them as long instead of u32 where 64bit relocations don't work. 
This fixes the following build error: WARNING: 2 bad relocations^M > c000000000008590 R_PPC64_ADDR32 .text+0x4000000000008460^M > c000000000008594 R_PPC64_ADDR32 .text+0x4000000000008598^M Please keep in mind that actually using KVM on a relocated kernel might still break. This only fixes the compile problem. Reported-by: Subrata Modak Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm_book3s.h | 4 ++-- arch/powerpc/kvm/book3s_rmhandlers.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 00cf8b07e502..f04f516c97da 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -132,8 +132,8 @@ extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn); -extern u32 kvmppc_trampoline_lowmem; -extern u32 kvmppc_trampoline_enter; +extern ulong kvmppc_trampoline_lowmem; +extern ulong kvmppc_trampoline_enter; extern void kvmppc_rmcall(ulong srr0, ulong srr1); extern void kvmppc_load_up_fpu(void); extern void kvmppc_load_up_altivec(void); diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 229d3d662af9..2b9c9088d00e 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -252,10 +252,10 @@ define_load_up(vsx) .global kvmppc_trampoline_lowmem kvmppc_trampoline_lowmem: - .long kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START + PPC_LONG kvmppc_handler_lowmem_trampoline - CONFIG_KERNEL_START .global kvmppc_trampoline_enter kvmppc_trampoline_enter: - .long kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START + PPC_LONG kvmppc_handler_trampoline_enter - CONFIG_KERNEL_START #include "book3s_segment.S" -- cgit v1.2.3 From a58ddea556f8877ccf7caa046b6d6b32982f5b1d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 29 Jul 2010 15:04:22 +0200 Subject: KVM: PPC: Move KVM trampolines before __end_interrupts When using a relocatable kernel we need to make sure that the trampline code and the interrupt handlers are both copied to low memory. The only way to do this reliably is to put them in the copied section. This patch should make relocated kernels work with KVM. KVM-Stable-Tag Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/exceptions-64s.S | 6 ++++++ arch/powerpc/kernel/head_64.S | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f53029a01554..1667a078b3e6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -299,6 +299,12 @@ slb_miss_user_pseries: b . /* prevent spec. 
execution */ #endif /* __DISABLED__ */ +/* KVM's trampoline code needs to be close to the interrupt handlers */ + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER +#include "../kvm/book3s_rmhandlers.S" +#endif + .align 7 .globl __end_interrupts __end_interrupts: diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index c571cd3c1453..f0dd577e4a5b 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -166,12 +166,6 @@ exception_marker: #include "exceptions-64s.S" #endif -/* KVM trampoline code needs to be close to the interrupt handlers */ - -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER -#include "../kvm/book3s_rmhandlers.S" -#endif - _GLOBAL(generic_secondary_thread_init) mr r24,r3 -- cgit v1.2.3 From 646bab55a278ceb1cf43b1f80d3dd468be62a421 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 17 Aug 2010 10:08:52 +0800 Subject: KVM: PPC: fix leakage of error page in kvmppc_patch_dcbz() Add kvm_release_page_clean() after is_error_page() to avoid leaking the error page. Signed-off-by: Wei Yongjun Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index eee97b5a7400..7656b6df0d8b 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -455,8 +455,10 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) int i; hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (is_error_page(hpage)) + if (is_error_page(hpage)) { + kvm_release_page_clean(hpage); return; + } hpage_offset = pte->raddr & ~PAGE_MASK; hpage_offset &= ~0xFFFULL; -- cgit v1.2.3 From 989044ee0fdc6c22a11ea1d22e2a3d17463cb564 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Aug 2010 12:01:56 +0200 Subject: KVM: PPC: Fix CONFIG_KVM_GUEST && !CONFIG_KVM case When CONFIG_KVM_GUEST is selected, but CONFIG_KVM is not, we were missing some defines in asm-offsets.c and included too many headers at other places. This patch makes the above configuration work. Reported-by: Stephen Rothwell Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kernel/asm-offsets.c | 6 +++--- arch/powerpc/kernel/kvm.c | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 37486cafb69d..6d92b4e13ebf 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -48,11 +48,11 @@ #ifdef CONFIG_PPC_ISERIES #include #endif -#ifdef CONFIG_KVM +#if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST) #include -#ifndef CONFIG_BOOKE -#include #endif +#if defined(CONFIG_KVM) && defined(CONFIG_PPC_BOOK3S) +#include #endif #ifdef CONFIG_PPC32 diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index e93681753deb..d3a2cc50d611 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -25,7 +25,6 @@ #include #include -#include #include #include #include -- cgit v1.2.3 From bed1ed9860d3744cc6488831fa5672d5c7aff4be Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 11:06:26 +0200 Subject: KVM: PPC: Move EXIT_DEBUG partially to tracepoints We have a debug printk on every exit that is usually #ifdef'ed out. Using tracepoints makes a lot more sense here though, as they can be dynamically enabled. This patch converts the most commonly used debug printks of EXIT_DEBUG to tracepoints.
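As a usage note, the converted events can then be switched on from userspace at run time, for instance with a sketch like the following; the tracefs path assumes debugfs is mounted at the usual location and that the event group is named after TRACE_SYSTEM, both assumptions of the example:

#include <fcntl.h>
#include <unistd.h>

static int enable_book3s_exit_tracing(void)
{
	int fd = open("/sys/kernel/debug/tracing/events/kvm/"
		      "kvm_book3s_exit/enable", O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, "1", 1) != 1) {
		close(fd);
		return -1;
	}
	return close(fd);
}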
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s.c | 26 ++++-------------------- arch/powerpc/kvm/trace.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 22 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 7656b6df0d8b..37db61d37041 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -17,6 +17,7 @@ #include #include #include +#include "trace.h" #include #include @@ -35,7 +36,6 @@ #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU /* #define EXIT_DEBUG */ -/* #define EXIT_DEBUG_SIMPLE */ /* #define DEBUG_EXT */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, @@ -105,14 +105,6 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_VSX); } -#if defined(EXIT_DEBUG) -static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu) -{ - u64 jd = mftb() - vcpu->arch.dec_jiffies; - return vcpu->arch.dec - jd; -} -#endif - static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu) { ulong smsr = vcpu->arch.shared->msr; @@ -850,16 +842,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; -#ifdef EXIT_DEBUG - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n", - exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), - kvmppc_get_dec(vcpu), to_svcpu(vcpu)->shadow_srr1); -#elif defined (EXIT_DEBUG_SIMPLE) - if ((exit_nr != 0x900) && (exit_nr != 0x500)) - printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n", - exit_nr, kvmppc_get_pc(vcpu), kvmppc_get_fault_dar(vcpu), - vcpu->arch.shared->msr); -#endif + + trace_kvm_book3s_exit(exit_nr, vcpu); kvm_resched(vcpu); switch (exit_nr) { case BOOK3S_INTERRUPT_INST_STORAGE: @@ -1091,9 +1075,7 @@ program_interrupt: } } -#ifdef EXIT_DEBUG - printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, kvmppc_get_pc(vcpu), r); -#endif + trace_kvm_book3s_reenter(r, vcpu); return r; } diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index a8e840018052..b5e9d81a1ea2 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -98,6 +98,57 @@ TRACE_EVENT(kvm_gtlb_write, __entry->word1, __entry->word2) ); + +/************************************************************************* + * Book3S trace points * + *************************************************************************/ + +#ifdef CONFIG_PPC_BOOK3S + +TRACE_EVENT(kvm_book3s_exit, + TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu), + TP_ARGS(exit_nr, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, exit_nr ) + __field( unsigned long, pc ) + __field( unsigned long, msr ) + __field( unsigned long, dar ) + __field( unsigned long, srr1 ) + ), + + TP_fast_assign( + __entry->exit_nr = exit_nr; + __entry->pc = kvmppc_get_pc(vcpu); + __entry->dar = kvmppc_get_fault_dar(vcpu); + __entry->msr = vcpu->arch.shared->msr; + __entry->srr1 = to_svcpu(vcpu)->shadow_srr1; + ), + + TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx", + __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar, + __entry->srr1) +); + +TRACE_EVENT(kvm_book3s_reenter, + TP_PROTO(int r, struct kvm_vcpu *vcpu), + TP_ARGS(r, vcpu), + + TP_STRUCT__entry( + __field( unsigned int, r ) + __field( unsigned long, pc ) + ), + + TP_fast_assign( + __entry->r = r; + __entry->pc = kvmppc_get_pc(vcpu); + ), + + TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) 
+); + +#endif /* CONFIG_PPC_BOOK3S */ + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 82fdee7bce546c3ce38dcf0db6096eea73dbe7bd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 11:38:54 +0200 Subject: KVM: PPC: Move book3s_64 mmu map debug print to trace point This patch moves Book3s MMU debugging over to tracepoints. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_host.c | 13 ++----------- arch/powerpc/kvm/trace.h | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 672b1495f265..aa516ad81de7 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -28,19 +28,13 @@ #include #include #include +#include "trace.h" #define PTE_SIZE 12 #define VSID_ALL 0 -/* #define DEBUG_MMU */ /* #define DEBUG_SLB */ -#ifdef DEBUG_MMU -#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_mmu(a, ...) do { } while(0) -#endif - #ifdef DEBUG_SLB #define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__) #else @@ -156,10 +150,7 @@ map_again: } else { struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu); - dprintk_mmu("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx\n", - ((rflags & HPTE_R_PP) == 3) ? '-' : 'w', - (rflags & HPTE_R_N) ? '-' : 'x', - orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); + trace_kvm_book3s_64_mmu_map(rflags, hpteg, va, hpaddr, orig_pte); /* The ppc_md code may give us a secondary entry even though we asked for a primary. Fix up. */ diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index b5e9d81a1ea2..8ed6f1c7c86e 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -147,6 +147,40 @@ TRACE_EVENT(kvm_book3s_reenter, TP_printk("reentry r=%d | pc=0x%lx", __entry->r, __entry->pc) ); +#ifdef CONFIG_PPC_BOOK3S_64 + +TRACE_EVENT(kvm_book3s_64_mmu_map, + TP_PROTO(int rflags, ulong hpteg, ulong va, pfn_t hpaddr, + struct kvmppc_pte *orig_pte), + TP_ARGS(rflags, hpteg, va, hpaddr, orig_pte), + + TP_STRUCT__entry( + __field( unsigned char, flag_w ) + __field( unsigned char, flag_x ) + __field( unsigned long, eaddr ) + __field( unsigned long, hpteg ) + __field( unsigned long, va ) + __field( unsigned long long, vpage ) + __field( unsigned long, hpaddr ) + ), + + TP_fast_assign( + __entry->flag_w = ((rflags & HPTE_R_PP) == 3) ? '-' : 'w'; + __entry->flag_x = (rflags & HPTE_R_N) ? '-' : 'x'; + __entry->eaddr = orig_pte->eaddr; + __entry->hpteg = hpteg; + __entry->va = va; + __entry->vpage = orig_pte->vpage; + __entry->hpaddr = hpaddr; + ), + + TP_printk("KVM: %c%c Map 0x%lx: [%lx] 0x%lx (0x%llx) -> %lx", + __entry->flag_w, __entry->flag_x, __entry->eaddr, + __entry->hpteg, __entry->va, __entry->vpage, __entry->hpaddr) +); + +#endif /* CONFIG_PPC_BOOK3S_64 */ + #endif /* CONFIG_PPC_BOOK3S */ #endif /* _TRACE_KVM_H */ -- cgit v1.2.3 From 4c4eea7769d0099ea09f9bdb7aed1cc61d57c9d6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 12:51:07 +0200 Subject: KVM: PPC: Add tracepoint for generic mmu map This patch moves the generic mmu map debugging over to tracepoints. 
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_mmu_hpte.c | 3 +++ arch/powerpc/kvm/trace.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 02c64ab99c97..ac94bd992564 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -21,6 +21,7 @@ #include #include #include +#include "trace.h" #include #include @@ -66,6 +67,8 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { u64 index; + trace_kvm_book3s_mmu_map(pte); + spin_lock(&vcpu->arch.mmu_lock); /* Add to ePTE list */ diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 8ed6f1c7c86e..68a84442d799 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -181,6 +181,35 @@ TRACE_EVENT(kvm_book3s_64_mmu_map, #endif /* CONFIG_PPC_BOOK3S_64 */ +TRACE_EVENT(kvm_book3s_mmu_map, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_va ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_va = pte->host_va; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Map: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* _TRACE_KVM_H */ -- cgit v1.2.3 From 8696ee431233171b3c1cc82bae0193efc4fef2ac Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 12:55:19 +0200 Subject: KVM: PPC: Move pte invalidate debug code to tracepoint This patch moves the SPTE flush debug printk over to tracepoints. 
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_mmu_hpte.c | 3 +-- arch/powerpc/kvm/trace.h | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index ac94bd992564..3397152a2b26 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -104,8 +104,7 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) if (hlist_unhashed(&pte->list_pte)) return; - dprintk_mmu("KVM: Flushing SPT: 0x%lx (0x%llx) -> 0x%llx\n", - pte->pte.eaddr, pte->pte.vpage, pte->host_va); + trace_kvm_book3s_mmu_invalidate(pte); /* Different for 32 and 64 bit */ kvmppc_mmu_invalidate_pte(vcpu, pte); diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 68a84442d799..06ad93e40648 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -210,6 +210,35 @@ TRACE_EVENT(kvm_book3s_mmu_map, __entry->vpage, __entry->raddr, __entry->flags) ); +TRACE_EVENT(kvm_book3s_mmu_invalidate, + TP_PROTO(struct hpte_cache *pte), + TP_ARGS(pte), + + TP_STRUCT__entry( + __field( u64, host_va ) + __field( u64, pfn ) + __field( ulong, eaddr ) + __field( u64, vpage ) + __field( ulong, raddr ) + __field( int, flags ) + ), + + TP_fast_assign( + __entry->host_va = pte->host_va; + __entry->pfn = pte->pfn; + __entry->eaddr = pte->pte.eaddr; + __entry->vpage = pte->pte.vpage; + __entry->raddr = pte->pte.raddr; + __entry->flags = (pte->pte.may_read ? 0x4 : 0) | + (pte->pte.may_write ? 0x2 : 0) | + (pte->pte.may_execute ? 0x1 : 0); + ), + + TP_printk("Flush: hva=%llx pfn=%llx ea=%lx vp=%llx ra=%lx [%x]", + __entry->host_va, __entry->pfn, __entry->eaddr, + __entry->vpage, __entry->raddr, __entry->flags) +); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* _TRACE_KVM_H */ -- cgit v1.2.3 From c22c31963b4b0c23250e8f520a76427b3986b73b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 13:38:18 +0200 Subject: KVM: PPC: Fix sid map search after flush After a flush the sid map contained lots of entries with 0 for their gvsid and hvsid value. Unfortunately, 0 can be a real value the guest searches for when looking up a vsid, so it would incorrectly find the host's 0 hvsid mapping which doesn't belong to our sid space. So let's also check for the valid bit that indicates that the sid we're looking at actually contains useful data.
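Sketched with illustrative values, the failure mode is: after a flush, a map slot reads { valid = false, guest_vsid = 0, host_vsid = 0 }, so a guest that genuinely looks up gvsid 0 used to match this stale slot on guest_vsid alone and receive the host's VSID 0. Checking the valid bit first, as the hunks below do, rejects the zeroed slot:

/* Illustration only: a zeroed slot must not satisfy a lookup for gvsid 0. */
static bool slot_matches(struct kvmppc_sid_map *map, u64 gvsid)
{
	return map->valid && map->guest_vsid == gvsid;
}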
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index aa516ad81de7..ebb1b5ddabfb 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -65,14 +65,14 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); map = &to_book3s(vcpu)->sid_map[sid_map_mask]; - if (map->guest_vsid == gvsid) { + if (map->valid && (map->guest_vsid == gvsid)) { dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n", gvsid, map->host_vsid); return map; } map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; - if (map->guest_vsid == gvsid) { + if (map->valid && (map->guest_vsid == gvsid)) { dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n", gvsid, map->host_vsid); return map; -- cgit v1.2.3 From c60b4cf70127941e2f944a7971a7f6b3ecb367ac Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 13:40:30 +0200 Subject: KVM: PPC: Add tracepoints for generic spte flushes The different ways of flushing shadow ptes have their own debug prints which use stupid old printk. Let's move them to tracepoints, making them more easily available, faster, and possible to activate on demand. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_mmu_hpte.c | 18 +++--------------- arch/powerpc/kvm/trace.h | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 3397152a2b26..bd6a7676d0c8 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -31,14 +31,6 @@ #define PTE_SIZE 12 -/* #define DEBUG_MMU */ - -#ifdef DEBUG_MMU -#define dprintk_mmu(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_mmu(a, ...) 
do { } while(0) -#endif - static struct kmem_cache *hpte_cache; static inline u64 kvmppc_mmu_hash_pte(u64 eaddr) @@ -186,9 +178,7 @@ static void kvmppc_mmu_pte_flush_long(struct kvm_vcpu *vcpu, ulong guest_ea) void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, ulong guest_ea, ulong ea_mask) { - dprintk_mmu("KVM: Flushing %d Shadow PTEs: 0x%lx & 0x%lx\n", - vcpu->arch.hpte_cache_count, guest_ea, ea_mask); - + trace_kvm_book3s_mmu_flush("", vcpu, guest_ea, ea_mask); guest_ea &= ea_mask; switch (ea_mask) { @@ -251,8 +241,7 @@ static void kvmppc_mmu_pte_vflush_long(struct kvm_vcpu *vcpu, u64 guest_vp) void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask) { - dprintk_mmu("KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n", - vcpu->arch.hpte_cache_count, guest_vp, vp_mask); + trace_kvm_book3s_mmu_flush("v", vcpu, guest_vp, vp_mask); guest_vp &= vp_mask; switch(vp_mask) { @@ -274,8 +263,7 @@ void kvmppc_mmu_pte_pflush(struct kvm_vcpu *vcpu, ulong pa_start, ulong pa_end) struct hpte_cache *pte; int i; - dprintk_mmu("KVM: Flushing %d Shadow pPTEs: 0x%lx - 0x%lx\n", - vcpu->arch.hpte_cache_count, pa_start, pa_end); + trace_kvm_book3s_mmu_flush("p", vcpu, pa_start, pa_end); rcu_read_lock(); diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 06ad93e40648..23f757a69163 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -239,6 +239,29 @@ TRACE_EVENT(kvm_book3s_mmu_invalidate, __entry->vpage, __entry->raddr, __entry->flags) ); +TRACE_EVENT(kvm_book3s_mmu_flush, + TP_PROTO(const char *type, struct kvm_vcpu *vcpu, unsigned long long p1, + unsigned long long p2), + TP_ARGS(type, vcpu, p1, p2), + + TP_STRUCT__entry( + __field( int, count ) + __field( unsigned long long, p1 ) + __field( unsigned long long, p2 ) + __field( const char *, type ) + ), + + TP_fast_assign( + __entry->count = vcpu->arch.hpte_cache_count; + __entry->p1 = p1; + __entry->p2 = p2; + __entry->type = type; + ), + + TP_printk("Flush %d %sPTEs: %llx - %llx", + __entry->count, __entry->type, __entry->p1, __entry->p2) +); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* _TRACE_KVM_H */ -- cgit v1.2.3 From 4cb6b7ea0cd085e6613153ad69608cad6421abcc Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 16:08:22 +0200 Subject: KVM: PPC: Preload magic page when in kernel mode When the guest jumps into kernel mode and has the magic page mapped, there's a very high chance that it will also use it. So let's detect that scenario and map the segment accordingly.
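Condensed, the hunk below boils down to this sketch: with MSR_PR clear the guest is in kernel mode, and which address is worth preloading depends on whether data translation (MSR_DR) is active at that point.

if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) {
	ulong seg = (msr & MSR_DR) ? vcpu->arch.magic_page_ea	/* translated */
				   : vcpu->arch.magic_page_pa;	/* real mode */

	kvmppc_mmu_map_segment(vcpu, seg);
}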
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 37db61d37041..54ca578239db 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -145,6 +145,16 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, kvmppc_get_pc(vcpu)); + + /* Preload magic page segment when in kernel mode */ + if (!(msr & MSR_PR) && vcpu->arch.magic_page_pa) { + struct kvm_vcpu_arch *a = &vcpu->arch; + + if (msr & MSR_DR) + kvmppc_mmu_map_segment(vcpu, a->magic_page_ea); + else + kvmppc_mmu_map_segment(vcpu, a->magic_page_pa); + } } /* Preload FPU if it's enabled */ -- cgit v1.2.3 From 2e602847d9c2d6b487bda62bbbe550db40ca912f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 20:11:39 +0200 Subject: KVM: PPC: Don't flush PTEs on NX/RO hit When hitting a no-execute or read-only data/inst storage interrupt we were flushing the respective PTE so we're sure it gets properly overwritten next. According to the spec, this is unnecessary though. The guest issues a tlbie anyways, so we're safe to just keep the PTE around and have it manually removed from the guest, saving us a flush. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 54ca578239db..2fb528f417ff 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -887,7 +887,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.shared->msr |= to_svcpu(vcpu)->shadow_srr1 & 0x58000000; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, kvmppc_get_pc(vcpu), ~0xFFFUL); r = RESUME_GUEST; } break; @@ -913,7 +912,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.shared->dar = dar; vcpu->arch.shared->dsisr = to_svcpu(vcpu)->fault_dsisr; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - kvmppc_mmu_pte_flush(vcpu, dar, ~0xFFFUL); r = RESUME_GUEST; } break; -- cgit v1.2.3 From e7c1d14e3bf40b87e6a3f68964b36dbb2c875c0f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 21:24:48 +0200 Subject: KVM: PPC: Make invalidation code more reliable There is a race condition in the pte invalidation code path where we can't be sure if a pte was invalidated already. So let's move the spin lock around to get rid of the race. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_mmu_hpte.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index bd6a7676d0c8..79751d8dd131 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -92,10 +92,6 @@ static void free_pte_rcu(struct rcu_head *head) static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { - /* pte already invalidated? */ - if (hlist_unhashed(&pte->list_pte)) - return; - trace_kvm_book3s_mmu_invalidate(pte); /* Different for 32 and 64 bit */ @@ -103,18 +99,24 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) spin_lock(&vcpu->arch.mmu_lock); + /* pte already invalidated in between? 
*/ + if (hlist_unhashed(&pte->list_pte)) { + spin_unlock(&vcpu->arch.mmu_lock); + return; + } + hlist_del_init_rcu(&pte->list_pte); hlist_del_init_rcu(&pte->list_pte_long); hlist_del_init_rcu(&pte->list_vpte); hlist_del_init_rcu(&pte->list_vpte_long); - spin_unlock(&vcpu->arch.mmu_lock); - if (pte->pte.may_write) kvm_release_pfn_dirty(pte->pfn); else kvm_release_pfn_clean(pte->pfn); + spin_unlock(&vcpu->arch.mmu_lock); + vcpu->arch.hpte_cache_count--; call_rcu(&pte->rcu_head, free_pte_rcu); } -- cgit v1.2.3 From 928d78be54014e65498e289fdc3f82acc4b804a9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 21:25:33 +0200 Subject: KVM: PPC: Move slb debugging to tracepoints This patch moves debugging printks for shadow SLB debugging over to tracepoints. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_host.c | 22 +++-------- arch/powerpc/kvm/trace.h | 73 +++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 17 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index ebb1b5ddabfb..321c931f691c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -33,14 +33,6 @@ #define PTE_SIZE 12 #define VSID_ALL 0 -/* #define DEBUG_SLB */ - -#ifdef DEBUG_SLB -#define dprintk_slb(a, ...) printk(KERN_INFO a, __VA_ARGS__) -#else -#define dprintk_slb(a, ...) do { } while(0) -#endif - void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { ppc_md.hpte_invalidate(pte->slot, pte->host_va, @@ -66,20 +58,17 @@ static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) sid_map_mask = kvmppc_sid_hash(vcpu, gvsid); map = &to_book3s(vcpu)->sid_map[sid_map_mask]; if (map->valid && (map->guest_vsid == gvsid)) { - dprintk_slb("SLB: Searching: 0x%llx -> 0x%llx\n", - gvsid, map->host_vsid); + trace_kvm_book3s_slb_found(gvsid, map->host_vsid); return map; } map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - sid_map_mask]; if (map->valid && (map->guest_vsid == gvsid)) { - dprintk_slb("SLB: Searching 0x%llx -> 0x%llx\n", - gvsid, map->host_vsid); + trace_kvm_book3s_slb_found(gvsid, map->host_vsid); return map; } - dprintk_slb("SLB: Searching %d/%d: 0x%llx -> not found\n", - sid_map_mask, SID_MAP_MASK - sid_map_mask, gvsid); + trace_kvm_book3s_slb_fail(sid_map_mask, gvsid); return NULL; } @@ -205,8 +194,7 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) map->guest_vsid = gvsid; map->valid = true; - dprintk_slb("SLB: New mapping at %d: 0x%llx -> 0x%llx\n", - sid_map_mask, gvsid, map->host_vsid); + trace_kvm_book3s_slb_map(sid_map_mask, gvsid, map->host_vsid); return map; } @@ -278,7 +266,7 @@ int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr) to_svcpu(vcpu)->slb[slb_index].esid = slb_esid; to_svcpu(vcpu)->slb[slb_index].vsid = slb_vsid; - dprintk_slb("slbmte %#llx, %#llx\n", slb_vsid, slb_esid); + trace_kvm_book3s_slbmte(slb_vsid, slb_esid); return 0; } diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 23f757a69163..3aca1b042b8c 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -262,6 +262,79 @@ TRACE_EVENT(kvm_book3s_mmu_flush, __entry->count, __entry->type, __entry->p1, __entry->p2) ); +TRACE_EVENT(kvm_book3s_slb_found, + TP_PROTO(unsigned long long gvsid, unsigned long long hvsid), + TP_ARGS(gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned long long, gvsid ) + __field( unsigned long long, hvsid ) + ), + + TP_fast_assign( + 
__entry->gvsid = gvsid; + __entry->hvsid = hvsid; + ), + + TP_printk("%llx -> %llx", __entry->gvsid, __entry->hvsid) +); + +TRACE_EVENT(kvm_book3s_slb_fail, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid), + TP_ARGS(sid_map_mask, gvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, gvsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->gvsid = gvsid; + ), + + TP_printk("%x/%x: %llx", __entry->sid_map_mask, + SID_MAP_MASK - __entry->sid_map_mask, __entry->gvsid) +); + +TRACE_EVENT(kvm_book3s_slb_map, + TP_PROTO(u16 sid_map_mask, unsigned long long gvsid, + unsigned long long hvsid), + TP_ARGS(sid_map_mask, gvsid, hvsid), + + TP_STRUCT__entry( + __field( unsigned short, sid_map_mask ) + __field( unsigned long long, guest_vsid ) + __field( unsigned long long, host_vsid ) + ), + + TP_fast_assign( + __entry->sid_map_mask = sid_map_mask; + __entry->guest_vsid = gvsid; + __entry->host_vsid = hvsid; + ), + + TP_printk("%x: %llx -> %llx", __entry->sid_map_mask, + __entry->guest_vsid, __entry->host_vsid) +); + +TRACE_EVENT(kvm_book3s_slbmte, + TP_PROTO(u64 slb_vsid, u64 slb_esid), + TP_ARGS(slb_vsid, slb_esid), + + TP_STRUCT__entry( + __field( u64, slb_vsid ) + __field( u64, slb_esid ) + ), + + TP_fast_assign( + __entry->slb_vsid = slb_vsid; + __entry->slb_esid = slb_esid; + ), + + TP_printk("%llx, %llx", __entry->slb_vsid, __entry->slb_esid) +); + #endif /* CONFIG_PPC_BOOK3S */ #endif /* _TRACE_KVM_H */ -- cgit v1.2.3 From b9877ce2994cc812f00dbb2adb88c1749b6dac86 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 21:48:53 +0200 Subject: KVM: PPC: Revert "KVM: PPC: Use kernel hash function" It turns out the in-kernel hash function is sub-optimal for our subtle hash inputs where every bit is significant. So let's revert to the original hash functions. This reverts commit 05340ab4f9a6626f7a2e8f9fe5397c61d494f445. 
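The hand-rolled hash being restored XOR-folds successive SID_MAP_BITS-wide slices of the gvsid so that every input bit can influence the bucket; the unrolled expression in the diff below is equivalent to this loop form (illustration only):

static u16 xor_fold_sid_hash(u64 gvsid)
{
	u16 hash = 0;
	int i;

	for (i = 0; i < 8; i++)
		hash ^= (gvsid >> (SID_MAP_BITS * i)) & SID_MAP_MASK;

	return hash;
}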
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_32_mmu_host.c | 10 ++++++++-- arch/powerpc/kvm/book3s_64_mmu_host.c | 11 +++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 343452cff9b2..57dddeb23b9b 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -19,7 +19,6 @@ */ #include -#include #include #include @@ -77,7 +76,14 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return hash_64(gvsid, SID_MAP_BITS); + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 321c931f691c..e7c4d00b99cf 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -20,7 +20,6 @@ */ #include -#include #include #include @@ -44,9 +43,17 @@ void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) * a hash, so we don't waste cycles on looping */ static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid) { - return hash_64(gvsid, SID_MAP_BITS); + return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^ + ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK)); } + static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid) { struct kvmppc_sid_map *map; -- cgit v1.2.3 From cb24c50826e0722bffb0674f088954cd4980818b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 22:05:00 +0200 Subject: KVM: PPC: Remove unused define The define VSID_ALL is unused. Let's remove it. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_64_mmu_host.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index e7c4d00b99cf..4040c8d16ad5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -30,7 +30,6 @@ #include "trace.h" #define PTE_SIZE 12 -#define VSID_ALL 0 void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { -- cgit v1.2.3 From 7508e16c9f2a20f7721d7bc47c33a7b34c873a2c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 11:32:56 +0200 Subject: KVM: PPC: Add feature bitmap for magic page We will soon add SR PV support to the shared page, so we need some infrastructure that allows the guest to query for features KVM exports. This patch adds a second return value to the magic mapping that indicates to the guest which features are available.
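On the guest side this enables feature-dependent patching; a hedged sketch of the intended use, with KVM_MAGIC_FEAT_SR being the flag defined below (its actual consumer only lands with the later SR PV support):

static void check_magic_features(void)
{
	u32 features;

	/* out[0], i.e. r4, now carries the feature bitmap */
	on_each_cpu(kvm_map_magic_page, &features, 1);

	if (features & KVM_MAGIC_FEAT_SR) {
		/* safe to patch SR accesses to go through the magic page */
	}
}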
Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_para.h | 2 ++ arch/powerpc/kernel/kvm.c | 21 +++++++++++++++------ arch/powerpc/kvm/powerpc.c | 5 ++++- 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 7438ab360120..43c1b2260af8 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -47,6 +47,8 @@ struct kvm_vcpu_arch_shared { #define KVM_FEATURE_MAGIC_PAGE 1 +#define KVM_MAGIC_FEAT_SR (1 << 0) + #ifdef __KERNEL__ #ifdef CONFIG_KVM_GUEST diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index d3a2cc50d611..226882fe85a6 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -266,12 +266,20 @@ static void kvm_patch_ins_wrteei(u32 *inst) static void kvm_map_magic_page(void *data) { - kvm_hypercall2(KVM_HC_PPC_MAP_MAGIC_PAGE, - KVM_MAGIC_PAGE, /* Physical Address */ - KVM_MAGIC_PAGE); /* Effective Address */ + u32 *features = data; + + ulong in[8]; + ulong out[8]; + + in[0] = KVM_MAGIC_PAGE; + in[1] = KVM_MAGIC_PAGE; + + kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE); + + *features = out[0]; } -static void kvm_check_ins(u32 *inst) +static void kvm_check_ins(u32 *inst, u32 features) { u32 _inst = *inst; u32 inst_no_rt = _inst & ~KVM_MASK_RT; @@ -367,9 +375,10 @@ static void kvm_use_magic_page(void) u32 *p; u32 *start, *end; u32 tmp; + u32 features; /* Tell the host to map the magic page to -4096 on all CPUs */ - on_each_cpu(kvm_map_magic_page, NULL, 1); + on_each_cpu(kvm_map_magic_page, &features, 1); /* Quick self-test to see if the mapping works */ if (__get_user(tmp, (u32*)KVM_MAGIC_PAGE)) { @@ -382,7 +391,7 @@ static void kvm_use_magic_page(void) end = (void*)_etext; for (p = start; p < end; p++) - kvm_check_ins(p); + kvm_check_ins(p, features); printk(KERN_INFO "KVM: Live patching for a fast VM %s\n", kvm_patching_worked ? "worked" : "failed"); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6a53a3f86dae..496d7a5200dc 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -66,6 +66,8 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) vcpu->arch.magic_page_pa = param1; vcpu->arch.magic_page_ea = param2; + r2 = 0; + r = HC_EV_SUCCESS; break; } @@ -76,13 +78,14 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) #endif /* Second return value is in r4 */ - kvmppc_set_gpr(vcpu, 4, r2); break; default: r = HC_EV_UNIMPLEMENTED; break; } + kvmppc_set_gpr(vcpu, 4, r2); + return r; } -- cgit v1.2.3 From c1c88e2fa16f979ba3e99018a53962abe852b30f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 2 Aug 2010 23:23:04 +0200 Subject: KVM: PPC: Move BAT handling code into spr handler The current approach duplicates the spr->bat finding logic and makes it harder to reuse the actually used variables. So let's move everything down to the spr handler. 
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_emulate.c | 48 +++++++++++++-------------------------- 1 file changed, 16 insertions(+), 32 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index f333cb445349..466846557089 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -264,7 +264,7 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, } } -static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) +static struct kvmppc_bat *kvmppc_find_bat(struct kvm_vcpu *vcpu, int sprn) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); struct kvmppc_bat *bat; @@ -286,35 +286,7 @@ static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) BUG(); } - if (sprn % 2) - return bat->raw >> 32; - else - return bat->raw; -} - -static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) -{ - struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - struct kvmppc_bat *bat; - - switch (sprn) { - case SPRN_IBAT0U ... SPRN_IBAT3L: - bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; - break; - case SPRN_IBAT4U ... SPRN_IBAT7L: - bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; - break; - case SPRN_DBAT0U ... SPRN_DBAT3L: - bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; - break; - case SPRN_DBAT4U ... SPRN_DBAT7L: - bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; - break; - default: - BUG(); - } - - kvmppc_set_bat(vcpu, bat, !(sprn % 2), val); + return bat; } int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) @@ -339,12 +311,16 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_IBAT4U ... SPRN_IBAT7L: case SPRN_DBAT0U ... SPRN_DBAT3L: case SPRN_DBAT4U ... SPRN_DBAT7L: - kvmppc_write_bat(vcpu, sprn, (u32)spr_val); + { + struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); + + kvmppc_set_bat(vcpu, bat, !(sprn % 2), (u32)spr_val); /* BAT writes happen so rarely that we're ok to flush * everything here */ kvmppc_mmu_pte_flush(vcpu, 0, 0); kvmppc_mmu_flush_segments(vcpu); break; + } case SPRN_HID0: to_book3s(vcpu)->hid[0] = spr_val; break; @@ -434,8 +410,16 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_IBAT4U ... SPRN_IBAT7L: case SPRN_DBAT0U ... SPRN_DBAT3L: case SPRN_DBAT4U ... SPRN_DBAT7L: - kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn)); + { + struct kvmppc_bat *bat = kvmppc_find_bat(vcpu, sprn); + + if (sprn % 2) + kvmppc_set_gpr(vcpu, rt, bat->raw >> 32); + else + kvmppc_set_gpr(vcpu, rt, bat->raw); + + break; + } case SPRN_SDR1: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); break; -- cgit v1.2.3 From 8e8651783ff2458f31098be7c2abacf2fcab054a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 01:06:11 +0200 Subject: KVM: PPC: Interpret SR registers on demand Right now we're examining the contents of Book3s_32's segment registers when the register is written and putting the interpreted contents into a struct. There are two reasons this is bad. For starters, the struct has worse real-time performance, as it occupies more RAM. But the more important part is that with segment registers being interpreted from their raw values, we can put them in the shared page, allowing guests to mess with them directly. This patch makes the internal representation of SRs be u32s.
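A worked example of the raw encoding that the helpers in the diff below decode (the value is chosen purely for illustration):

/* raw SR = 0x50123456:
 *   0x80000000 clear -> sr_valid() == true
 *   0x40000000 set   -> sr_ks()    == true
 *   0x20000000 clear -> sr_kp()    == false
 *   0x10000000 set   -> sr_nx()    == true
 *   sr_vsid() == 0x50123456 & 0x0fffffff == 0x00123456
 */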
Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 11 +---- arch/powerpc/kvm/book3s.c | 4 +- arch/powerpc/kvm/book3s_32_mmu.c | 79 +++++++++++++++++++---------------- 3 files changed, 46 insertions(+), 48 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f04f516c97da..08846520220c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -38,15 +38,6 @@ struct kvmppc_slb { bool class : 1; }; -struct kvmppc_sr { - u32 raw; - u32 vsid; - bool Ks : 1; - bool Kp : 1; - bool nx : 1; - bool valid : 1; -}; - struct kvmppc_bat { u64 raw; u32 bepi; @@ -79,7 +70,7 @@ struct kvmppc_vcpu_book3s { u64 vsid; } slb_shadow[64]; u8 slb_shadow_max; - struct kvmppc_sr sr[16]; + u32 sr[16]; struct kvmppc_bat ibat[8]; struct kvmppc_bat dbat[8]; u64 hid[6]; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 2fb528f417ff..34472afbb3ec 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1162,8 +1162,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, } } else { for (i = 0; i < 16; i++) { - sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; - sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i].raw; + sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i]; + sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i]; } for (i = 0; i < 8; i++) { sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 5bf4bf8c9e65..d4ff76fd1ff9 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -58,14 +58,39 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) #endif } +static inline u32 sr_vsid(u32 sr_raw) +{ + return sr_raw & 0x0fffffff; +} + +static inline bool sr_valid(u32 sr_raw) +{ + return (sr_raw & 0x80000000) ? false : true; +} + +static inline bool sr_ks(u32 sr_raw) +{ + return (sr_raw & 0x40000000) ? true: false; +} + +static inline bool sr_kp(u32 sr_raw) +{ + return (sr_raw & 0x20000000) ? true: false; +} + +static inline bool sr_nx(u32 sr_raw) +{ + return (sr_raw & 0x10000000) ? 
true: false; +} + static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); -static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) +static u32 find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) { - return &vcpu_book3s->sr[(eaddr >> 28) & 0xf]; + return vcpu_book3s->sr[(eaddr >> 28) & 0xf]; } static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, @@ -87,7 +112,7 @@ static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) } static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3s, - struct kvmppc_sr *sre, gva_t eaddr, + u32 sre, gva_t eaddr, bool primary) { u32 page, hash, pteg, htabmask; @@ -96,7 +121,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 page = (eaddr & 0x0FFFFFFF) >> 12; htabmask = ((vcpu_book3s->sdr1 & 0x1FF) << 16) | 0xFFC0; - hash = ((sre->vsid ^ page) << 6); + hash = ((sr_vsid(sre) ^ page) << 6); if (!primary) hash = ~hash; hash &= htabmask; @@ -105,7 +130,7 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 dprintk("MMU: pc=0x%lx eaddr=0x%lx sdr1=0x%llx pteg=0x%x vsid=0x%x\n", kvmppc_get_pc(&vcpu_book3s->vcpu), eaddr, vcpu_book3s->sdr1, pteg, - sre->vsid); + sr_vsid(sre)); r = gfn_to_hva(vcpu_book3s->vcpu.kvm, pteg >> PAGE_SHIFT); if (kvm_is_error_hva(r)) @@ -113,10 +138,9 @@ static hva_t kvmppc_mmu_book3s_32_get_pteg(struct kvmppc_vcpu_book3s *vcpu_book3 return r | (pteg & ~PAGE_MASK); } -static u32 kvmppc_mmu_book3s_32_get_ptem(struct kvmppc_sr *sre, gva_t eaddr, - bool primary) +static u32 kvmppc_mmu_book3s_32_get_ptem(u32 sre, gva_t eaddr, bool primary) { - return ((eaddr & 0x0fffffff) >> 22) | (sre->vsid << 7) | + return ((eaddr & 0x0fffffff) >> 22) | (sr_vsid(sre) << 7) | (primary ? 0 : 0x40) | 0x80000000; } @@ -180,7 +204,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, bool primary) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); - struct kvmppc_sr *sre; + u32 sre; hva_t ptegp; u32 pteg[16]; u32 ptem = 0; @@ -190,7 +214,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, sre = find_sr(vcpu_book3s, eaddr); dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28, - sre->vsid, sre->raw); + sr_vsid(sre), sre); pte->vpage = kvmppc_mmu_book3s_32_ea_to_vp(vcpu, eaddr, data); @@ -214,8 +238,8 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, pte->raddr = (pteg[i+1] & ~(0xFFFULL)) | (eaddr & 0xFFF); pp = pteg[i+1] & 3; - if ((sre->Kp && (vcpu->arch.shared->msr & MSR_PR)) || - (sre->Ks && !(vcpu->arch.shared->msr & MSR_PR))) + if ((sr_kp(sre) && (vcpu->arch.shared->msr & MSR_PR)) || + (sr_ks(sre) && !(vcpu->arch.shared->msr & MSR_PR))) pp |= 4; pte->may_write = false; @@ -311,30 +335,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum) { - return to_book3s(vcpu)->sr[srnum].raw; + return to_book3s(vcpu)->sr[srnum]; } static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, ulong value) { - struct kvmppc_sr *sre; - - sre = &to_book3s(vcpu)->sr[srnum]; - - /* Flush any left-over shadows from the previous SR */ - - /* XXX Not necessary? 
*/ - /* kvmppc_mmu_pte_flush(vcpu, ((u64)sre->vsid) << 28, 0xf0000000ULL); */ - - /* And then put in the new SR */ - sre->raw = value; - sre->vsid = (value & 0x0fffffff); - sre->valid = (value & 0x80000000) ? false : true; - sre->Ks = (value & 0x40000000) ? true : false; - sre->Kp = (value & 0x20000000) ? true : false; - sre->nx = (value & 0x10000000) ? true : false; - - /* Map the new segment */ + to_book3s(vcpu)->sr[srnum] = value; kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT); } @@ -347,13 +354,13 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid) { ulong ea = esid << SID_SHIFT; - struct kvmppc_sr *sr; + u32 sr; u64 gvsid = esid; if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { sr = find_sr(to_book3s(vcpu), ea); - if (sr->valid) - gvsid = sr->vsid; + if (sr_valid(sr)) + gvsid = sr_vsid(sr); } /* In case we only have one of MSR_IR or MSR_DR set, let's put @@ -370,8 +377,8 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, *vsid = VSID_REAL_DR | gvsid; break; case MSR_DR|MSR_IR: - if (sr->valid) - *vsid = sr->vsid; + if (sr_valid(sr)) + *vsid = sr_vsid(sr); else *vsid = VSID_BAT | gvsid; break; -- cgit v1.2.3 From df1bfa25d81f9451715ccbbb67551e0f792ceec8 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 02:29:27 +0200 Subject: KVM: PPC: Put segment registers in shared page Now that the actual mtsr doesn't do anything anymore, we can move the sr contents over to the shared page, so a guest can directly read and write its sr contents from guest context. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 1 - arch/powerpc/include/asm/kvm_para.h | 1 + arch/powerpc/kvm/book3s.c | 7 +++---- arch/powerpc/kvm/book3s_32_mmu.c | 12 ++++++------ arch/powerpc/kvm/powerpc.c | 2 +- 5 files changed, 11 insertions(+), 12 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 08846520220c..be8aac24ba83 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -70,7 +70,6 @@ struct kvmppc_vcpu_book3s { u64 vsid; } slb_shadow[64]; u8 slb_shadow_max; - u32 sr[16]; struct kvmppc_bat ibat[8]; struct kvmppc_bat dbat[8]; u64 hid[6]; diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 43c1b2260af8..d79fd0910964 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -38,6 +38,7 @@ struct kvm_vcpu_arch_shared { __u64 msr; __u32 dsisr; __u32 int_pending; /* Tells the guest if we have an interrupt */ + __u32 sr[16]; }; #define KVM_SC_MAGIC_R0 0x4b564d21 /* "KVM!" 
*/ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 34472afbb3ec..02a9cb165d53 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -1161,10 +1161,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, sregs->u.s.ppc64.slb[i].slbv = vcpu3s->slb[i].origv; } } else { - for (i = 0; i < 16; i++) { - sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i]; - sregs->u.s.ppc32.sr[i] = vcpu3s->sr[i]; - } + for (i = 0; i < 16; i++) + sregs->u.s.ppc32.sr[i] = vcpu->arch.shared->sr[i]; + for (i = 0; i < 8; i++) { sregs->u.s.ppc32.ibat[i] = vcpu3s->ibat[i].raw; sregs->u.s.ppc32.dbat[i] = vcpu3s->dbat[i].raw; diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index d4ff76fd1ff9..c8cefdd15fd8 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -88,9 +88,9 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 *vsid); -static u32 find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) +static u32 find_sr(struct kvm_vcpu *vcpu, gva_t eaddr) { - return vcpu_book3s->sr[(eaddr >> 28) & 0xf]; + return vcpu->arch.shared->sr[(eaddr >> 28) & 0xf]; } static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, @@ -211,7 +211,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, int i; int found = 0; - sre = find_sr(vcpu_book3s, eaddr); + sre = find_sr(vcpu, eaddr); dprintk_pte("SR 0x%lx: vsid=0x%x, raw=0x%x\n", eaddr >> 28, sr_vsid(sre), sre); @@ -335,13 +335,13 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, static u32 kvmppc_mmu_book3s_32_mfsrin(struct kvm_vcpu *vcpu, u32 srnum) { - return to_book3s(vcpu)->sr[srnum]; + return vcpu->arch.shared->sr[srnum]; } static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, ulong value) { - to_book3s(vcpu)->sr[srnum] = value; + vcpu->arch.shared->sr[srnum] = value; kvmppc_mmu_map_segment(vcpu, srnum << SID_SHIFT); } @@ -358,7 +358,7 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, ulong esid, u64 gvsid = esid; if (vcpu->arch.shared->msr & (MSR_DR|MSR_IR)) { - sr = find_sr(to_book3s(vcpu), ea); + sr = find_sr(vcpu, ea); if (sr_valid(sr)) gvsid = sr_vsid(sr); } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 496d7a5200dc..028891c0baff 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -66,7 +66,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) vcpu->arch.magic_page_pa = param1; vcpu->arch.magic_page_ea = param2; - r2 = 0; + r2 = KVM_MAGIC_FEAT_SR; r = HC_EV_SUCCESS; break; -- cgit v1.2.3 From cbe487fac7fc016dbabbcbe83f11384e1803a56d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 3 Aug 2010 10:39:35 +0200 Subject: KVM: PPC: Add mtsrin PV code This is the guest side of the mtsr acceleration. Using this a guest can now call mtsrin with almost no overhead as long as it ensures that it only uses it with (MSR_IR|MSR_DR) == 0. Linux does that, so we're good. 
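The patcher introduced below identifies mtsrin by masking the register fields out of the instruction word. Here is a user-space sketch of that decode, reusing the KVM_INST_MTSRIN and KVM_MASK_* constants from the diff; the sample encoding for mtsrin r3,r4 is constructed by hand and the shifted-out field values are printed for readability, whereas the kernel keeps them in place to OR into the code template.

/* Sketch of how the patcher recognizes mtsrin and pulls out its
 * register fields; constants mirror the ones added in kvm.c. */
#include <stdio.h>
#include <stdint.h>

#define KVM_MASK_RT	0x03e00000u
#define KVM_MASK_RB	0x0000f800u
#define KVM_INST_MTSRIN	0x7c0001e4u

int main(void)
{
	uint32_t inst = KVM_INST_MTSRIN | (3u << 21) | (4u << 11);	/* mtsrin r3,r4 */
	uint32_t inst_no_rt = inst & ~KVM_MASK_RT;

	if ((inst_no_rt & ~KVM_MASK_RB) == KVM_INST_MTSRIN) {
		uint32_t rt = (inst & KVM_MASK_RT) >> 21;
		uint32_t rb = (inst & KVM_MASK_RB) >> 11;
		printf("mtsrin r%u, r%u -> patchable\n", rt, rb);
	}
	return 0;
}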
Signed-off-by: Alexander Graf --- arch/powerpc/kernel/asm-offsets.c | 1 + arch/powerpc/kernel/kvm.c | 60 +++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/kvm_emul.S | 50 ++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6d92b4e13ebf..7f0d6fcc28a3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -478,6 +478,7 @@ int main(void) DEFINE(KVM_MAGIC_MSR, offsetof(struct kvm_vcpu_arch_shared, msr)); DEFINE(KVM_MAGIC_CRITICAL, offsetof(struct kvm_vcpu_arch_shared, critical)); + DEFINE(KVM_MAGIC_SR, offsetof(struct kvm_vcpu_arch_shared, sr)); #endif #ifdef CONFIG_44x diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 226882fe85a6..c8bab24ff8ac 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -42,6 +42,7 @@ #define KVM_INST_B_MAX 0x01ffffff #define KVM_MASK_RT 0x03e00000 +#define KVM_MASK_RB 0x0000f800 #define KVM_INST_MFMSR 0x7c0000a6 #define KVM_INST_MFSPR_SPRG0 0x7c1042a6 #define KVM_INST_MFSPR_SPRG1 0x7c1142a6 @@ -69,6 +70,8 @@ #define KVM_INST_WRTEEI_0 0x7c000146 #define KVM_INST_WRTEEI_1 0x7c008146 +#define KVM_INST_MTSRIN 0x7c0001e4 + static bool kvm_patching_worked = true; static char kvm_tmp[1024 * 1024]; static int kvm_tmp_index; @@ -264,6 +267,51 @@ static void kvm_patch_ins_wrteei(u32 *inst) #endif +#ifdef CONFIG_PPC_BOOK3S_32 + +extern u32 kvm_emulate_mtsrin_branch_offs; +extern u32 kvm_emulate_mtsrin_reg1_offs; +extern u32 kvm_emulate_mtsrin_reg2_offs; +extern u32 kvm_emulate_mtsrin_orig_ins_offs; +extern u32 kvm_emulate_mtsrin_len; +extern u32 kvm_emulate_mtsrin[]; + +static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb) +{ + u32 *p; + int distance_start; + int distance_end; + ulong next_inst; + + p = kvm_alloc(kvm_emulate_mtsrin_len * 4); + if (!p) + return; + + /* Find out where we are and put everything there */ + distance_start = (ulong)p - (ulong)inst; + next_inst = ((ulong)inst + 4); + distance_end = next_inst - (ulong)&p[kvm_emulate_mtsrin_branch_offs]; + + /* Make sure we only write valid b instructions */ + if (distance_start > KVM_INST_B_MAX) { + kvm_patching_worked = false; + return; + } + + /* Modify the chunk to fit the invocation */ + memcpy(p, kvm_emulate_mtsrin, kvm_emulate_mtsrin_len * 4); + p[kvm_emulate_mtsrin_branch_offs] |= distance_end & KVM_INST_B_MASK; + p[kvm_emulate_mtsrin_reg1_offs] |= (rb << 10); + p[kvm_emulate_mtsrin_reg2_offs] |= rt; + p[kvm_emulate_mtsrin_orig_ins_offs] = *inst; + flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtsrin_len * 4); + + /* Patch the invocation */ + kvm_patch_ins_b(inst, distance_start); +} + +#endif + static void kvm_map_magic_page(void *data) { u32 *features = data; @@ -360,6 +408,18 @@ static void kvm_check_ins(u32 *inst, u32 features) break; } + switch (inst_no_rt & ~KVM_MASK_RB) { +#ifdef CONFIG_PPC_BOOK3S_32 + case KVM_INST_MTSRIN: + if (features & KVM_MAGIC_FEAT_SR) { + u32 inst_rb = _inst & KVM_MASK_RB; + kvm_patch_ins_mtsrin(inst, inst_rt, inst_rb); + } + break; + break; +#endif + } + switch (_inst) { #ifdef CONFIG_BOOKE case KVM_INST_WRTEEI_0: diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 3199f65ede2c..a6e97e7a55e0 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -245,3 +245,53 @@ kvm_emulate_wrteei_ee_offs: .global kvm_emulate_wrteei_len kvm_emulate_wrteei_len: .long (kvm_emulate_wrteei_end - 
kvm_emulate_wrteei) / 4 + + +.global kvm_emulate_mtsrin +kvm_emulate_mtsrin: + + SCRATCH_SAVE + + LL64(r31, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + andi. r31, r31, MSR_DR | MSR_IR + beq kvm_emulate_mtsrin_reg1 + + SCRATCH_RESTORE + +kvm_emulate_mtsrin_orig_ins: + nop + b kvm_emulate_mtsrin_branch + +kvm_emulate_mtsrin_reg1: + /* rX >> 26 */ + rlwinm r30,r0,6,26,29 + +kvm_emulate_mtsrin_reg2: + stw r0, (KVM_MAGIC_PAGE + KVM_MAGIC_SR)(r30) + + SCRATCH_RESTORE + + /* Go back to caller */ +kvm_emulate_mtsrin_branch: + b . +kvm_emulate_mtsrin_end: + +.global kvm_emulate_mtsrin_branch_offs +kvm_emulate_mtsrin_branch_offs: + .long (kvm_emulate_mtsrin_branch - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg1_offs +kvm_emulate_mtsrin_reg1_offs: + .long (kvm_emulate_mtsrin_reg1 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_reg2_offs +kvm_emulate_mtsrin_reg2_offs: + .long (kvm_emulate_mtsrin_reg2 - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_orig_ins_offs +kvm_emulate_mtsrin_orig_ins_offs: + .long (kvm_emulate_mtsrin_orig_ins - kvm_emulate_mtsrin) / 4 + +.global kvm_emulate_mtsrin_len +kvm_emulate_mtsrin_len: + .long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4 -- cgit v1.2.3 From 512ba59ed9c580b5e5575beda0041bb19a641127 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 5 Aug 2010 11:26:04 +0200 Subject: KVM: PPC: Make PV mtmsr work with r30 and r31 So far we've been restricting ourselves to r0-r29 as registers an mtmsr instruction could use. This was bad, as there are some code paths in Linux actually using r30. So let's instead handle all registers gracefully and get rid of that stupid limitation Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 39 ++++++++++++++++++++++++++++++++------- arch/powerpc/kernel/kvm_emul.S | 17 ++++++++--------- 2 files changed, 40 insertions(+), 16 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index c8bab24ff8ac..10b681c092ed 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -42,6 +42,7 @@ #define KVM_INST_B_MAX 0x01ffffff #define KVM_MASK_RT 0x03e00000 +#define KVM_RT_30 0x03c00000 #define KVM_MASK_RB 0x0000f800 #define KVM_INST_MFMSR 0x7c0000a6 #define KVM_INST_MFSPR_SPRG0 0x7c1042a6 @@ -82,6 +83,15 @@ static inline void kvm_patch_ins(u32 *inst, u32 new_inst) flush_icache_range((ulong)inst, (ulong)inst + 4); } +static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt) +{ +#ifdef CONFIG_64BIT + kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc)); +#else + kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000fffc)); +#endif +} + static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt) { #ifdef CONFIG_64BIT @@ -186,7 +196,6 @@ static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) extern u32 kvm_emulate_mtmsr_branch_offs; extern u32 kvm_emulate_mtmsr_reg1_offs; extern u32 kvm_emulate_mtmsr_reg2_offs; -extern u32 kvm_emulate_mtmsr_reg3_offs; extern u32 kvm_emulate_mtmsr_orig_ins_offs; extern u32 kvm_emulate_mtmsr_len; extern u32 kvm_emulate_mtmsr[]; @@ -216,9 +225,27 @@ static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt) /* Modify the chunk to fit the invocation */ memcpy(p, kvm_emulate_mtmsr, kvm_emulate_mtmsr_len * 4); p[kvm_emulate_mtmsr_branch_offs] |= distance_end & KVM_INST_B_MASK; - p[kvm_emulate_mtmsr_reg1_offs] |= rt; - p[kvm_emulate_mtmsr_reg2_offs] |= rt; - p[kvm_emulate_mtmsr_reg3_offs] |= rt; + + /* Make clobbered registers work too */ + switch (get_rt(rt)) { + case 30: + 
kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch2), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg1_offs], + magic_var(scratch1), KVM_RT_30); + kvm_patch_ins_ll(&p[kvm_emulate_mtmsr_reg2_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsr_reg1_offs] |= rt; + p[kvm_emulate_mtmsr_reg2_offs] |= rt; + break; + } + p[kvm_emulate_mtmsr_orig_ins_offs] = *inst; flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsr_len * 4); @@ -402,9 +429,7 @@ static void kvm_check_ins(u32 *inst, u32 features) break; case KVM_INST_MTMSR: case KVM_INST_MTMSRD_L0: - /* We use r30 and r31 during the hook */ - if (get_rt(inst_rt) < 30) - kvm_patch_ins_mtmsr(inst, inst_rt); + kvm_patch_ins_mtmsr(inst, inst_rt); break; } diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index a6e97e7a55e0..65305325250b 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -135,7 +135,8 @@ kvm_emulate_mtmsr: /* Find the changed bits between old and new MSR */ kvm_emulate_mtmsr_reg1: - xor r31, r0, r31 + ori r30, r0, 0 + xor r31, r30, r31 /* Check if we need to really do mtmsr */ LOAD_REG_IMMEDIATE(r30, MSR_CRITICAL_BITS) @@ -156,14 +157,17 @@ kvm_emulate_mtmsr_orig_ins: maybe_stay_in_guest: + /* Get the target register in r30 */ +kvm_emulate_mtmsr_reg2: + ori r30, r0, 0 + /* Check if we have to fetch an interrupt */ lwz r31, (KVM_MAGIC_PAGE + KVM_MAGIC_INT)(0) cmpwi r31, 0 beq+ no_mtmsr /* Check if we may trigger an interrupt */ -kvm_emulate_mtmsr_reg2: - andi. r31, r0, MSR_EE + andi. r31, r30, MSR_EE beq no_mtmsr b do_mtmsr @@ -171,8 +175,7 @@ kvm_emulate_mtmsr_reg2: no_mtmsr: /* Put MSR into magic page because we don't call mtmsr */ -kvm_emulate_mtmsr_reg3: - STL64(r0, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) + STL64(r30, KVM_MAGIC_PAGE + KVM_MAGIC_MSR, 0) SCRATCH_RESTORE @@ -193,10 +196,6 @@ kvm_emulate_mtmsr_reg1_offs: kvm_emulate_mtmsr_reg2_offs: .long (kvm_emulate_mtmsr_reg2 - kvm_emulate_mtmsr) / 4 -.global kvm_emulate_mtmsr_reg3_offs -kvm_emulate_mtmsr_reg3_offs: - .long (kvm_emulate_mtmsr_reg3 - kvm_emulate_mtmsr) / 4 - .global kvm_emulate_mtmsr_orig_ins_offs kvm_emulate_mtmsr_orig_ins_offs: .long (kvm_emulate_mtmsr_orig_ins - kvm_emulate_mtmsr) / 4 -- cgit v1.2.3 From 9ee18b1e08e6a5648aeaaf998eabc72b5304cc17 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 5 Aug 2010 12:24:40 +0200 Subject: KVM: PPC: Update int_pending also on dequeue When having a decrementor interrupt pending, the dequeuing happens manually through an mtdec instruction. This instruction simply calls dequeue on that interrupt, so the int_pending hint doesn't get updated. This patch enables updating the int_pending hint also on dequeue, thus correctly enabling guests to stay in guest contexts more often. 
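A toy model, not kernel code, of the invariant this patch restores: the int_pending hint in the shared page must be non-zero exactly when some exception bit is still pending, and that has to hold on dequeue as well as on queue.

/* Toy model of the int_pending hint invariant.  Purely illustrative;
 * priority 14 stands in for the decrementer irqprio. */
#include <stdio.h>

static unsigned long pending_exceptions;
static int shared_int_pending;

static void queue_irqprio(int prio)
{
	pending_exceptions |= 1UL << prio;
	shared_int_pending = 1;
}

static void dequeue_irqprio(int prio)
{
	pending_exceptions &= ~(1UL << prio);
	/* The fix: clear the hint once nothing is left pending. */
	if (!pending_exceptions)
		shared_int_pending = 0;
}

int main(void)
{
	queue_irqprio(14);	/* decrementer fires */
	dequeue_irqprio(14);	/* guest mtdec dequeues it */
	printf("int_pending=%d\n", shared_int_pending);	/* now 0 */
	return 0;
}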
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 02a9cb165d53..7adea6320654 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -201,6 +201,9 @@ static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, { clear_bit(kvmppc_book3s_vec2irqprio(vec), &vcpu->arch.pending_exceptions); + + if (!vcpu->arch.pending_exceptions) + vcpu->arch.shared->int_pending = 0; } void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec) -- cgit v1.2.3 From df08bd10266ce6132278f6b4ddc4bb0a12330b56 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 5 Aug 2010 15:44:41 +0200 Subject: KVM: PPC: Make PV mtmsrd L=1 work with r30 and r31 We had an arbitrary limitation in mtmsrd L=1 that kept us from using r30 and r31 as input registers. Let's get rid of that and get more potential speedups! Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 21 +++++++++++++++++---- arch/powerpc/kernel/kvm_emul.S | 8 +++++++- 2 files changed, 24 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 10b681c092ed..48a033865410 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -158,6 +158,7 @@ static u32 *kvm_alloc(int len) extern u32 kvm_emulate_mtmsrd_branch_offs; extern u32 kvm_emulate_mtmsrd_reg_offs; +extern u32 kvm_emulate_mtmsrd_orig_ins_offs; extern u32 kvm_emulate_mtmsrd_len; extern u32 kvm_emulate_mtmsrd[]; @@ -186,7 +187,21 @@ static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt) /* Modify the chunk to fit the invocation */ memcpy(p, kvm_emulate_mtmsrd, kvm_emulate_mtmsrd_len * 4); p[kvm_emulate_mtmsrd_branch_offs] |= distance_end & KVM_INST_B_MASK; - p[kvm_emulate_mtmsrd_reg_offs] |= rt; + switch (get_rt(rt)) { + case 30: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch2), KVM_RT_30); + break; + case 31: + kvm_patch_ins_ll(&p[kvm_emulate_mtmsrd_reg_offs], + magic_var(scratch1), KVM_RT_30); + break; + default: + p[kvm_emulate_mtmsrd_reg_offs] |= rt; + break; + } + + p[kvm_emulate_mtmsrd_orig_ins_offs] = *inst; flush_icache_range((ulong)p, (ulong)p + kvm_emulate_mtmsrd_len * 4); /* Patch the invocation */ @@ -423,9 +438,7 @@ static void kvm_check_ins(u32 *inst, u32 features) /* Rewrites */ case KVM_INST_MTMSRD_L1: - /* We use r30 and r31 during the hook */ - if (get_rt(inst_rt) < 30) - kvm_patch_ins_mtmsrd(inst, inst_rt); + kvm_patch_ins_mtmsrd(inst, inst_rt); break; case KVM_INST_MTMSR: case KVM_INST_MTMSRD_L0: diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index 65305325250b..f2b1b2523e61 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -78,7 +78,8 @@ kvm_emulate_mtmsrd: /* OR the register's (MSR_EE|MSR_RI) on MSR */ kvm_emulate_mtmsrd_reg: - andi. r30, r0, (MSR_EE|MSR_RI) + ori r30, r0, 0 + andi. 
r30, r30, (MSR_EE|MSR_RI) or r31, r31, r30 /* Put MSR back into magic page */ @@ -96,6 +97,7 @@ kvm_emulate_mtmsrd_reg: SCRATCH_RESTORE /* Nag hypervisor */ +kvm_emulate_mtmsrd_orig_ins: tlbsync b kvm_emulate_mtmsrd_branch @@ -117,6 +119,10 @@ kvm_emulate_mtmsrd_branch_offs: kvm_emulate_mtmsrd_reg_offs: .long (kvm_emulate_mtmsrd_reg - kvm_emulate_mtmsrd) / 4 +.global kvm_emulate_mtmsrd_orig_ins_offs +kvm_emulate_mtmsrd_orig_ins_offs: + .long (kvm_emulate_mtmsrd_orig_ins - kvm_emulate_mtmsrd) / 4 + .global kvm_emulate_mtmsrd_len kvm_emulate_mtmsrd_len: .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 -- cgit v1.2.3 From ad0873763a83e7b31ba87a85ec2027dd6a9d7b55 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 17 Aug 2010 11:41:44 +0200 Subject: KVM: PPC: Force enable nap on KVM There are some heuristics in the PPC power management code that try to find out if the particular hardware we're running on supports proper power management or just hangs the machine when going into nap mode. Since we know that KVM is safe with nap, let's force enable it in the PV code once we're certain that we are on a KVM VM. Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 48a033865410..669d989be1d6 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -582,6 +582,9 @@ static int __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) kvm_use_magic_page(); + /* Enable napping */ + powersave_nap = 1; + free_tmp: kvm_free_tmp(); -- cgit v1.2.3 From 8b6db3bc965c204db6868d4005808b4fdc9c46d7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 15 Aug 2010 08:04:24 +0200 Subject: KVM: PPC: Implement correct SID mapping on Book3s_32 Up until now we were doing segment mappings wrong on Book3s_32. For Book3s_64 we were using a trick where we know that a single mmu_context gives us 16 bits of context ids. The mm system on Book3s_32 instead uses a clever algorithm to distribute VSIDs across the available range, so a context id really only gives us 16 available VSIDs. To keep at least a few guest processes in the SID shadow, let's map a number of contexts that we can use as VSID pool. This makes the code be actually correct and shouldn't hurt performance too much. 
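The VSID spreading is easiest to see by evaluating the patch's CTX_TO_VSID() macro directly. A user-space sketch with a hypothetical context id, printing the 16 VSIDs a single context contributes to the pool:

/* User-space sketch of the per-context VSID pool the patch builds;
 * CTX_TO_VSID is copied from the diff (after mm/mmu_context_hash32.c). */
#include <stdio.h>

#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + ((id) * 0x111)) & 0xffffff)

int main(void)
{
	int ctx = 42;	/* hypothetical id from __init_new_context() */
	int j;

	for (j = 0; j < 16; j++)
		printf("vsid_pool[%2d] = 0x%06x\n", j, CTX_TO_VSID(ctx, j));
	return 0;
}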
Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s.h | 15 +++++++-- arch/powerpc/kvm/book3s_32_mmu_host.c | 57 +++++++++++++++++++---------------- arch/powerpc/kvm/book3s_64_mmu_host.c | 8 ++--- 3 files changed, 48 insertions(+), 32 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index be8aac24ba83..d62e703f1214 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -60,6 +60,13 @@ struct kvmppc_sid_map { #define SID_MAP_NUM (1 << SID_MAP_BITS) #define SID_MAP_MASK (SID_MAP_NUM - 1) +#ifdef CONFIG_PPC_BOOK3S_64 +#define SID_CONTEXTS 1 +#else +#define SID_CONTEXTS 128 +#define VSID_POOL_SIZE (SID_CONTEXTS * 16) +#endif + struct kvmppc_vcpu_book3s { struct kvm_vcpu vcpu; struct kvmppc_book3s_shadow_vcpu *shadow_vcpu; @@ -78,10 +85,14 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; - u64 vsid_first; u64 vsid_next; +#ifdef CONFIG_PPC_BOOK3S_32 + u32 vsid_pool[VSID_POOL_SIZE]; +#else + u64 vsid_first; u64 vsid_max; - int context_id; +#endif + int context_id[SID_CONTEXTS]; ulong prog_flags; /* flags to inject when giving a 700 trap */ }; diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 57dddeb23b9b..9fecbfbce773 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -275,18 +275,15 @@ static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid) backwards_map = !backwards_map; /* Uh-oh ... out of mappings. Let's flush! */ - if (vcpu_book3s->vsid_next >= vcpu_book3s->vsid_max) { - vcpu_book3s->vsid_next = vcpu_book3s->vsid_first; + if (vcpu_book3s->vsid_next >= VSID_POOL_SIZE) { + vcpu_book3s->vsid_next = 0; memset(vcpu_book3s->sid_map, 0, sizeof(struct kvmppc_sid_map) * SID_MAP_NUM); kvmppc_mmu_pte_flush(vcpu, 0, 0); kvmppc_mmu_flush_segments(vcpu); } - map->host_vsid = vcpu_book3s->vsid_next; - - /* Would have to be 111 to be completely aligned with the rest of - Linux, but that is just way too little space! */ - vcpu_book3s->vsid_next+=1; + map->host_vsid = vcpu_book3s->vsid_pool[vcpu_book3s->vsid_next]; + vcpu_book3s->vsid_next++; map->guest_vsid = gvsid; map->valid = true; @@ -333,40 +330,38 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { + int i; + kvmppc_mmu_hpte_destroy(vcpu); preempt_disable(); - __destroy_context(to_book3s(vcpu)->context_id); + for (i = 0; i < SID_CONTEXTS; i++) + __destroy_context(to_book3s(vcpu)->context_id[i]); preempt_enable(); } /* From mm/mmu_context_hash32.c */ -#define CTX_TO_VSID(ctx) (((ctx) * (897 * 16)) & 0xffffff) +#define CTX_TO_VSID(c, id) ((((c) * (897 * 16)) + (id * 0x111)) & 0xffffff) int kvmppc_mmu_init(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); int err; ulong sdr1; + int i; + int j; - err = __init_new_context(); - if (err < 0) - return -1; - vcpu3s->context_id = err; - - vcpu3s->vsid_max = CTX_TO_VSID(vcpu3s->context_id + 1) - 1; - vcpu3s->vsid_first = CTX_TO_VSID(vcpu3s->context_id); - -#if 0 /* XXX still doesn't guarantee uniqueness */ - /* We could collide with the Linux vsid space because the vsid - * wraps around at 24 bits. We're safe if we do our own space - * though, so let's always set the highest bit. 
*/ + for (i = 0; i < SID_CONTEXTS; i++) { + err = __init_new_context(); + if (err < 0) + goto init_fail; + vcpu3s->context_id[i] = err; - vcpu3s->vsid_max |= 0x00800000; - vcpu3s->vsid_first |= 0x00800000; -#endif - BUG_ON(vcpu3s->vsid_max < vcpu3s->vsid_first); + /* Remember context id for this combination */ + for (j = 0; j < 16; j++) + vcpu3s->vsid_pool[(i * 16) + j] = CTX_TO_VSID(err, j); + } - vcpu3s->vsid_next = vcpu3s->vsid_first; + vcpu3s->vsid_next = 0; /* Remember where the HTAB is */ asm ( "mfsdr1 %0" : "=r"(sdr1) ); @@ -376,4 +371,14 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) kvmppc_mmu_hpte_init(vcpu); return 0; + +init_fail: + for (j = 0; j < i; j++) { + if (!vcpu3s->context_id[j]) + continue; + + __destroy_context(to_book3s(vcpu)->context_id[j]); + } + + return -1; }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 4040c8d16ad5..fa2f08434ba5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -286,7 +286,7 @@ void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu) void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { kvmppc_mmu_hpte_destroy(vcpu); - __destroy_context(to_book3s(vcpu)->context_id); + __destroy_context(to_book3s(vcpu)->context_id[0]); } int kvmppc_mmu_init(struct kvm_vcpu *vcpu) @@ -297,10 +297,10 @@ int kvmppc_mmu_init(struct kvm_vcpu *vcpu) err = __init_new_context(); if (err < 0) return -1; - vcpu3s->context_id = err; + vcpu3s->context_id[0] = err; - vcpu3s->vsid_max = ((vcpu3s->context_id + 1) << USER_ESID_BITS) - 1; - vcpu3s->vsid_first = vcpu3s->context_id << USER_ESID_BITS; + vcpu3s->vsid_max = ((vcpu3s->context_id[0] + 1) << USER_ESID_BITS) - 1; + vcpu3s->vsid_first = vcpu3s->context_id[0] << USER_ESID_BITS; vcpu3s->vsid_next = vcpu3s->vsid_first; kvmppc_mmu_hpte_init(vcpu); -- cgit v1.2.3
From 296c19d0b4072dd9594daeec532563e56bddd119 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 15 Aug 2010 08:39:19 +0200 Subject: KVM: PPC: Don't put MSR_POW in MSR On Book3S a mtmsr with the MSR_POW bit set indicates that the OS is idle and only needs to be woken up on the next interrupt. Unfortunately we let that bit slip into the stored MSR value, which is not what the real CPU does, so we ended up executing code like this: r = mfmsr(); /* r contains MSR_POW */ mtmsr(r | MSR_EE); This obviously breaks, as we're going into idle mode in code sections that don't expect to be idling. This patch masks MSR_POW out of the stored MSR value on wakeup, making guests happy again.
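A toy model of the fixed flow; the 32-bit Book3S MSR bit values are assumed here for illustration. Blocking still happens on POW, but the stored MSR never keeps the bit, so the guest's later mfmsr/mtmsr round trip cannot re-enter idle.

/* Toy model of the MSR_POW fix.  Illustrative only; bit values
 * assumed as on 32-bit Book3S. */
#include <stdio.h>

#define MSR_POW	0x00040000ul
#define MSR_EE	0x00008000ul

static unsigned long stored_msr;

static void set_msr(unsigned long msr)
{
	stored_msr = msr;
	if (msr & MSR_POW) {
		/* kvm_vcpu_block() would sleep here ... */
		/* ... and on wakeup the bit is masked out again: */
		stored_msr = msr & ~MSR_POW;
	}
}

int main(void)
{
	set_msr(MSR_POW);		/* guest goes idle */
	set_msr(stored_msr | MSR_EE);	/* guest's mfmsr/mtmsr pattern */
	printf("POW still set? %s\n", (stored_msr & MSR_POW) ? "yes" : "no");
	return 0;
}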
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 7adea6320654..5833df7e8ccc 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -134,10 +134,14 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) vcpu->arch.shared->msr = msr; kvmppc_recalc_shadow_msr(vcpu); - if (msr & (MSR_WE|MSR_POW)) { + if (msr & MSR_POW) { if (!vcpu->arch.pending_exceptions) { kvm_vcpu_block(vcpu); vcpu->stat.halt_wakeup++; + + /* Unset POW bit after we woke up */ + msr &= ~MSR_POW; + vcpu->arch.shared->msr = msr; } } -- cgit v1.2.3
From 082decf29a9fe5bd5dcbfb26223e44edd9deabed Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 7 Aug 2010 10:33:56 -0700 Subject: KVM: PPC: initialize IVORs in addition to IVPR Developers can now tell at a glance the exact type of the premature interrupt, instead of just knowing that there was some premature interrupt. Signed-off-by: Hollis Blanchard Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c604277011a6..835f6d0e4f20 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -497,15 +497,19 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + int i; + vcpu->arch.pc = 0; vcpu->arch.shared->msr = 0; kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */ vcpu->arch.shadow_pid = 1; - /* Eye-catching number so we know if the guest takes an interrupt * before it's programmed its own IVPR. */ + /* Eye-catching numbers so we know if the guest takes an interrupt + * before it's programmed its own IVPR/IVORs. */ vcpu->arch.ivpr = 0x55550000; + for (i = 0; i < BOOKE_IRQPRIO_MAX; i++) + vcpu->arch.ivor[i] = 0x7700 | i * 4; kvmppc_init_timing_stats(vcpu); -- cgit v1.2.3
From 0b3bafc8e5867039e265869749abbb7ea6dd2c8b Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 7 Aug 2010 10:33:57 -0700 Subject: KVM: PPC: fix compilation of "dump tlbs" debug function Missing local variable. Signed-off-by: Hollis Blanchard Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x_tlb.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 9f71b8d6eb0d..5f3cff83e089 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -47,6 +47,7 @@ #ifdef DEBUG void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) { + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); struct kvmppc_44x_tlbe *tlbe; int i; -- cgit v1.2.3
From ebc65874e9e8f3b8bbbc69aa49acd7489cd41c52 Mon Sep 17 00:00:00 2001 From: Hollis Blanchard Date: Sat, 7 Aug 2010 10:33:58 -0700 Subject: KVM: PPC: allow ppc440gp to pass the compatibility check Match only the first part of cur_cpu_spec->platform. 440GP (the first 440 processor) is identified by the string "ppc440gp", while all later 440 processors use simply "ppc440".
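The strcmp-to-strncmp change is subtle, so here is a standalone illustration: an exact match rejects "ppc440gp", while a 6-byte prefix match accepts every 440 variant.

/* Why the compatibility check needed strncmp: 440GP reports
 * "ppc440gp", later parts report plain "ppc440". */
#include <stdio.h>
#include <string.h>

static int compat(const char *platform)
{
	/* old check: strcmp(platform, "ppc440") == 0  -> misses 440GP */
	return strncmp(platform, "ppc440", 6) == 0;
}

int main(void)
{
	printf("ppc440:   %d\n", compat("ppc440"));	/* 1 */
	printf("ppc440gp: %d\n", compat("ppc440gp"));	/* 1, fixed */
	printf("ppc970:   %d\n", compat("ppc970"));	/* 0 */
	return 0;
}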
Signed-off-by: Hollis Blanchard Signed-off-by: Alexander Graf --- arch/powerpc/kvm/44x.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c index e7b1f3fca5dc..74d0e7421143 100644 --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -43,7 +43,7 @@ int kvmppc_core_check_processor_compat(void) { int r; - if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) r = 0; else r = -ENOTSUPP; @@ -72,6 +72,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) /* Since the guest can directly access the timebase, it must know the * real timebase frequency. Accordingly, it must see the state of * CCR1[TCS]. */ + /* XXX CCR1 doesn't exist on all 440 SoCs. */ vcpu->arch.ccr1 = mfspr(SPRN_CCR1); for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) -- cgit v1.2.3
From 591bd8e7b4c8b9246d7a1c81ffbd28e35dc5de4e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 17 Aug 2010 22:08:39 +0200 Subject: KVM: PPC: Enable napping only for Book3s_64 Earlier, I incorrectly enabled napping for BookE as well, which would result in needless dcache flushes. Since we only need to force enable napping on Book3s_64 because it doesn't go into MSR_POW otherwise, we can just #ifdef that code to this particular platform. Reported-by: Scott Wood Signed-off-by: Alexander Graf --- arch/powerpc/kernel/kvm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 669d989be1d6..428d0e538aec 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -582,8 +582,10 @@ static int __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) kvm_use_magic_page(); +#ifdef CONFIG_PPC_BOOK3S_64 /* Enable napping */ powersave_nap = 1; +#endif free_tmp: kvm_free_tmp(); -- cgit v1.2.3
From 17bd158006a33615270f9dba15c62f49bd447435 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Aug 2010 10:44:15 +0200 Subject: KVM: PPC: Implement Level interrupts on Book3S The current interrupt logic is just completely broken. We get a notification from user space, telling us that an interrupt is there. But then user space expects us to just acknowledge the interrupt once we deliver it to the guest. This is not how real hardware works though. On real hardware, the interrupt controller pulls the external interrupt line until it gets notified that the interrupt was received. So in reality we have two events: pulling and letting go of the interrupt line. To maintain backwards compatibility, I added a new request for the pulling part. The letting go part was already implemented earlier. With this in place, we can now finally start guests that do not randomly stall and stop working. This patch implements the above logic for Book3S.
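A toy model of the two-event semantics, using the KVM_INTERRUPT_* values from the kvm.h hunk below; delivery details are simplified away.

/* Toy model of edge vs. level external interrupts as introduced
 * for Book3S.  Purely illustrative. */
#include <stdio.h>

#define KVM_INTERRUPT_SET	-1U
#define KVM_INTERRUPT_UNSET	-2U
#define KVM_INTERRUPT_SET_LEVEL	-3U

static int line_pulled;		/* level source still asserted? */
static int edge_pending;

static void ioctl_interrupt(unsigned int irq)
{
	if (irq == KVM_INTERRUPT_SET)
		edge_pending = 1;
	else if (irq == KVM_INTERRUPT_SET_LEVEL)
		line_pulled = 1;
	else if (irq == KVM_INTERRUPT_UNSET)
		line_pulled = 0;	/* controller lets go of the line */
}

static void deliver(void)
{
	if (edge_pending) {
		printf("deliver edge irq (auto-cleared)\n");
		edge_pending = 0;	/* edge: cleared on delivery */
	}
	if (line_pulled)
		printf("deliver level irq (stays pending)\n");
	/* level: stays asserted until user space sends UNSET */
}

int main(void)
{
	ioctl_interrupt(KVM_INTERRUPT_SET_LEVEL);
	deliver();
	deliver();	/* still pending: line not yet released */
	ioctl_interrupt(KVM_INTERRUPT_UNSET);
	deliver();	/* nothing left */
	return 0;
}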
Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm.h | 1 + arch/powerpc/include/asm/kvm_asm.h | 4 +++- arch/powerpc/kvm/book3s.c | 30 +++++++++++++++++++++++++++--- 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 6c5547d82bbe..18ea6963ad77 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -86,5 +86,6 @@ struct kvm_guest_debug_arch { #define KVM_INTERRUPT_SET -1U #define KVM_INTERRUPT_UNSET -2U +#define KVM_INTERRUPT_SET_LEVEL -3U #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index c5ea4cda34b3..5b7504674397 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -58,6 +58,7 @@ #define BOOK3S_INTERRUPT_INST_STORAGE 0x400 #define BOOK3S_INTERRUPT_INST_SEGMENT 0x480 #define BOOK3S_INTERRUPT_EXTERNAL 0x500 +#define BOOK3S_INTERRUPT_EXTERNAL_LEVEL 0x501 #define BOOK3S_INTERRUPT_ALIGNMENT 0x600 #define BOOK3S_INTERRUPT_PROGRAM 0x700 #define BOOK3S_INTERRUPT_FP_UNAVAIL 0x800 @@ -84,7 +85,8 @@ #define BOOK3S_IRQPRIO_EXTERNAL 13 #define BOOK3S_IRQPRIO_DECREMENTER 14 #define BOOK3S_IRQPRIO_PERFORMANCE_MONITOR 15 -#define BOOK3S_IRQPRIO_MAX 16 +#define BOOK3S_IRQPRIO_EXTERNAL_LEVEL 16 +#define BOOK3S_IRQPRIO_MAX 17 #define BOOK3S_HFLAG_DCBZ32 0x1 #define BOOK3S_HFLAG_SLB 0x2 diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5833df7e8ccc..e316847c08c0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -186,6 +186,7 @@ static int kvmppc_book3s_vec2irqprio(unsigned int vec) case 0x400: prio = BOOK3S_IRQPRIO_INST_STORAGE; break; case 0x480: prio = BOOK3S_IRQPRIO_INST_SEGMENT; break; case 0x500: prio = BOOK3S_IRQPRIO_EXTERNAL; break; + case 0x501: prio = BOOK3S_IRQPRIO_EXTERNAL_LEVEL; break; case 0x600: prio = BOOK3S_IRQPRIO_ALIGNMENT; break; case 0x700: prio = BOOK3S_IRQPRIO_PROGRAM; break; case 0x800: prio = BOOK3S_IRQPRIO_FP_UNAVAIL; break; @@ -246,13 +247,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); + unsigned int vec = BOOK3S_INTERRUPT_EXTERNAL; + + if (irq->irq == KVM_INTERRUPT_SET_LEVEL) + vec = BOOK3S_INTERRUPT_EXTERNAL_LEVEL; + + kvmppc_book3s_queue_irqprio(vcpu, vec); } void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); } int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -281,6 +288,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) vec = BOOK3S_INTERRUPT_DECREMENTER; break; case BOOK3S_IRQPRIO_EXTERNAL: + case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: deliver = (vcpu->arch.shared->msr & MSR_EE) && !crit; vec = BOOK3S_INTERRUPT_EXTERNAL; break; @@ -343,6 +351,23 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) return deliver; } +/* + * This function determines if an irqprio should be cleared once issued. 
+ */ +static bool clear_irqprio(struct kvm_vcpu *vcpu, unsigned int priority) +{ + switch (priority) { + case BOOK3S_IRQPRIO_DECREMENTER: + /* DEC interrupts get cleared by mtdec */ + return false; + case BOOK3S_IRQPRIO_EXTERNAL_LEVEL: + /* External interrupts get cleared by userspace */ + return false; + } + + return true; +} + void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) { unsigned long *pending = &vcpu->arch.pending_exceptions; @@ -356,8 +381,7 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu) priority = __ffs(*pending); while (priority < BOOK3S_IRQPRIO_MAX) { if (kvmppc_book3s_irqprio_deliver(vcpu, priority) && - (priority != BOOK3S_IRQPRIO_DECREMENTER)) { - /* DEC interrupts get cleared by mtdec */ + clear_irqprio(vcpu, priority)) { clear_bit(priority, &vcpu->arch.pending_exceptions); break; } -- cgit v1.2.3 From 7b4203e8cb5c5d9bc49da62b7a6fa4ba876a1b3f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Aug 2010 13:50:45 +0200 Subject: KVM: PPC: Expose level based interrupt cap Now that we have all the level interrupt magic in place, let's expose the capability to user space, so it can make use of it! Signed-off-by: Alexander Graf --- arch/powerpc/kvm/powerpc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 028891c0baff..2f87a1627f6c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -192,6 +192,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_UNSET_IRQ: + case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: -- cgit v1.2.3 From c5335f17651de5075313524ccc3881527268966f Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 30 Aug 2010 14:03:24 +0200 Subject: KVM: PPC: Implement level interrupts for BookE BookE also wants to support level based interrupts, so let's implement all the necessary logic there. We need to trick a bit here because the irqprios are 1:1 assigned to architecture defined values. But since there is some space left there, we can just pick a random one and move it later on - it's internal anyways. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/booke.c | 17 +++++++++++++++-- arch/powerpc/kvm/booke.h | 4 +++- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 835f6d0e4f20..77575d08c818 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -131,13 +131,19 @@ void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu) void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); + unsigned int prio = BOOKE_IRQPRIO_EXTERNAL; + + if (irq->irq == KVM_INTERRUPT_SET_LEVEL) + prio = BOOKE_IRQPRIO_EXTERNAL_LEVEL; + + kvmppc_booke_queue_irqprio(vcpu, prio); } void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); + clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } /* Deliver the interrupt of the corresponding priority, if possible. 
*/ @@ -150,6 +156,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, ulong crit_raw = vcpu->arch.shared->critical; ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); bool crit; + bool keep_irq = false; /* Truncate crit indicators in 32 bit mode */ if (!(vcpu->arch.shared->msr & MSR_SF)) { @@ -162,6 +169,11 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, /* ... and we're in supervisor mode */ crit = crit && !(vcpu->arch.shared->msr & MSR_PR); + if (priority == BOOKE_IRQPRIO_EXTERNAL_LEVEL) { + priority = BOOKE_IRQPRIO_EXTERNAL; + keep_irq = true; + } + switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: case BOOKE_IRQPRIO_DATA_STORAGE: @@ -214,7 +226,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.shared->dar = vcpu->arch.queued_dear; kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask); - clear_bit(priority, &vcpu->arch.pending_exceptions); + if (!keep_irq) + clear_bit(priority, &vcpu->arch.pending_exceptions); } return allowed; diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index 88258acc98fa..492bb7030358 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -46,7 +46,9 @@ #define BOOKE_IRQPRIO_FIT 17 #define BOOKE_IRQPRIO_DECREMENTER 18 #define BOOKE_IRQPRIO_PERFORMANCE_MONITOR 19 -#define BOOKE_IRQPRIO_MAX 19 +/* Internal pseudo-irqprio for level triggered externals */ +#define BOOKE_IRQPRIO_EXTERNAL_LEVEL 20 +#define BOOKE_IRQPRIO_MAX 20 extern unsigned long kvmppc_booke_handlers; -- cgit v1.2.3 From 21e537ba149be99c4d31a04949ca6e0770379662 Mon Sep 17 00:00:00 2001 From: Kyle Moffett Date: Mon, 30 Aug 2010 11:38:39 -0400 Subject: KVM: PPC: e500_tlb: Fix a minor copy-paste tracing bug The kvmppc_e500_stlbe_invalidate() function was trying to pass too many parameters to trace_kvm_stlb_inval(). This appears to be a bad copy-paste from a call to trace_kvm_stlb_write(). Signed-off-by: Kyle Moffett Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index 092a390876f3..a413883cf948 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -226,8 +226,7 @@ static void kvmppc_e500_stlbe_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, kvmppc_e500_shadow_release(vcpu_e500, tlbsel, esel); stlbe->mas1 = 0; - trace_kvm_stlb_inval(index_of(tlbsel, esel), stlbe->mas1, stlbe->mas2, - stlbe->mas3, stlbe->mas7); + trace_kvm_stlb_inval(index_of(tlbsel, esel)); } static void kvmppc_e500_tlb1_invalidate(struct kvmppc_vcpu_e500 *vcpu_e500, -- cgit v1.2.3 From 344941beb9926418663e171a347d1a31d727fe45 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Tue, 31 Aug 2010 03:45:39 +0200 Subject: KVM: PPC: Fix compile error in e500_tlb.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The e500_tlb.c file didn't compile for me due to the following error: arch/powerpc/kvm/e500_tlb.c: In function ‘kvmppc_e500_shadow_map’: arch/powerpc/kvm/e500_tlb.c:300: error: format ‘%lx’ expects type ‘long unsigned int’, but argument 2 has type ‘gfn_t’ So let's explicitly cast the argument to make printk happy. 
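The class of warning being fixed is easy to reproduce outside the kernel. In this sketch, gfn_t stands in for the kernel typedef and is assumed 64-bit, as on the failing configuration; casting to the format's type is the fix the patch applies.

/* Standalone illustration of the format-type fix: when the typedef
 * is not 'long', %lx needs an explicit cast. */
#include <stdio.h>

typedef unsigned long long gfn_t;	/* 64-bit on the failing config */

int main(void)
{
	gfn_t gfn = 0x12345;

	/* printf("%lx\n", gfn);  <- -Wformat warning: wrong type */
	printf("gfn = %lx\n", (long)gfn);	/* the fix applied above */
	return 0;
}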
Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_tlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c index a413883cf948..d6d6d47a75a9 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_tlb.c @@ -297,7 +297,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, /* Get reference to new page. */ new_page = gfn_to_page(vcpu_e500->vcpu.kvm, gfn); if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", + (long)gfn); kvm_release_page_clean(new_page); return; } -- cgit v1.2.3
From 26e673c3003bc8f24bdbbdcb8bc91a78556f579a Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 3 Sep 2010 10:22:19 +0200 Subject: KVM: PPC: Move of include to __KERNEL__ section We have to protect the include for linux/of.h by __KERNEL__ so it doesn't accidentally get referenced outside. This patch fixes this and makes the tree compile again. Reported-by: Stephen Rothwell Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_para.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index d79fd0910964..50533f9adf40 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -21,7 +21,6 @@ #define __POWERPC_KVM_PARA_H__ #include <linux/types.h> -#include <linux/of.h> struct kvm_vcpu_arch_shared { __u64 scratch1; @@ -54,6 +53,8 @@ struct kvm_vcpu_arch_shared { #ifdef CONFIG_KVM_GUEST +#include <linux/of.h> + static inline int kvm_para_available(void) { struct device_node *hyper_node;
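As a closing illustration of the pattern applied in this last patch, here is a rough sketch, not a verbatim copy of the header, of where the exported and kernel-only pieces end up:

/* Rough shape of kvm_para.h after the fix: the uapi-visible struct
 * stays unconditional, kernel-only includes hide behind
 * __KERNEL__/CONFIG_KVM_GUEST.  Sketch only. */
#ifndef __POWERPC_KVM_PARA_H__
#define __POWERPC_KVM_PARA_H__

#include <linux/types.h>		/* safe: exported to user space */

struct kvm_vcpu_arch_shared {
	__u64 scratch1;
	/* ... */
};

#ifdef __KERNEL__
#ifdef CONFIG_KVM_GUEST
#include <linux/of.h>			/* kernel-only: now inside the guard */
/* kernel-only helpers such as kvm_para_available() live here */
#endif
#endif

#endif /* __POWERPC_KVM_PARA_H__ */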