From 04a13c7c632e1fe04a5f6e6c83565d2559e37598 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 1 Sep 2009 21:12:28 +0900
Subject: percpu: don't assume existence of cpu0

percpu incorrectly assumed that cpu0 was always there which led to the
following warning and eventual oops on sparc machines w/o cpu0.

  WARNING: at mm/percpu.c:651 pcpu_map+0xdc/0x100()
  Modules linked in:
  Call Trace:
    [000000000045eb70] warn_slowpath_common+0x50/0xa0
    [000000000045ebdc] warn_slowpath_null+0x1c/0x40
    [00000000004d493c] pcpu_map+0xdc/0x100
    [00000000004d59a4] pcpu_alloc+0x3e4/0x4e0
    [00000000004d5af8] __alloc_percpu+0x18/0x40
    [00000000005b112c] __percpu_counter_init+0x4c/0xc0
  ...
  Unable to handle kernel NULL pointer dereference
  ...
   I7: <sysfs_new_dirent+0x30/0x120>
   Disabling lock debugging due to kernel taint
   Caller[000000000053c1b0]: sysfs_new_dirent+0x30/0x120
   Caller[000000000053c7a4]: create_dir+0x24/0xc0
   Caller[000000000053c870]: sysfs_create_dir+0x30/0x80
   Caller[00000000005990e8]: kobject_add_internal+0xc8/0x200
  ...
   Kernel panic - not syncing: Attempted to kill the idle task!

This patch fixes the problem by backporting parts from devel branch to
make percpu core not depend on the existence of cpu0.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Meelis Roos <mroos@linux.ee>
Cc: David Miller <davem@davemloft.net>
---
 mm/percpu.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'mm')

diff --git a/mm/percpu.c b/mm/percpu.c
index 5fe37842e0ea..3311c8919f37 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -197,7 +197,12 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
 static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk,
 				     int page_idx)
 {
-	return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL;
+	/*
+	 * Any possible cpu id can be used here, so there's no need to
+	 * worry about preemption or cpu hotplug.
+	 */
+	return *pcpu_chunk_pagep(chunk, raw_smp_processor_id(),
+				 page_idx) != NULL;
 }
 
 /* set the pointer to a chunk in a page struct */
@@ -297,6 +302,14 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 		return pcpu_first_chunk;
 	}
 
+	/*
+	 * The address is relative to unit0 which might be unused and
+	 * thus unmapped.  Offset the address to the unit space of the
+	 * current processor before looking it up in the vmalloc
+	 * space.  Note that any possible cpu id can be used here, so
+	 * there's no need to worry about preemption or cpu hotplug.
+	 */
+	addr += raw_smp_processor_id() * pcpu_unit_size;
 	return pcpu_get_page_chunk(vmalloc_to_page(addr));
 }
 
-- 
cgit v1.2.3


From d76b1590e06a63a3d8697168cd0aabf1c4b3cb3a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 3 Sep 2009 22:38:59 +0300
Subject: slub: Fix kmem_cache_destroy() with SLAB_DESTROY_BY_RCU

kmem_cache_destroy() should call rcu_barrier() *after* kmem_cache_close() and
*before* sysfs_slab_remove() or risk rcu_free_slab() being called after
kmem_cache is deleted (kfreed).

rmmod nf_conntrack can crash the machine because it has to kmem_cache_destroy()
a SLAB_DESTROY_BY_RCU enabled cache.

Cc: <stable@kernel.org>
Reported-by: Zdenek Kabelac <zdenek.kabelac@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---
 mm/slub.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'mm')

diff --git a/mm/slub.c b/mm/slub.c
index b9f1491a58a1..b6276753626e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2594,8 +2594,6 @@ static inline int kmem_cache_close(struct kmem_cache *s)
  */
 void kmem_cache_destroy(struct kmem_cache *s)
 {
-	if (s->flags & SLAB_DESTROY_BY_RCU)
-		rcu_barrier();
 	down_write(&slub_lock);
 	s->refcount--;
 	if (!s->refcount) {
@@ -2606,6 +2604,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
 				"still has objects.\n", s->name, __func__);
 			dump_stack();
 		}
+		if (s->flags & SLAB_DESTROY_BY_RCU)
+			rcu_barrier();
 		sysfs_slab_remove(s);
 	} else
 		up_write(&slub_lock);
-- 
cgit v1.2.3


From a190887b58c32d19c2eee007c5eb8faa970a69ba Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 5 Sep 2009 11:17:07 -0700
Subject: nommu: fix error handling in do_mmap_pgoff()

Fix the error handling in do_mmap_pgoff().  If do_mmap_shared_file() or
do_mmap_private() fail, we jump to the error_put_region label at which
point we call __put_nommu_region() on the region - but we haven't yet
added the region to the tree, and so __put_nommu_region() may BUG
because the region tree is empty or it may corrupt the region tree.

To get around this, we can afford to add the region to the region tree
before calling do_mmap_shared_file() or do_mmap_private() as we keep
nommu_region_sem write-locked, so no-one can race with us by seeing a
transient region.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/nommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'mm')

diff --git a/mm/nommu.c b/mm/nommu.c
index 4bde489ec431..66e81e7e9fe9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1352,6 +1352,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	}
 
 	vma->vm_region = region;
+	add_nommu_region(region);
 
 	/* set up the mapping */
 	if (file && vma->vm_flags & VM_SHARED)
@@ -1361,8 +1362,6 @@ unsigned long do_mmap_pgoff(struct file *file,
 	if (ret < 0)
 		goto error_put_region;
 
-	add_nommu_region(region);
-
 	/* okay... we have a mapping; now we have to register it */
 	result = vma->vm_start;
 
-- 
cgit v1.2.3


From dd5d241ea955006122d76af88af87de73fec25b4 Mon Sep 17 00:00:00 2001
From: Mel Gorman <mel@csn.ul.ie>
Date: Sat, 5 Sep 2009 11:17:11 -0700
Subject: page-allocator: always change pageblock ownership when
 anti-fragmentation is disabled

On low-memory systems, anti-fragmentation gets disabled as fragmentation
cannot be avoided on a sufficiently large boundary to be worthwhile.  Once
disabled, there is a period of time when all the pageblocks are marked
MOVABLE and the expectation is that they get marked UNMOVABLE at each call
to __rmqueue_fallback().

However, when MAX_ORDER is large the pageblocks do not change ownership
because the normal criteria are not met.  This has the effect of
prematurely breaking up too many large contiguous blocks.  This is most
serious on NOMMU systems which depend on high-order allocations to boot.
This patch causes pageblocks to change ownership on every fallback when
anti-fragmentation is disabled.  This prevents the large blocks being
prematurely broken up.

This is a fix to commit 49255c619fbd482d704289b5eb2795f8e3b7ff2e [page
allocator: move check for disabled anti-fragmentation out of fastpath] and
the problem affects 2.6.31-rc8.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Tested-by: Paul Mundt <lethal@linux-sh.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Acked-by: Greg Ungerer <gerg@snapgear.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/page_alloc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'mm')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5cc986eb9f6f..a0de15f46987 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -817,13 +817,15 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 			 * agressive about taking ownership of free pages
 			 */
 			if (unlikely(current_order >= (pageblock_order >> 1)) ||
-					start_migratetype == MIGRATE_RECLAIMABLE) {
+					start_migratetype == MIGRATE_RECLAIMABLE ||
+					page_group_by_mobility_disabled) {
 				unsigned long pages;
 				pages = move_freepages_block(zone, page,
 								start_migratetype);
 
 				/* Claim the whole block if over half of it is free */
-				if (pages >= (1 << (pageblock_order-1)))
+				if (pages >= (1 << (pageblock_order-1)) ||
+						page_group_by_mobility_disabled)
 					set_pageblock_migratetype(page,
 								start_migratetype);
 
-- 
cgit v1.2.3