From 47f3a867f6310d6abfa185ab12baaba7ed1d69af Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Fri, 6 Jan 2006 00:10:32 -0800
Subject: [PATCH] mm: fix __alloc_pages cpuset ALLOC_* flags

Two changes to the setting of the ALLOC_CPUSET flag in
mm/page_alloc.c:__alloc_pages()

- A bug fix - the "ignoring mins" case should not be honoring ALLOC_CPUSET.
  This case of all cases, since it is handling a request that will free up
  more memory than is asked for (exiting tasks, e.g.) should be allowed to
  escape cpuset constraints when memory is tight.

- A logic change to make it simpler.  Honor cpusets even on GFP_ATOMIC
  (!wait) requests.  With this, cpuset confinement applies to all requests
  except ALLOC_NO_WATERMARKS, so that in a subsequent cleanup patch, I can
  remove the ALLOC_CPUSET flag entirely.  Since I don't know any real reason
  this logic has to be either way, I am choosing the path of the simplest
  code.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fe14a8c87fc2..1e49dc7cd619 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -903,8 +903,7 @@ restart:
 		alloc_flags |= ALLOC_HARDER;
 	if (gfp_mask & __GFP_HIGH)
 		alloc_flags |= ALLOC_HIGH;
-	if (wait)
-		alloc_flags |= ALLOC_CPUSET;
+	alloc_flags |= ALLOC_CPUSET;
 
 	/*
 	 * Go through the zonelist again. Let __GFP_HIGH and allocations
@@ -926,7 +925,7 @@ restart:
 nofail_alloc:
 			/* go through the zonelist yet again, ignoring mins */
 			page = get_page_from_freelist(gfp_mask, order,
-				zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+				zonelist, ALLOC_NO_WATERMARKS);
 			if (page)
 				goto got_pg;
 			if (gfp_mask & __GFP_NOFAIL) {
-- 
cgit v1.2.3


From a94b3ab7eab4edcc9b2cb474b188f774c331adf7 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <kravetz@us.ibm.com>
Date: Fri, 6 Jan 2006 00:10:51 -0800
Subject: [PATCH] mm: remove arch independent NODES_SPAN_OTHER_NODES

The NODES_SPAN_OTHER_NODES config option was created so that DISCONTIGMEM
could handle pSeries numa layouts.  However, support for DISCONTIGMEM has
been replaced by SPARSEMEM on powerpc.  As a result, this config option and
supporting code is no longer needed.

I have already sent a patch to Paul that removes the option from powerpc
specific code.  This removes the arch independent piece.  Doesn't really
matter which is applied first.

Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1e49dc7cd619..07825c637a58 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1708,8 +1708,6 @@ void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 	for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) {
 		if (!early_pfn_valid(pfn))
 			continue;
-		if (!early_pfn_in_nid(pfn, nid))
-			continue;
 		page = pfn_to_page(pfn);
 		set_page_links(page, zone, nid, pfn);
 		set_page_count(page, 1);
-- 
cgit v1.2.3


From c54ad30c784b84d0275152d0ca80985b21471811 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:10:56 -0800
Subject: [PATCH] mm: pagealloc opt

Slightly optimise some page allocation and freeing functions by taking
advantage of knowing whether or not interrupts are disabled.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07825c637a58..680cbe5b6ba2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -375,11 +375,10 @@ static int
 free_pages_bulk(struct zone *zone, int count,
 		struct list_head *list, unsigned int order)
 {
-	unsigned long flags;
 	struct page *page = NULL;
 	int ret = 0;
 
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
 	while (!list_empty(list) && count--) {
@@ -389,12 +388,13 @@ free_pages_bulk(struct zone *zone, int count,
 		__free_pages_bulk(page, zone, order);
 		ret++;
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return ret;
 }
 
 void __free_pages_ok(struct page *page, unsigned int order)
 {
+	unsigned long flags;
 	LIST_HEAD(list);
 	int i;
 	int reserved = 0;
@@ -415,7 +415,9 @@ void __free_pages_ok(struct page *page, unsigned int order)
 	list_add(&page->lru, &list);
 	mod_page_state(pgfree, 1 << order);
 	kernel_map_pages(page, 1<<order, 0);
+	local_irq_save(flags);
 	free_pages_bulk(page_zone(page), 1, &list, order);
+	local_irq_restore(flags);
 }
 
 
@@ -539,12 +541,11 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
 static int rmqueue_bulk(struct zone *zone, unsigned int order, 
 			unsigned long count, struct list_head *list)
 {
-	unsigned long flags;
 	int i;
 	int allocated = 0;
 	struct page *page;
 	
-	spin_lock_irqsave(&zone->lock, flags);
+	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
 		page = __rmqueue(zone, order);
 		if (page == NULL)
@@ -552,7 +553,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 		allocated++;
 		list_add_tail(&page->lru, list);
 	}
-	spin_unlock_irqrestore(&zone->lock, flags);
+	spin_unlock(&zone->lock);
 	return allocated;
 }
 
@@ -589,6 +590,7 @@ void drain_remote_pages(void)
 #if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
 static void __drain_pages(unsigned int cpu)
 {
+	unsigned long flags;
 	struct zone *zone;
 	int i;
 
@@ -600,8 +602,10 @@ static void __drain_pages(unsigned int cpu)
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
+			local_irq_save(flags);
 			pcp->count -= free_pages_bulk(zone, pcp->count,
 						&pcp->list, 0);
+			local_irq_restore(flags);
 		}
 	}
 }
@@ -744,7 +748,7 @@ again:
 		if (pcp->count <= pcp->low)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (pcp->count) {
+		if (likely(pcp->count)) {
 			page = list_entry(pcp->list.next, struct page, lru);
 			list_del(&page->lru);
 			pcp->count--;
-- 
cgit v1.2.3


From 77a8a78834561398fb4cb1480afa7b0e80b1dd53 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:10:57 -0800
Subject: [PATCH] mm: set_page_refs opt

Inline set_page_refs.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 17 -----------------
 1 file changed, 17 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 680cbe5b6ba2..6d513faf050b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -453,23 +453,6 @@ expand(struct zone *zone, struct page *page,
 	return page;
 }
 
-void set_page_refs(struct page *page, int order)
-{
-#ifdef CONFIG_MMU
-	set_page_count(page, 1);
-#else
-	int i;
-
-	/*
-	 * We need to reference all the pages for this order, otherwise if
-	 * anyone accesses one of the pages with (get/put) it will be freed.
-	 * - eg: access_process_vm()
-	 */
-	for (i = 0; i < (1 << order); i++)
-		set_page_count(page + i, 1);
-#endif /* CONFIG_MMU */
-}
-
 /*
  * This page is about to be returned from the page allocator
  */
-- 
cgit v1.2.3


From 92be2e33b155ee76399f51f41fb061f850d02f08 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:10:57 -0800
Subject: [PATCH] mm: microopt conditions

Micro optimise some conditionals where we don't need lazy evaluation.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6d513faf050b..b0647b515277 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -336,9 +336,9 @@ static inline void __free_pages_bulk (struct page *page,
 
 static inline int free_pages_check(const char *function, struct page *page)
 {
-	if (	page_mapcount(page) ||
-		page->mapping != NULL ||
-		page_count(page) != 0 ||
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL)  |
+		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
 			1 << PG_private |
@@ -348,7 +348,7 @@ static inline int free_pages_check(const char *function, struct page *page)
 			1 << PG_slab	|
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved )))
+			1 << PG_reserved ))))
 		bad_page(function, page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
@@ -458,9 +458,9 @@ expand(struct zone *zone, struct page *page,
  */
 static int prep_new_page(struct page *page, int order)
 {
-	if (	page_mapcount(page) ||
-		page->mapping != NULL ||
-		page_count(page) != 0 ||
+	if (unlikely(page_mapcount(page) |
+		(page->mapping != NULL)  |
+		(page_count(page) != 0)  |
 		(page->flags & (
 			1 << PG_lru	|
 			1 << PG_private	|
@@ -471,7 +471,7 @@ static int prep_new_page(struct page *page, int order)
 			1 << PG_slab    |
 			1 << PG_swapcache |
 			1 << PG_writeback |
-			1 << PG_reserved )))
+			1 << PG_reserved ))))
 		bad_page(__FUNCTION__, page);
 
 	/*
-- 
cgit v1.2.3


From 13e7444b0ec59f96d81a4e8c379d5f38fc5f2cc1 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:10:58 -0800
Subject: [PATCH] mm: remove bad_range

bad_range is supposed to be a temporary check.  It would be a pity to throw it
out.  Make it depend on CONFIG_DEBUG_VM instead.

CONFIG_HOLES_IN_ZONE systems were relying on this to check pfn_valid in the
page allocator.  Add that to page_is_buddy instead.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b0647b515277..088712f2ac02 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -81,6 +81,7 @@ int min_free_kbytes = 1024;
 unsigned long __initdata nr_kernel_pages;
 unsigned long __initdata nr_all_pages;
 
+#ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
 	int ret = 0;
@@ -122,6 +123,13 @@ static int bad_range(struct zone *zone, struct page *page)
 	return 0;
 }
 
+#else
+static inline int bad_range(struct zone *zone, struct page *page)
+{
+	return 0;
+}
+#endif
+
 static void bad_page(const char *function, struct page *page)
 {
 	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
@@ -255,14 +263,20 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
 /*
  * This function checks whether a page is free && is the buddy
  * we can do coalesce a page and its buddy if
- * (a) the buddy is free &&
- * (b) the buddy is on the buddy system &&
- * (c) a page and its buddy have the same order.
+ * (a) the buddy is not in a hole &&
+ * (b) the buddy is free &&
+ * (c) the buddy is on the buddy system &&
+ * (d) a page and its buddy have the same order.
  * for recording page's order, we use page_private(page) and PG_private.
  *
  */
 static inline int page_is_buddy(struct page *page, int order)
 {
+#ifdef CONFIG_HOLES_IN_ZONE
+	if (!pfn_valid(page_to_pfn(page)))
+		return 0;
+#endif
+
        if (PagePrivate(page)           &&
            (page_order(page) == order) &&
             page_count(page) == 0)
@@ -314,17 +328,15 @@ static inline void __free_pages_bulk (struct page *page,
 		struct free_area *area;
 		struct page *buddy;
 
-		combined_idx = __find_combined_index(page_idx, order);
 		buddy = __page_find_buddy(page, page_idx, order);
-
-		if (bad_range(zone, buddy))
-			break;
 		if (!page_is_buddy(buddy, order))
 			break;		/* Move the buddy up one level. */
+
 		list_del(&buddy->lru);
 		area = zone->free_area + order;
 		area->nr_free--;
 		rmv_page_order(buddy);
+		combined_idx = __find_combined_index(page_idx, order);
 		page = page + (combined_idx - page_idx);
 		page_idx = combined_idx;
 		order++;
-- 
cgit v1.2.3


From 2d92c5c9150a2a9ca3dc25da58d5042e17a96b6a Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:10:59 -0800
Subject: [PATCH] mm: remove pcp low

struct per_cpu_pages.low is useless.  Remove it.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 088712f2ac02..7cff958e7813 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -740,7 +740,7 @@ again:
 		page = NULL;
 		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
+		if (!pcp->count)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
 		if (likely(pcp->count)) {
@@ -1345,10 +1345,9 @@ void show_free_areas(void)
 			pageset = zone_pcp(zone, cpu);
 
 			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
+				printk("cpu %d %s: high %d, batch %d used:%d\n",
 					cpu,
 					temperature ? "cold" : "hot",
-					pageset->pcp[temperature].low,
 					pageset->pcp[temperature].high,
 					pageset->pcp[temperature].batch,
 					pageset->pcp[temperature].count);
@@ -1790,14 +1789,12 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 
 	pcp = &p->pcp[0];		/* hot */
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&pcp->list);
 
 	pcp = &p->pcp[1];		/* cold*/
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 2 * batch;
 	pcp->batch = max(1UL, batch/2);
 	INIT_LIST_HEAD(&pcp->list);
@@ -2193,12 +2190,10 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 				seq_printf(m,
 					   "\n    cpu: %i pcp: %i"
 					   "\n              count: %i"
-					   "\n              low:   %i"
 					   "\n              high:  %i"
 					   "\n              batch: %i",
 					   i, j,
 					   pageset->pcp[j].count,
-					   pageset->pcp[j].low,
 					   pageset->pcp[j].high,
 					   pageset->pcp[j].batch);
 			}
-- 
cgit v1.2.3


From a86b1f53166a260ced8f3c8c526945bf496f2e78 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:00 -0800
Subject: [PATCH] mm: page_state fixes

read_page_state and __get_page_state only traverse online CPUs, which will
cause results to fluctuate when CPUs are plugged in or out.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7cff958e7813..379618747deb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1169,12 +1169,11 @@ EXPORT_SYMBOL(nr_pagecache);
 DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
 #endif
 
-void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
+static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 {
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
-	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
 	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
@@ -1227,7 +1226,7 @@ unsigned long __read_page_state(unsigned long offset)
 	unsigned long ret = 0;
 	int cpu;
 
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu) {
 		unsigned long in;
 
 		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
-- 
cgit v1.2.3


From 085cc7d5de3cc662da7ea78296464a0d52f3f01f Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:01 -0800
Subject: [PATCH] mm: page_alloc cleanups

Small cleanups that does not change generated code with the gcc's I've tested
with.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 379618747deb..925b0b985f79 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -447,8 +447,7 @@ void __free_pages_ok(struct page *page, unsigned int order)
  *
  * -- wli
  */
-static inline struct page *
-expand(struct zone *zone, struct page *page,
+static inline void expand(struct zone *zone, struct page *page,
  	int low, int high, struct free_area *area)
 {
 	unsigned long size = 1 << high;
@@ -462,7 +461,6 @@ expand(struct zone *zone, struct page *page,
 		area->nr_free++;
 		set_page_order(&page[size], high);
 	}
-	return page;
 }
 
 /*
@@ -522,7 +520,8 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order)
 		rmv_page_order(page);
 		area->nr_free--;
 		zone->free_pages -= 1UL << order;
-		return expand(zone, page, order, current_order, area);
+		expand(zone, page, order, current_order, area);
+		return page;
 	}
 
 	return NULL;
@@ -537,19 +536,16 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list)
 {
 	int i;
-	int allocated = 0;
-	struct page *page;
 	
 	spin_lock(&zone->lock);
 	for (i = 0; i < count; ++i) {
-		page = __rmqueue(zone, order);
-		if (page == NULL)
+		struct page *page = __rmqueue(zone, order);
+		if (unlikely(page == NULL))
 			break;
-		allocated++;
 		list_add_tail(&page->lru, list);
 	}
 	spin_unlock(&zone->lock);
-	return allocated;
+	return i;
 }
 
 #ifdef CONFIG_NUMA
-- 
cgit v1.2.3


From a226f6c899799fe2c4919daa0767ac579c88f7bd Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 6 Jan 2006 00:11:08 -0800
Subject: [PATCH] FRV: Clean up bootmem allocator's page freeing algorithm

The attached patch cleans up the way the bootmem allocator frees pages.

A new function, __free_pages_bootmem(), is provided in mm/page_alloc.c that is
called from mm/bootmem.c to turn pages over to the main allocator.  All the
bits of code to initialise pages (clearing PG_reserved and setting the page
count) are moved to here.  The checks on page validity are removed, on the
assumption that the struct page arrays will have been prepared correctly.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 36 +++++++++++++++++++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 925b0b985f79..cdad3249cf7f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,8 @@ unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
 
+static void fastcall free_hot_cold_page(struct page *page, int cold);
+
 /*
  * results with 256, 32 in the lowmem_reserve sysctl:
  *	1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
@@ -432,6 +434,39 @@ void __free_pages_ok(struct page *page, unsigned int order)
 	local_irq_restore(flags);
 }
 
+/*
+ * permit the bootmem allocator to evade page validation on high-order frees
+ */
+void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
+{
+	if (order == 0) {
+		__ClearPageReserved(page);
+		set_page_count(page, 0);
+
+		free_hot_cold_page(page, 0);
+	} else {
+		LIST_HEAD(list);
+		int loop;
+
+		for (loop = 0; loop < BITS_PER_LONG; loop++) {
+			struct page *p = &page[loop];
+
+			if (loop + 16 < BITS_PER_LONG)
+				prefetchw(p + 16);
+			__ClearPageReserved(p);
+			set_page_count(p, 0);
+		}
+
+		arch_free_page(page, order);
+
+		mod_page_state(pgfree, 1 << order);
+
+		list_add(&page->lru, &list);
+		kernel_map_pages(page, 1 << order, 0);
+		free_pages_bulk(page_zone(page), 1, &list, order);
+	}
+}
+
 
 /*
  * The order of subdivision here is critical for the IO subsystem.
@@ -671,7 +706,6 @@ static void zone_statistics(struct zonelist *zonelist, struct zone *z)
 /*
  * Free a 0-order page
  */
-static void FASTCALL(free_hot_cold_page(struct page *page, int cold));
 static void fastcall free_hot_cold_page(struct page *page, int cold)
 {
 	struct zone *zone = page_zone(page);
-- 
cgit v1.2.3


From 9328b8faae922e52073785ed6c1eaa8565648a0e Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:10 -0800
Subject: [PATCH] mm: dma32 zone statistics

Add dma32 to zone statistics.  Also attempt to arrange struct page_state a
bit better (visually).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cdad3249cf7f..e12154d9c4ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2277,32 +2277,40 @@ static char *vmstat_text[] = {
 	"pgpgout",
 	"pswpin",
 	"pswpout",
-	"pgalloc_high",
 
+	"pgalloc_high",
 	"pgalloc_normal",
+	"pgalloc_dma32",
 	"pgalloc_dma",
+
 	"pgfree",
 	"pgactivate",
 	"pgdeactivate",
 
 	"pgfault",
 	"pgmajfault",
+
 	"pgrefill_high",
 	"pgrefill_normal",
+	"pgrefill_dma32",
 	"pgrefill_dma",
 
 	"pgsteal_high",
 	"pgsteal_normal",
+	"pgsteal_dma32",
 	"pgsteal_dma",
+
 	"pgscan_kswapd_high",
 	"pgscan_kswapd_normal",
-
+	"pgscan_kswapd_dma32",
 	"pgscan_kswapd_dma",
+
 	"pgscan_direct_high",
 	"pgscan_direct_normal",
+	"pgscan_direct_dma32",
 	"pgscan_direct_dma",
-	"pginodesteal",
 
+	"pginodesteal",
 	"slabs_scanned",
 	"kswapd_steal",
 	"kswapd_inodesteal",
-- 
cgit v1.2.3


From 224abf92b2f439a9030f21d2926ec8047d1ffcdb Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:11 -0800
Subject: [PATCH] mm: bad_page optimisation

Cut down size slightly by not passing bad_page the function name (it should be
able to be determined by dump_stack()).  And cut down the number of printks in
bad_page.

Also, cut down some branching in the destroy_compound_page path.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e12154d9c4ed..b9fd2c238f13 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -132,16 +132,16 @@ static inline int bad_range(struct zone *zone, struct page *page)
 }
 #endif
 
-static void bad_page(const char *function, struct page *page)
-{
-	printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
-		function, current->comm, page);
-	printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
-		(int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
-		page->mapping, page_mapcount(page), page_count(page));
-	printk(KERN_EMERG "Backtrace:\n");
+static void bad_page(struct page *page)
+{
+	printk(KERN_EMERG "Bad page state in process '%s'\n"
+		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
+		"Trying to fix it up, but a reboot is needed\n"
+		"Backtrace:\n",
+		current->comm, page, (int)(2*sizeof(unsigned long)),
+		(unsigned long)page->flags, page->mapping,
+		page_mapcount(page), page_count(page));
 	dump_stack();
-	printk(KERN_EMERG "Trying to fix it up, but a reboot is needed\n");
 	page->flags &= ~(1 << PG_lru	|
 			1 << PG_private |
 			1 << PG_locked	|
@@ -194,19 +194,15 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 	int i;
 	int nr_pages = 1 << order;
 
-	if (!PageCompound(page))
-		return;
-
-	if (page[1].index != order)
-		bad_page(__FUNCTION__, page);
+	if (unlikely(page[1].index != order))
+		bad_page(page);
 
 	for (i = 0; i < nr_pages; i++) {
 		struct page *p = page + i;
 
-		if (!PageCompound(p))
-			bad_page(__FUNCTION__, page);
-		if (page_private(p) != (unsigned long)page)
-			bad_page(__FUNCTION__, page);
+		if (unlikely(!PageCompound(p) |
+				(page_private(p) != (unsigned long)page)))
+			bad_page(page);
 		ClearPageCompound(p);
 	}
 }
@@ -316,7 +312,7 @@ static inline void __free_pages_bulk (struct page *page,
 	unsigned long page_idx;
 	int order_size = 1 << order;
 
-	if (unlikely(order))
+	if (unlikely(PageCompound(page)))
 		destroy_compound_page(page, order);
 
 	page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
@@ -348,7 +344,7 @@ static inline void __free_pages_bulk (struct page *page,
 	zone->free_area[order].nr_free++;
 }
 
-static inline int free_pages_check(const char *function, struct page *page)
+static inline int free_pages_check(struct page *page)
 {
 	if (unlikely(page_mapcount(page) |
 		(page->mapping != NULL)  |
@@ -363,7 +359,7 @@ static inline int free_pages_check(const char *function, struct page *page)
 			1 << PG_swapcache |
 			1 << PG_writeback |
 			1 << PG_reserved ))))
-		bad_page(function, page);
+		bad_page(page);
 	if (PageDirty(page))
 		__ClearPageDirty(page);
 	/*
@@ -422,7 +418,7 @@ void __free_pages_ok(struct page *page, unsigned int order)
 #endif
 
 	for (i = 0 ; i < (1 << order) ; ++i)
-		reserved += free_pages_check(__FUNCTION__, page + i);
+		reserved += free_pages_check(page + i);
 	if (reserved)
 		return;
 
@@ -517,7 +513,7 @@ static int prep_new_page(struct page *page, int order)
 			1 << PG_swapcache |
 			1 << PG_writeback |
 			1 << PG_reserved ))))
-		bad_page(__FUNCTION__, page);
+		bad_page(page);
 
 	/*
 	 * For now, we report if PG_reserved was found set, but do not
@@ -716,7 +712,7 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 
 	if (PageAnon(page))
 		page->mapping = NULL;
-	if (free_pages_check(__FUNCTION__, page))
+	if (free_pages_check(page))
 		return;
 
 	inc_page_state(pgfree);
-- 
cgit v1.2.3


From f3fe65122da05e1cd4c9140340d96ea2f95d0c49 Mon Sep 17 00:00:00 2001
From: Con Kolivas <kernel@kolivas.org>
Date: Fri, 6 Jan 2006 00:11:15 -0800
Subject: [PATCH] mm: add populated_zone() helper

There are numerous places we check whether a zone is populated or not.

Provide a helper function to check for populated zones and convert all
checks for zone->present_pages.

Signed-off-by: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b9fd2c238f13..8f3de5af92dd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1358,7 +1358,7 @@ void show_free_areas(void)
 		show_node(zone);
 		printk("%s per-cpu:", zone->name);
 
-		if (!zone->present_pages) {
+		if (!populated_zone(zone)) {
 			printk(" empty\n");
 			continue;
 		} else
@@ -1435,7 +1435,7 @@ void show_free_areas(void)
 
 		show_node(zone);
 		printk("%s: ", zone->name);
-		if (!zone->present_pages) {
+		if (!populated_zone(zone)) {
 			printk("empty\n");
 			continue;
 		}
@@ -2134,7 +2134,7 @@ static int frag_show(struct seq_file *m, void *arg)
 	int order;
 
 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-		if (!zone->present_pages)
+		if (!populated_zone(zone))
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
@@ -2167,7 +2167,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
 		int i;
 
-		if (!zone->present_pages)
+		if (!populated_zone(zone))
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
-- 
cgit v1.2.3


From 1a93205bdffd9d7278d4a66081cdb48452522a58 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Fri, 6 Jan 2006 00:11:16 -0800
Subject: [PATCH] mm: simplify build_zonelists_node by removing the case
 statement.

Simplify build_zonelists_node by removing the case statement.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8f3de5af92dd..7adc9526d329 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1455,35 +1455,23 @@ void show_free_areas(void)
 
 /*
  * Builds allocation fallback zone lists.
+ *
+ * Add all populated zones of a node to the zonelist.
  */
-static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist, int j, int k)
-{
-	switch (k) {
-		struct zone *zone;
-	default:
-		BUG();
-	case ZONE_HIGHMEM:
-		zone = pgdat->node_zones + ZONE_HIGHMEM;
-		if (zone->present_pages) {
+static int __init build_zonelists_node(pg_data_t *pgdat,
+			struct zonelist *zonelist, int j, int k)
+{
+	struct zone *zone;
+
+	BUG_ON(k > ZONE_HIGHMEM);
+	for (zone = pgdat->node_zones + k; zone >= pgdat->node_zones; zone--) {
+		if (populated_zone(zone)) {
 #ifndef CONFIG_HIGHMEM
-			BUG();
+			BUG_ON(zone - pgdat->node_zones > ZONE_NORMAL);
 #endif
 			zonelist->zones[j++] = zone;
 		}
-	case ZONE_NORMAL:
-		zone = pgdat->node_zones + ZONE_NORMAL;
-		if (zone->present_pages)
-			zonelist->zones[j++] = zone;
-	case ZONE_DMA32:
-		zone = pgdat->node_zones + ZONE_DMA32;
-		if (zone->present_pages)
-			zonelist->zones[j++] = zone;
-	case ZONE_DMA:
-		zone = pgdat->node_zones + ZONE_DMA;
-		if (zone->present_pages)
-			zonelist->zones[j++] = zone;
 	}
-
 	return j;
 }
 
-- 
cgit v1.2.3


From 4be38e351c5f455f6f490f5aff29053e33ab4f99 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Fri, 6 Jan 2006 00:11:17 -0800
Subject: [PATCH] mm: move determination of policy_zone into page allocator

Currently the function to build a zonelist for a BIND policy has the side
effect to set the policy_zone.  This seems to be a bit strange.  policy
zone seems to not be initialized elsewhere and therefore 0.  Do we police
ZONE_DMA if no bind policy has been used yet?

This patch moves the determination of the zone to apply policies to into
the page allocator.  We determine the zone while building the zonelist for
nodes.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7adc9526d329..512e3f4d4963 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -36,6 +36,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/mempolicy.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -1470,6 +1471,7 @@ static int __init build_zonelists_node(pg_data_t *pgdat,
 			BUG_ON(zone - pgdat->node_zones > ZONE_NORMAL);
 #endif
 			zonelist->zones[j++] = zone;
+			check_highest_zone(k);
 		}
 	}
 	return j;
-- 
cgit v1.2.3


From 02a68a5ebc7dd823da7496116f42290103e1e4a9 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Fri, 6 Jan 2006 00:11:18 -0800
Subject: [PATCH] Fix zone policy determination

The use k in the inner loop means that the highest zone nr is always used
if any zone of a node is populated.  This means that the policy zone is not
correctly determined on arches that do no use HIGHMEM like ia64.

Change the loop to decrement k which also simplifies the BUG_ON.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 512e3f4d4963..ca978992c898 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1465,15 +1465,19 @@ static int __init build_zonelists_node(pg_data_t *pgdat,
 	struct zone *zone;
 
 	BUG_ON(k > ZONE_HIGHMEM);
-	for (zone = pgdat->node_zones + k; zone >= pgdat->node_zones; zone--) {
+
+	do {
+		zone = pgdat->node_zones + k;
 		if (populated_zone(zone)) {
 #ifndef CONFIG_HIGHMEM
-			BUG_ON(zone - pgdat->node_zones > ZONE_NORMAL);
+			BUG_ON(k > ZONE_NORMAL);
 #endif
 			zonelist->zones[j++] = zone;
 			check_highest_zone(k);
 		}
-	}
+		k--;
+
+	} while (k >= 0);
 	return j;
 }
 
-- 
cgit v1.2.3


From 070f80326a215d8e6c4fd6f175e28eb446c492bc Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Fri, 6 Jan 2006 00:11:19 -0800
Subject: [PATCH] build_zonelists_node(): rename args

Give j and r meaningful names.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ca978992c898..7f580779abdb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1460,25 +1460,25 @@ void show_free_areas(void)
  * Add all populated zones of a node to the zonelist.
  */
 static int __init build_zonelists_node(pg_data_t *pgdat,
-			struct zonelist *zonelist, int j, int k)
+			struct zonelist *zonelist, int nr_zones, int zone_type)
 {
 	struct zone *zone;
 
-	BUG_ON(k > ZONE_HIGHMEM);
+	BUG_ON(zone_type > ZONE_HIGHMEM);
 
 	do {
-		zone = pgdat->node_zones + k;
+		zone = pgdat->node_zones + zone_type;
 		if (populated_zone(zone)) {
 #ifndef CONFIG_HIGHMEM
-			BUG_ON(k > ZONE_NORMAL);
+			BUG_ON(zone_type > ZONE_NORMAL);
 #endif
-			zonelist->zones[j++] = zone;
-			check_highest_zone(k);
+			zonelist->zones[nr_zones++] = zone;
+			check_highest_zone(zone_type);
 		}
-		k--;
+		zone_type--;
 
-	} while (k >= 0);
-	return j;
+	} while (zone_type >= 0);
+	return nr_zones;
 }
 
 static inline int highest_zone(int zone_bits)
-- 
cgit v1.2.3


From a74609fafa2e5cc31d558012abaaa55ec9ad9da4 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:20 -0800
Subject: [PATCH] mm: page_state opt

Optimise page_state manipulations by introducing interrupt unsafe accessors
to page_state fields.  Callers must provide their own locking (either
disable interrupts or not update from interrupt context).

Switch over the hot callsites that can easily be moved under interrupts off
sections.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 89 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 50 insertions(+), 39 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7f580779abdb..fd47494cb989 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -424,9 +424,9 @@ void __free_pages_ok(struct page *page, unsigned int order)
 		return;
 
 	list_add(&page->lru, &list);
-	mod_page_state(pgfree, 1 << order);
 	kernel_map_pages(page, 1<<order, 0);
 	local_irq_save(flags);
+	__mod_page_state(pgfree, 1 << order);
 	free_pages_bulk(page_zone(page), 1, &list, order);
 	local_irq_restore(flags);
 }
@@ -674,18 +674,14 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z)
+static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
 {
 #ifdef CONFIG_NUMA
-	unsigned long flags;
-	int cpu;
 	pg_data_t *pg = z->zone_pgdat;
 	pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
 	struct per_cpu_pageset *p;
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	p = zone_pcp(z,cpu);
+	p = zone_pcp(z, cpu);
 	if (pg == orig) {
 		p->numa_hit++;
 	} else {
@@ -696,7 +692,6 @@ static void zone_statistics(struct zonelist *zonelist, struct zone *z)
 		p->local_node++;
 	else
 		p->other_node++;
-	local_irq_restore(flags);
 #endif
 }
 
@@ -716,11 +711,11 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 	if (free_pages_check(page))
 		return;
 
-	inc_page_state(pgfree);
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
+	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
 	if (pcp->count >= pcp->high)
@@ -753,49 +748,58 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
  * we cheat by calling it from here, in the order > 0 path.  Saves a branch
  * or two.
  */
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static struct page *buffered_rmqueue(struct zonelist *zonelist,
+			struct zone *zone, int order, gfp_t gfp_flags)
 {
 	unsigned long flags;
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	int cpu;
 
 again:
+	cpu  = get_cpu();
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
-		page = NULL;
-		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp[cold];
 		local_irq_save(flags);
-		if (!pcp->count)
+		if (!pcp->count) {
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (likely(pcp->count)) {
-			page = list_entry(pcp->list.next, struct page, lru);
-			list_del(&page->lru);
-			pcp->count--;
+			if (unlikely(!pcp->count))
+				goto failed;
 		}
-		local_irq_restore(flags);
-		put_cpu();
+		page = list_entry(pcp->list.next, struct page, lru);
+		list_del(&page->lru);
+		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		spin_unlock(&zone->lock);
+		if (!page)
+			goto failed;
 	}
 
-	if (page != NULL) {
-		BUG_ON(bad_range(zone, page));
-		mod_page_state_zone(zone, pgalloc, 1 << order);
-		if (prep_new_page(page, order))
-			goto again;
+	__mod_page_state_zone(zone, pgalloc, 1 << order);
+	zone_statistics(zonelist, zone, cpu);
+	local_irq_restore(flags);
+	put_cpu();
 
-		if (gfp_flags & __GFP_ZERO)
-			prep_zero_page(page, order, gfp_flags);
+	BUG_ON(bad_range(zone, page));
+	if (prep_new_page(page, order))
+		goto again;
 
-		if (order && (gfp_flags & __GFP_COMP))
-			prep_compound_page(page, order);
-	}
+	if (gfp_flags & __GFP_ZERO)
+		prep_zero_page(page, order, gfp_flags);
+
+	if (order && (gfp_flags & __GFP_COMP))
+		prep_compound_page(page, order);
 	return page;
+
+failed:
+	local_irq_restore(flags);
+	put_cpu();
+	return NULL;
 }
 
 #define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
@@ -871,9 +875,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 				continue;
 		}
 
-		page = buffered_rmqueue(*z, order, gfp_mask);
+		page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
 		if (page) {
-			zone_statistics(zonelist, *z);
 			break;
 		}
 	} while (*(++z) != NULL);
@@ -1248,7 +1251,7 @@ void get_full_page_state(struct page_state *ret)
 	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
 }
 
-unsigned long __read_page_state(unsigned long offset)
+unsigned long read_page_state_offset(unsigned long offset)
 {
 	unsigned long ret = 0;
 	int cpu;
@@ -1262,18 +1265,26 @@ unsigned long __read_page_state(unsigned long offset)
 	return ret;
 }
 
-void __mod_page_state(unsigned long offset, unsigned long delta)
+void __mod_page_state_offset(unsigned long offset, unsigned long delta)
+{
+	void *ptr;
+
+	ptr = &__get_cpu_var(page_states);
+	*(unsigned long *)(ptr + offset) += delta;
+}
+EXPORT_SYMBOL(__mod_page_state_offset);
+
+void mod_page_state_offset(unsigned long offset, unsigned long delta)
 {
 	unsigned long flags;
-	void* ptr;
+	void *ptr;
 
 	local_irq_save(flags);
 	ptr = &__get_cpu_var(page_states);
-	*(unsigned long*)(ptr + offset) += delta;
+	*(unsigned long *)(ptr + offset) += delta;
 	local_irq_restore(flags);
 }
-
-EXPORT_SYMBOL(__mod_page_state);
+EXPORT_SYMBOL(mod_page_state_offset);
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
-- 
cgit v1.2.3


From 84c2008af01132c4ca257ed9b595693c611df15d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Sun, 8 Jan 2006 01:00:28 -0800
Subject: [PATCH] revert "mm: page_state fixes"

Hugh says:

page_alloc_cpu_notify() specifically contains code to

 		/* Add dead cpu's page_states to our own. */

which handles this more efficiently.

Cc: Hugh Dickins <hugh@veritas.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fd47494cb989..0b98f428b07b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1204,6 +1204,7 @@ static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
 	int cpu = 0;
 
 	memset(ret, 0, sizeof(*ret));
+	cpus_and(*cpumask, *cpumask, cpu_online_map);
 
 	cpu = first_cpu(*cpumask);
 	while (cpu < NR_CPUS) {
@@ -1256,7 +1257,7 @@ unsigned long read_page_state_offset(unsigned long offset)
 	unsigned long ret = 0;
 	int cpu;
 
-	for_each_cpu(cpu) {
+	for_each_online_cpu(cpu) {
 		unsigned long in;
 
 		in = (unsigned long)&per_cpu(page_states, cpu) + offset;
-- 
cgit v1.2.3


From bec6b0c89b234090681a4516e20ac5debe3e7c59 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Sun, 8 Jan 2006 01:00:38 -0800
Subject: [PATCH] slab: remove nested #ifdef CONFIG_NUMA

For some reason there is an #ifdef CONFIG_NUMA within another #ifdef
CONFIG_NUMA in the page allocator.  Remove innermost #ifdef CONFIG_NUMA

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0b98f428b07b..5eeeadd9f66a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1883,7 +1883,6 @@ bad:
 
 static inline void free_zone_pagesets(int cpu)
 {
-#ifdef CONFIG_NUMA
 	struct zone *zone;
 
 	for_each_zone(zone) {
@@ -1892,7 +1891,6 @@ static inline void free_zone_pagesets(int cpu)
 		zone_pcp(zone, cpu) = NULL;
 		kfree(pset);
 	}
-#endif
 }
 
 static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
-- 
cgit v1.2.3


From 8ad4b1fb8205340dba16b63467bb23efc27264d6 Mon Sep 17 00:00:00 2001
From: Rohit Seth <rohit.seth@intel.com>
Date: Sun, 8 Jan 2006 01:00:40 -0800
Subject: [PATCH] Make high and batch sizes of per_cpu_pagelists configurable

As recently there has been lot of traffic on the right values for batch and
high water marks for per_cpu_pagelists.  This patch makes these two
variables configurable through /proc interface.

A new tunable /proc/sys/vm/percpu_pagelist_fraction is added.  This entry
controls the fraction of pages at most in each zone that are allocated for
each per cpu page list.  The min value for this is 8.  It means that we
don't allow more than 1/8th of pages in each zone to be allocated in any
single per_cpu_pagelist.

The batch value of each per cpu pagelist is also updated as a result.  It
is set to pcp->high/4.  The upper limit of batch is (PAGE_SHIFT * 8)

Signed-off-by: Rohit Seth <rohit.seth@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5eeeadd9f66a..2c46f697e8ff 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@ struct pglist_data *pgdat_list __read_mostly;
 unsigned long totalram_pages __read_mostly;
 unsigned long totalhigh_pages __read_mostly;
 long nr_swap_pages;
+int percpu_pagelist_fraction;
 
 static void fastcall free_hot_cold_page(struct page *page, int cold);
 
@@ -1831,6 +1832,24 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 	INIT_LIST_HEAD(&pcp->list);
 }
 
+/*
+ * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * to the value high for the pageset p.
+ */
+
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+				unsigned long high)
+{
+	struct per_cpu_pages *pcp;
+
+	pcp = &p->pcp[0]; /* hot list */
+	pcp->high = high;
+	pcp->batch = max(1UL, high/4);
+	if ((high/4) > (PAGE_SHIFT * 8))
+		pcp->batch = PAGE_SHIFT * 8;
+}
+
+
 #ifdef CONFIG_NUMA
 /*
  * Boot pageset table. One per cpu which is going to be used for all
@@ -1868,6 +1887,10 @@ static int __devinit process_zones(int cpu)
 			goto bad;
 
 		setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+
+		if (percpu_pagelist_fraction)
+			setup_pagelist_highmark(zone_pcp(zone, cpu),
+			 	(zone->present_pages / percpu_pagelist_fraction));
 	}
 
 	return 0;
@@ -2567,6 +2590,32 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
 	return 0;
 }
 
+/*
+ * percpu_pagelist_fraction - changes the pcp->high for each zone on each
+ * cpu.  It is the fraction of total pages in each zone that a hot per cpu pagelist
+ * can have before it gets flushed back to buddy allocator.
+ */
+
+int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	unsigned int cpu;
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (!write || (ret == -EINVAL))
+		return ret;
+	for_each_zone(zone) {
+		for_each_online_cpu(cpu) {
+			unsigned long  high;
+			high = zone->present_pages / percpu_pagelist_fraction;
+			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+		}
+	}
+	return 0;
+}
+
 __initdata int hashdist = HASHDIST_DEFAULT;
 
 #ifdef CONFIG_NUMA
-- 
cgit v1.2.3


From 23316bc86fd31c5d644a71c398ec41d9fecacec4 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sun, 8 Jan 2006 01:00:41 -0800
Subject: [PATCH] mm: cleanup zone_pcp

Use zone_pcp everywhere even though NUMA code "knows" the internal details
of the zone.  Stop other people trying to copy, and it looks nicer.

Also, only print the pagesets of online cpus in zoneinfo.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: "Seth, Rohit" <rohit.seth@intel.com>
Cc: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 2c46f697e8ff..6b92a945ae6b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -597,7 +597,7 @@ void drain_remote_pages(void)
 		if (zone->zone_pgdat->node_id == numa_node_id())
 			continue;
 
-		pset = zone->pageset[smp_processor_id()];
+		pset = zone_pcp(zone, smp_processor_id());
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
 
@@ -1881,12 +1881,12 @@ static int __devinit process_zones(int cpu)
 
 	for_each_zone(zone) {
 
-		zone->pageset[cpu] = kmalloc_node(sizeof(struct per_cpu_pageset),
+		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
 					 GFP_KERNEL, cpu_to_node(cpu));
-		if (!zone->pageset[cpu])
+		if (!zone_pcp(zone, cpu))
 			goto bad;
 
-		setup_pageset(zone->pageset[cpu], zone_batchsize(zone));
+		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
 
 		if (percpu_pagelist_fraction)
 			setup_pagelist_highmark(zone_pcp(zone, cpu),
@@ -1898,8 +1898,8 @@ bad:
 	for_each_zone(dzone) {
 		if (dzone == zone)
 			break;
-		kfree(dzone->pageset[cpu]);
-		dzone->pageset[cpu] = NULL;
+		kfree(zone_pcp(dzone, cpu));
+		zone_pcp(dzone, cpu) = NULL;
 	}
 	return -ENOMEM;
 }
@@ -1984,7 +1984,7 @@ static __devinit void zone_pcp_init(struct zone *zone)
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 #ifdef CONFIG_NUMA
 		/* Early boot. Slab allocator not functional yet */
-		zone->pageset[cpu] = &boot_pageset[cpu];
+		zone_pcp(zone, cpu) = &boot_pageset[cpu];
 		setup_pageset(&boot_pageset[cpu],0);
 #else
 		setup_pageset(zone_pcp(zone,cpu), batch);
@@ -2227,7 +2227,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 		seq_printf(m,
 			   ")"
 			   "\n  pagesets");
-		for (i = 0; i < ARRAY_SIZE(zone->pageset); i++) {
+		for_each_online_cpu(i) {
 			struct per_cpu_pageset *pageset;
 			int j;
 
-- 
cgit v1.2.3


From 48db57f8ff10eb09ab887ccb6150b0da0c7be24e Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Sun, 8 Jan 2006 01:00:42 -0800
Subject: [PATCH] mm: free_pages opt

Try to streamline free_pages_bulk by ensuring callers don't pass in a
'count' that exceeds the list size.

Some cleanups:
Rename __free_pages_bulk to __free_one_page.
Put the page list manipulation from __free_pages_ok into free_one_page.
Make __free_pages_ok static.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 58 +++++++++++++++++++++++++++++----------------------------
 1 file changed, 30 insertions(+), 28 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6b92a945ae6b..ad3d0202cdef 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -308,7 +308,7 @@ static inline int page_is_buddy(struct page *page, int order)
  * -- wli
  */
 
-static inline void __free_pages_bulk (struct page *page,
+static inline void __free_one_page(struct page *page,
 		struct zone *zone, unsigned int order)
 {
 	unsigned long page_idx;
@@ -383,40 +383,42 @@ static inline int free_pages_check(struct page *page)
  * And clear the zone's pages_scanned counter, to hold off the "all pages are
  * pinned" detection logic.
  */
-static int
-free_pages_bulk(struct zone *zone, int count,
-		struct list_head *list, unsigned int order)
+static void free_pages_bulk(struct zone *zone, int count,
+					struct list_head *list, int order)
 {
-	struct page *page = NULL;
-	int ret = 0;
-
 	spin_lock(&zone->lock);
 	zone->all_unreclaimable = 0;
 	zone->pages_scanned = 0;
-	while (!list_empty(list) && count--) {
+	while (count--) {
+		struct page *page;
+
+		BUG_ON(list_empty(list));
 		page = list_entry(list->prev, struct page, lru);
-		/* have to delete it as __free_pages_bulk list manipulates */
+		/* have to delete it as __free_one_page list manipulates */
 		list_del(&page->lru);
-		__free_pages_bulk(page, zone, order);
-		ret++;
+		__free_one_page(page, zone, order);
 	}
 	spin_unlock(&zone->lock);
-	return ret;
 }
 
-void __free_pages_ok(struct page *page, unsigned int order)
+static void free_one_page(struct zone *zone, struct page *page, int order)
 {
-	unsigned long flags;
 	LIST_HEAD(list);
+	list_add(&page->lru, &list);
+	free_pages_bulk(zone, 1, &list, order);
+}
+
+static void __free_pages_ok(struct page *page, unsigned int order)
+{
+	unsigned long flags;
 	int i;
 	int reserved = 0;
 
 	arch_free_page(page, order);
 
 #ifndef CONFIG_MMU
-	if (order > 0)
-		for (i = 1 ; i < (1 << order) ; ++i)
-			__put_page(page + i);
+	for (i = 1 ; i < (1 << order) ; ++i)
+		__put_page(page + i);
 #endif
 
 	for (i = 0 ; i < (1 << order) ; ++i)
@@ -424,11 +426,10 @@ void __free_pages_ok(struct page *page, unsigned int order)
 	if (reserved)
 		return;
 
-	list_add(&page->lru, &list);
-	kernel_map_pages(page, 1<<order, 0);
+	kernel_map_pages(page, 1 << order, 0);
 	local_irq_save(flags);
 	__mod_page_state(pgfree, 1 << order);
-	free_pages_bulk(page_zone(page), 1, &list, order);
+	free_one_page(page_zone(page), page, order);
 	local_irq_restore(flags);
 }
 
@@ -602,9 +603,8 @@ void drain_remote_pages(void)
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			if (pcp->count)
-				pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+			pcp->count = 0;
 		}
 	}
 	local_irq_restore(flags);
@@ -627,8 +627,8 @@ static void __drain_pages(unsigned int cpu)
 
 			pcp = &pset->pcp[i];
 			local_irq_save(flags);
-			pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+			pcp->count = 0;
 			local_irq_restore(flags);
 		}
 	}
@@ -719,8 +719,10 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
-	if (pcp->count >= pcp->high)
-		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+	if (pcp->count >= pcp->high) {
+		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+		pcp->count -= pcp->batch;
+	}
 	local_irq_restore(flags);
 	put_cpu();
 }
@@ -759,7 +761,7 @@ static struct page *buffered_rmqueue(struct zonelist *zonelist,
 
 again:
 	cpu  = get_cpu();
-	if (order == 0) {
+	if (likely(order == 0)) {
 		struct per_cpu_pages *pcp;
 
 		pcp = &zone_pcp(zone, cpu)->pcp[cold];
-- 
cgit v1.2.3


From 3e0d98b9f1eb757fc98efc84e74e54a08308aa73 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Sun, 8 Jan 2006 01:01:49 -0800
Subject: [PATCH] cpuset: memory pressure meter

Provide a simple per-cpuset metric of memory pressure, tracking the -rate-
that the tasks in a cpuset call try_to_free_pages(), the synchronous
(direct) memory reclaim code.

This enables batch managers monitoring jobs running in dedicated cpusets to
efficiently detect what level of memory pressure that job is causing.

This is useful both on tightly managed systems running a wide mix of
submitted jobs, which may choose to terminate or reprioritize jobs that are
trying to use more memory than allowed on the nodes assigned them, and with
tightly coupled, long running, massively parallel scientific computing jobs
that will dramatically fail to meet required performance goals if they
start to use more memory than allowed to them.

This patch just provides a very economical way for the batch manager to
monitor a cpuset for signs of memory pressure.  It's up to the batch
manager or other user code to decide what to do about it and take action.

==> Unless this feature is enabled by writing "1" to the special file
    /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
    code of __alloc_pages() for this metric reduces to simply noticing
    that the cpuset_memory_pressure_enabled flag is zero.  So only
    systems that enable this feature will compute the metric.

Why a per-cpuset, running average:

    Because this meter is per-cpuset, rather than per-task or mm, the
    system load imposed by a batch scheduler monitoring this metric is
    sharply reduced on large systems, because a scan of the tasklist can be
    avoided on each set of queries.

    Because this meter is a running average, instead of an accumulating
    counter, a batch scheduler can detect memory pressure with a single
    read, instead of having to read and accumulate results for a period of
    time.

    Because this meter is per-cpuset rather than per-task or mm, the
    batch scheduler can obtain the key information, memory pressure in a
    cpuset, with a single read, rather than having to query and accumulate
    results over all the (dynamically changing) set of tasks in the cpuset.

A per-cpuset simple digital filter (requires a spinlock and 3 words of data
per-cpuset) is kept, and updated by any task attached to that cpuset, if it
enters the synchronous (direct) page reclaim code.

A per-cpuset file provides an integer number representing the recent
(half-life of 10 seconds) rate of direct page reclaims caused by the tasks
in the cpuset, in units of reclaims attempted per second, times 1000.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ad3d0202cdef..e0e84924171b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -976,6 +976,7 @@ rebalance:
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
+	cpuset_memory_pressure_bump();
 	p->flags |= PF_MEMALLOC;
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
-- 
cgit v1.2.3


From de5097c2e73f826302cd8957c225b3725e0c7553 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 9 Jan 2006 15:59:21 -0800
Subject: [PATCH] mutex subsystem, more debugging code

more mutex debugging: check for held locks during memory freeing,
task exit, enable sysrq printouts, etc.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Arjan van de Ven <arjan@infradead.org>
---
 mm/page_alloc.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e0e84924171b..a5e6891f7bb6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -415,6 +415,9 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	int reserved = 0;
 
 	arch_free_page(page, order);
+	if (!PageHighMem(page))
+		mutex_debug_check_no_locks_freed(page_address(page),
+			page_address(page+(1<<order)));
 
 #ifndef CONFIG_MMU
 	for (i = 1 ; i < (1 << order) ; ++i)
-- 
cgit v1.2.3


From a4fc7ab1d065a9dd89ed0e74439ef87d4a16e980 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Wed, 11 Jan 2006 14:41:26 +0000
Subject: [PATCH] fix/simplify mutex debugging code

Let's switch mutex_debug_check_no_locks_freed() to take (addr, len) as
arguments instead, since all its callers were just calculating the 'to'
address for themselves anyway... (and sometimes doing so badly).

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a5e6891f7bb6..8e363536e2da 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -417,7 +417,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 	arch_free_page(page, order);
 	if (!PageHighMem(page))
 		mutex_debug_check_no_locks_freed(page_address(page),
-			page_address(page+(1<<order)));
+						 PAGE_SIZE<<order);
 
 #ifndef CONFIG_MMU
 	for (i = 1 ; i < (1 << order) ; ++i)
-- 
cgit v1.2.3


From 7365f3d169a1eae00e713c1a4eea90e86415dcc5 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Wed, 11 Jan 2006 12:17:18 -0800
Subject: [PATCH] Restore KERN_EMERG to each line printed by bad_page

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8e363536e2da..ce991b173aa9 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -137,9 +137,9 @@ static inline int bad_range(struct zone *zone, struct page *page)
 static void bad_page(struct page *page)
 {
 	printk(KERN_EMERG "Bad page state in process '%s'\n"
-		"page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
-		"Trying to fix it up, but a reboot is needed\n"
-		"Backtrace:\n",
+		KERN_EMERG "page:%p flags:0x%0*lx mapping:%p mapcount:%d count:%d\n"
+		KERN_EMERG "Trying to fix it up, but a reboot is needed\n"
+		KERN_EMERG "Backtrace:\n",
 		current->comm, page, (int)(2*sizeof(unsigned long)),
 		(unsigned long)page->flags, page->mapping,
 		page_mapcount(page), page_count(page));
-- 
cgit v1.2.3


From 4eac915d02453e81a32595cd7423492c81337a26 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 11 Jan 2006 12:17:19 -0800
Subject: [PATCH] mm: gfp_atomic comments

Clarify in comments that GFP_ATOMIC means both "don't sleep" and "use
emergency pools", hence both ALLOC_HARDER and ALLOC_HIGH.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ce991b173aa9..d41a0662d4da 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -931,7 +931,8 @@ restart:
 	 *
 	 * The caller may dip into page reserves a bit more if the caller
 	 * cannot run direct reclaim, or if the caller has realtime scheduling
-	 * policy.
+	 * policy or is asking for __GFP_HIGH memory.  GFP_ATOMIC requests will
+	 * set both ALLOC_HARDER (!wait) and ALLOC_HIGH (__GFP_HIGH).
 	 */
 	alloc_flags = ALLOC_WMARK_MIN;
 	if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
-- 
cgit v1.2.3


From cbe8dd4af2967ee1c2d54ec9d4db35cf3ecc98d3 Mon Sep 17 00:00:00 2001
From: Greg Ungerer <gerg@snapgear.com>
Date: Thu, 12 Jan 2006 01:05:24 -0800
Subject: [PATCH] memmap_init_zone(): remove uneccesary page++

Remove unecessary page++ from memmap_init_zone loop.

Signed-off-by: Greg Ungerer <gerg@uclinux.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/page_alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/page_alloc.c')

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d41a0662d4da..8c960b469593 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1742,7 +1742,7 @@ void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 	unsigned long end_pfn = start_pfn + size;
 	unsigned long pfn;
 
-	for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) {
+	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		if (!early_pfn_valid(pfn))
 			continue;
 		page = pfn_to_page(pfn);
-- 
cgit v1.2.3