summaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c112
1 files changed, 85 insertions, 27 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dc523a1f270d..253a450c400d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -39,6 +39,7 @@
#include <linux/mempolicy.h>
#include <asm/tlbflush.h>
+#include <asm/div64.h>
#include "internal.h"
/*
@@ -51,6 +52,7 @@ nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
EXPORT_SYMBOL(node_possible_map);
unsigned long totalram_pages __read_mostly;
unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;
@@ -151,7 +153,8 @@ static void bad_page(struct page *page)
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
- 1 << PG_writeback );
+ 1 << PG_writeback |
+ 1 << PG_buddy );
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
@@ -230,18 +233,20 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
* zone->lock is already acquired when we use these.
* So, we don't need atomic page->flags operations here.
*/
-static inline unsigned long page_order(struct page *page) {
+static inline unsigned long page_order(struct page *page)
+{
return page_private(page);
}
-static inline void set_page_order(struct page *page, int order) {
+static inline void set_page_order(struct page *page, int order)
+{
set_page_private(page, order);
- __SetPagePrivate(page);
+ __SetPageBuddy(page);
}
static inline void rmv_page_order(struct page *page)
{
- __ClearPagePrivate(page);
+ __ClearPageBuddy(page);
set_page_private(page, 0);
}
@@ -280,11 +285,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
* (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
*
+ * For recording page's order, we use page_private(page).
*/
static inline int page_is_buddy(struct page *page, int order)
{
@@ -293,11 +300,11 @@ static inline int page_is_buddy(struct page *page, int order)
return 0;
#endif
- if (PagePrivate(page) &&
- (page_order(page) == order) &&
- page_count(page) == 0)
- return 1;
- return 0;
+ if (PageBuddy(page) && page_order(page) == order) {
+ BUG_ON(page_count(page) != 0);
+ return 1;
+ }
+ return 0;
}
/*
@@ -313,7 +320,7 @@ static inline int page_is_buddy(struct page *page, int order)
* as necessary, plus some accounting needed to play nicely with other
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
* order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
@@ -376,7 +383,8 @@ static inline int free_pages_check(struct page *page)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
@@ -524,7 +532,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_reserved ))))
+ 1 << PG_reserved |
+ 1 << PG_buddy ))))
bad_page(page);
/*
@@ -942,7 +951,7 @@ restart:
goto got_pg;
do {
- if (cpuset_zone_allowed(*z, gfp_mask))
+ if (cpuset_zone_allowed(*z, gfp_mask|__GFP_HARDWALL))
wakeup_kswapd(*z, order);
} while (*(++z));
@@ -961,7 +970,8 @@ restart:
alloc_flags |= ALLOC_HARDER;
if (gfp_mask & __GFP_HIGH)
alloc_flags |= ALLOC_HIGH;
- alloc_flags |= ALLOC_CPUSET;
+ if (wait)
+ alloc_flags |= ALLOC_CPUSET;
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
@@ -1954,7 +1964,7 @@ static inline void free_zone_pagesets(int cpu)
}
}
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
+static int pageset_cpuup_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -2115,14 +2125,22 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat)
#ifdef CONFIG_FLAT_NODE_MEM_MAP
/* ia64 gets its own node_mem_map, before this, without bootmem */
if (!pgdat->node_mem_map) {
- unsigned long size;
+ unsigned long size, start, end;
struct page *map;
- size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+ /*
+ * The zone's endpoints aren't required to be MAX_ORDER
+ * aligned but the node_mem_map endpoints must be in order
+ * for the buddy allocator to function correctly.
+ */
+ start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1);
+ end = pgdat->node_start_pfn + pgdat->node_spanned_pages;
+ end = ALIGN(end, MAX_ORDER_NR_PAGES);
+ size = (end - start) * sizeof(struct page);
map = alloc_remap(pgdat->node_id, size);
if (!map)
map = alloc_bootmem_node(pgdat, size);
- pgdat->node_mem_map = map;
+ pgdat->node_mem_map = map + (pgdat->node_start_pfn - start);
}
#ifdef CONFIG_FLATMEM
/*
@@ -2472,6 +2490,38 @@ void __init page_alloc_init(void)
}
/*
+ * calculate_totalreserve_pages - called when sysctl_lower_zone_reserve_ratio
+ * or min_free_kbytes changes.
+ */
+static void calculate_totalreserve_pages(void)
+{
+ struct pglist_data *pgdat;
+ unsigned long reserve_pages = 0;
+ int i, j;
+
+ for_each_online_pgdat(pgdat) {
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+ unsigned long max = 0;
+
+ /* Find valid and maximum lowmem_reserve in the zone */
+ for (j = i; j < MAX_NR_ZONES; j++) {
+ if (zone->lowmem_reserve[j] > max)
+ max = zone->lowmem_reserve[j];
+ }
+
+ /* we treat pages_high as reserved pages. */
+ max += zone->pages_high;
+
+ if (max > zone->present_pages)
+ max = zone->present_pages;
+ reserve_pages += max;
+ }
+ }
+ totalreserve_pages = reserve_pages;
+}
+
+/*
* setup_per_zone_lowmem_reserve - called whenever
* sysctl_lower_zone_reserve_ratio changes. Ensures that each zone
* has a correct pages reserved value, so an adequate number of
@@ -2502,6 +2552,9 @@ static void setup_per_zone_lowmem_reserve(void)
}
}
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*
@@ -2523,9 +2576,11 @@ void setup_per_zone_pages_min(void)
}
for_each_zone(zone) {
- unsigned long tmp;
+ u64 tmp;
+
spin_lock_irqsave(&zone->lru_lock, flags);
- tmp = (pages_min * zone->present_pages) / lowmem_pages;
+ tmp = (u64)pages_min * zone->present_pages;
+ do_div(tmp, lowmem_pages);
if (is_highmem(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -2552,10 +2607,13 @@ void setup_per_zone_pages_min(void)
zone->pages_min = tmp;
}
- zone->pages_low = zone->pages_min + tmp / 4;
- zone->pages_high = zone->pages_min + tmp / 2;
+ zone->pages_low = zone->pages_min + (tmp >> 2);
+ zone->pages_high = zone->pages_min + (tmp >> 1);
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
+
+ /* update totalreserve_pages */
+ calculate_totalreserve_pages();
}
/*