From f650316c8b80fe61a31b8b575405b37cbf170459 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Fri, 5 Mar 2010 13:41:52 -0800 Subject: mm/page_alloc.c: remove duplicate call to trace_mm_page_free_direct trace_mm_page_free_direct() is called in function __free_pages(). But it is called again in free_hot_page() if order == 0 and produce duplicate records in trace file for mm_page_free_direct event. As below: K-PID CPU# TIMESTAMP FUNCTION gnome-terminal-1567 [000] 4415.246466: mm_page_free_direct: page=ffffea0003db9f40 pfn=1155800 order=0 gnome-terminal-1567 [000] 4415.246468: mm_page_free_direct: page=ffffea0003db9f40 pfn=1155800 order=0 gnome-terminal-1567 [000] 4415.246506: mm_page_alloc: page=ffffea0003db9f40 pfn=1155800 order=0 migratetype=0 gfp_flags=GFP_KERNEL gnome-terminal-1567 [000] 4415.255557: mm_page_free_direct: page=ffffea0003db9f40 pfn=1155800 order=0 gnome-terminal-1567 [000] 4415.255557: mm_page_free_direct: page=ffffea0003db9f40 pfn=1155800 order=0 This patch removes the first call and adds a call to trace_mm_page_free_direct() in __free_pages_ok(). Signed-off-by: Li Hong Cc: Mel Gorman Cc: Rik van Riel Cc: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a6b17aa4740b..ee37091b191b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -583,6 +583,7 @@ static void __free_pages_ok(struct page *page, unsigned int order) int bad = 0; int wasMlocked = __TestClearPageMlocked(page); + trace_mm_page_free_direct(page, order); kmemcheck_free_shadow(page, order); for (i = 0 ; i < (1 << order) ; ++i) @@ -2008,7 +2009,6 @@ void __pagevec_free(struct pagevec *pvec) void __free_pages(struct page *page, unsigned int order) { if (put_page_testzero(page)) { - trace_mm_page_free_direct(page, order); if (order == 0) free_hot_page(page); else -- cgit v1.2.3 From c475dab63ae798d81fb597a6a1859986b296d9d0 Mon Sep 17 00:00:00 2001 From: Li Hong Date: Fri, 5 Mar 2010 13:41:53 -0800 Subject: mm/page_alloc.c: adjust a call site to trace_mm_page_free_direct Move a call of trace_mm_page_free_direct() from free_hot_page() to free_hot_cold_page(). It is clearer and close to kmemcheck_free_shadow(), as it is done in function __free_pages_ok(). Signed-off-by: Li Hong Cc: Mel Gorman Cc: Rik van Riel Cc: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ee37091b191b..caa7df60a4a1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1083,6 +1083,7 @@ static void free_hot_cold_page(struct page *page, int cold) int migratetype; int wasMlocked = __TestClearPageMlocked(page); + trace_mm_page_free_direct(page, 0); kmemcheck_free_shadow(page, 0); if (PageAnon(page)) @@ -1136,7 +1137,6 @@ out: void free_hot_page(struct page *page) { - trace_mm_page_free_direct(page, 0); free_hot_cold_page(page, 0); } -- cgit v1.2.3 From fc91668eaf9e7ba61e867fc2218b7e9fb67faa4f Mon Sep 17 00:00:00 2001 From: Li Hong Date: Fri, 5 Mar 2010 13:41:54 -0800 Subject: mm: remove free_hot_page() free_hot_page() is just a wrapper around free_hot_cold_page() with parameter 'cold = 0'. After adding a clear comment for free_hot_cold_page(), it is reasonable to remove a level of call. [akpm@linux-foundation.org: fix build] Signed-off-by: Li Hong Cc: Mel Gorman Cc: Rik van Riel Cc: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Cc: KOSAKI Motohiro Cc: Americo Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index caa7df60a4a1..80bcee0c5034 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1074,8 +1074,9 @@ void mark_free_pages(struct zone *zone) /* * Free a 0-order page + * cold == 1 ? free a cold page : free a hot page */ -static void free_hot_cold_page(struct page *page, int cold) +void free_hot_cold_page(struct page *page, int cold) { struct zone *zone = page_zone(page); struct per_cpu_pages *pcp; @@ -1135,11 +1136,6 @@ out: local_irq_restore(flags); } -void free_hot_page(struct page *page) -{ - free_hot_cold_page(page, 0); -} - /* * split_page takes a non-compound higher-order page, and splits it into * n (1< Date: Fri, 5 Mar 2010 13:41:55 -0800 Subject: mm: restore zone->all_unreclaimable to independence word commit e815af95 ("change all_unreclaimable zone member to flags") changed all_unreclaimable member to bit flag. But it had an undesireble side effect. free_one_page() is one of most hot path in linux kernel and increasing atomic ops in it can reduce kernel performance a bit. Thus, this patch revert such commit partially. at least all_unreclaimable shouldn't share memory word with other zone flags. [akpm@linux-foundation.org: fix patch interaction] Signed-off-by: KOSAKI Motohiro Cc: David Rientjes Cc: Wu Fengguang Cc: KAMEZAWA Hiroyuki Cc: Minchan Kim Cc: Huang Shijie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 80bcee0c5034..0734bedabd9c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -530,7 +530,7 @@ static void free_pcppages_bulk(struct zone *zone, int count, int batch_free = 0; spin_lock(&zone->lock); - zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); + zone->all_unreclaimable = 0; zone->pages_scanned = 0; __mod_zone_page_state(zone, NR_FREE_PAGES, count); @@ -568,7 +568,7 @@ static void free_one_page(struct zone *zone, struct page *page, int order, int migratetype) { spin_lock(&zone->lock); - zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); + zone->all_unreclaimable = 0; zone->pages_scanned = 0; __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order); @@ -2262,7 +2262,7 @@ void show_free_areas(void) K(zone_page_state(zone, NR_BOUNCE)), K(zone_page_state(zone, NR_WRITEBACK_TEMP)), zone->pages_scanned, - (zone_is_all_unreclaimable(zone) ? "yes" : "no") + (zone->all_unreclaimable ? "yes" : "no") ); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) -- cgit v1.2.3 From 452aa6999e6703ffbddd7f6ea124d3968915f3e3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 5 Mar 2010 13:42:13 -0800 Subject: mm/pm: force GFP_NOIO during suspend/hibernation and resume There are quite a few GFP_KERNEL memory allocations made during suspend/hibernation and resume that may cause the system to hang, because the I/O operations they depend on cannot be completed due to the underlying devices being suspended. Avoid this problem by clearing the __GFP_IO and __GFP_FS bits in gfp_allowed_mask before suspend/hibernation and restoring the original values of these bits in gfp_allowed_mask durig the subsequent resume. [akpm@linux-foundation.org: fix CONFIG_PM=n linkage] Signed-off-by: Rafael J. Wysocki Reported-by: Maxim Levitsky Cc: Sebastian Ott Cc: Benjamin Herrenschmidt Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 0734bedabd9c..298f307c63a1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -76,6 +76,31 @@ unsigned long totalreserve_pages __read_mostly; int percpu_pagelist_fraction; gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; +#ifdef CONFIG_PM_SLEEP +/* + * The following functions are used by the suspend/hibernate code to temporarily + * change gfp_allowed_mask in order to avoid using I/O during memory allocations + * while devices are suspended. To avoid races with the suspend/hibernate code, + * they should always be called with pm_mutex held (gfp_allowed_mask also should + * only be modified with pm_mutex held, unless the suspend/hibernate code is + * guaranteed not to run in parallel with that modification). + */ +void set_gfp_allowed_mask(gfp_t mask) +{ + WARN_ON(!mutex_is_locked(&pm_mutex)); + gfp_allowed_mask = mask; +} + +gfp_t clear_gfp_allowed_mask(gfp_t mask) +{ + gfp_t ret = gfp_allowed_mask; + + WARN_ON(!mutex_is_locked(&pm_mutex)); + gfp_allowed_mask &= ~mask; + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE int pageblock_order __read_mostly; #endif -- cgit v1.2.3 From 72f0ba0252e7177965255ed2c663be126b6b5f91 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 5 Mar 2010 13:42:14 -0800 Subject: mm: suppress pfn range output for zones without pages free_area_init_nodes() emits pfn ranges for all zones on the system. There may be no pages on a higher zone, however, due to memory limitations or the use of the mem= kernel parameter. For example: Zone PFN ranges: DMA 0x00000001 -> 0x00001000 DMA32 0x00001000 -> 0x00100000 Normal 0x00100000 -> 0x00100000 The implementation copies the previous zone's highest pfn, if any, as the next zone's lowest pfn. If its highest pfn is then greater than the amount of addressable memory, the upper memory limit is used instead. Thus, both the lowest and highest possible pfn for higher zones without memory may be the same. The pfn range for zones without memory is now shown as "empty" instead. Signed-off-by: David Rientjes Cc: Mel Gorman Reviewed-by: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 298f307c63a1..a8182c89de59 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4392,8 +4392,12 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) for (i = 0; i < MAX_NR_ZONES; i++) { if (i == ZONE_MOVABLE) continue; - printk(" %-8s %0#10lx -> %0#10lx\n", - zone_names[i], + printk(" %-8s ", zone_names[i]); + if (arch_zone_lowest_possible_pfn[i] == + arch_zone_highest_possible_pfn[i]) + printk("empty\n"); + else + printk("%0#10lx -> %0#10lx\n", arch_zone_lowest_possible_pfn[i], arch_zone_highest_possible_pfn[i]); } -- cgit v1.2.3 From 2d30a1f6315b8940537e8e98882c6038fbac9ba5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 10 Mar 2010 15:20:40 -0800 Subject: mm: do not iterate over NR_CPUS in __zone_pcp_update() __zone_pcp_update() iterates over NR_CPUS instead of limiting the access to the possible cpus. This might result in access to uninitialized areas as the per cpu allocator only populates the per cpu memory for possible cpus. This problem was created as a result of the dynamic allocation of pagesets from percpu memory that went in during the merge window - commit 99dcc3e5a94ed491fbef402831d8c0bbb267f995 ("this_cpu: Page allocator conversion"). Signed-off-by: Thomas Gleixner Acked-by: Pekka Enberg Acked-by: Tejun Heo Acked-by: Christoph Lameter Acked-by: Mel Gorman Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a8182c89de59..78ce90dd671f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3224,7 +3224,7 @@ static int __zone_pcp_update(void *data) int cpu; unsigned long batch = zone_batchsize(zone), flags; - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_possible_cpu(cpu) { struct per_cpu_pageset *pset; struct per_cpu_pages *pcp; -- cgit v1.2.3 From 718a38211bf4375c0a1efad3afbc5dbaef5d33f9 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Wed, 10 Mar 2010 15:20:43 -0800 Subject: mm: introduce dump_page() and print symbolic flag names - introduce dump_page() to print the page info for debugging some error condition. - convert three mm users: bad_page(), print_bad_pte() and memory offline failure. - print an extra field: the symbolic names of page->flags Example dump_page() output: [ 157.521694] page:ffffea0000a7cba8 count:2 mapcount:1 mapping:ffff88001c901791 index:0x147 [ 157.525570] page flags: 0x100000000100068(uptodate|lru|active|swapbacked) Signed-off-by: Wu Fengguang Cc: Ingo Molnar Cc: Alex Chiang Cc: Rik van Riel Cc: Andi Kleen Cc: Mel Gorman Cc: Christoph Lameter Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 79 insertions(+), 4 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 78ce90dd671f..d03c946d5566 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include @@ -288,10 +289,7 @@ static void bad_page(struct page *page) printk(KERN_ALERT "BUG: Bad page state in process %s pfn:%05lx\n", current->comm, page_to_pfn(page)); - printk(KERN_ALERT - "page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n", - page, (void *)page->flags, page_count(page), - page_mapcount(page), page->mapping, page->index); + dump_page(page); dump_stack(); out: @@ -5183,3 +5181,80 @@ bool is_free_buddy_page(struct page *page) return order < MAX_ORDER; } #endif + +static struct trace_print_flags pageflag_names[] = { + {1UL << PG_locked, "locked" }, + {1UL << PG_error, "error" }, + {1UL << PG_referenced, "referenced" }, + {1UL << PG_uptodate, "uptodate" }, + {1UL << PG_dirty, "dirty" }, + {1UL << PG_lru, "lru" }, + {1UL << PG_active, "active" }, + {1UL << PG_slab, "slab" }, + {1UL << PG_owner_priv_1, "owner_priv_1" }, + {1UL << PG_arch_1, "arch_1" }, + {1UL << PG_reserved, "reserved" }, + {1UL << PG_private, "private" }, + {1UL << PG_private_2, "private_2" }, + {1UL << PG_writeback, "writeback" }, +#ifdef CONFIG_PAGEFLAGS_EXTENDED + {1UL << PG_head, "head" }, + {1UL << PG_tail, "tail" }, +#else + {1UL << PG_compound, "compound" }, +#endif + {1UL << PG_swapcache, "swapcache" }, + {1UL << PG_mappedtodisk, "mappedtodisk" }, + {1UL << PG_reclaim, "reclaim" }, + {1UL << PG_buddy, "buddy" }, + {1UL << PG_swapbacked, "swapbacked" }, + {1UL << PG_unevictable, "unevictable" }, +#ifdef CONFIG_MMU + {1UL << PG_mlocked, "mlocked" }, +#endif +#ifdef CONFIG_ARCH_USES_PG_UNCACHED + {1UL << PG_uncached, "uncached" }, +#endif +#ifdef CONFIG_MEMORY_FAILURE + {1UL << PG_hwpoison, "hwpoison" }, +#endif + {-1UL, NULL }, +}; + +static void dump_page_flags(unsigned long flags) +{ + const char *delim = ""; + unsigned long mask; + int i; + + printk(KERN_ALERT "page flags: %#lx(", flags); + + /* remove zone id */ + flags &= (1UL << NR_PAGEFLAGS) - 1; + + for (i = 0; pageflag_names[i].name && flags; i++) { + + mask = pageflag_names[i].mask; + if ((flags & mask) != mask) + continue; + + flags &= ~mask; + printk("%s%s", delim, pageflag_names[i].name); + delim = "|"; + } + + /* check for left over flags */ + if (flags) + printk("%s%#lx", delim, flags); + + printk(")\n"); +} + +void dump_page(struct page *page) +{ + printk(KERN_ALERT + "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n", + page, page_count(page), page_mapcount(page), + page->mapping, page->index); + dump_page_flags(page->flags); +} -- cgit v1.2.3