author     Javier Viguera <javier.viguera@digi.com>  2012-01-25 19:45:32 +0100
committer  Javier Viguera <javier.viguera@digi.com>  2012-01-25 19:45:32 +0100
commit     92f4a7fca23e9bb686fa93c49819a661fd9577fc
tree       157c8b0432b992b9749e0ec53ca60ad6bcdfbd51 /mm
parent     fd98745dd8f0866c5597ae731a5a69fe0b4d2515
Revert "mm: page allocator: adjust the per-cpu counter threshold when memory is low"
Otherwise, playing full-HD video on HDMI crashes with:
[<800303cc>] (dump_backtrace+0x0/0x10c) from [<803d4104>] (dump_stack+0x18/0x1c)
 r7:8053a048 r6:8053a048 r5:0000000c r4:00000001
[<803d40ec>] (dump_stack+0x0/0x1c) from [<800891d4>] (__alloc_pages_nodemask+0x524/0x57c)
[<80088cb0>] (__alloc_pages_nodemask+0x0/0x57c) from [<80031a4c>] (__dma_alloc+0xf4/0x2b8)
[<80031958>] (__dma_alloc+0x0/0x2b8) from [<80031c38>] (dma_alloc_writecombine+0x28/0x34)
[<80031c10>] (dma_alloc_writecombine+0x0/0x34) from [<801d51e0>] (mxcfb_set_par+0xd8/0x464)
....
This reverts commit 7c2141d484fbfa03af5f83602162d9576564121b.
Signed-off-by: Javier Viguera <javier.viguera@digi.com>
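
For scale: mxcfb_set_par() asks dma_alloc_writecombine() for the whole visible
framebuffer as one physically contiguous buffer, so at full HD the page
allocator has to satisfy a large high-order request at exactly the moment free
memory is low or mis-estimated. A back-of-envelope sketch (standalone C;
32 bpp and 4 KiB pages are illustrative assumptions, not taken from this
commit):

	#include <stdio.h>

	/* Rough size of the buffer mxcfb_set_par() requests at 1080p.
	 * Assumptions: 32 bits per pixel, 4 KiB pages, single buffer. */
	int main(void)
	{
		long bytes = 1920L * 1080 * 4;	/* one 1080p frame */
		long pages = (bytes + 4095) / 4096;
		int order = 0;

		while ((1L << order) < pages)	/* smallest buddy order that fits */
			order++;

		printf("%ld bytes -> %ld pages -> order-%d allocation\n",
		       bytes, pages, order);
		return 0;
	}

This prints 8294400 bytes -> 2025 pages -> order-11 allocation: a single
~8 MiB chunk, which is exactly the kind of request that fails first when
zone_watermark_ok() and the free-page accounting disagree.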
Diffstat (limited to 'mm')
-rw-r--r--  mm/mmzone.c     |  21
-rw-r--r--  mm/page_alloc.c |  35
-rw-r--r--  mm/vmscan.c     |  25
-rw-r--r--  mm/vmstat.c     |  68
4 files changed, 41 insertions, 108 deletions
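
Background for the diff below: on SMP, NR_FREE_PAGES is a global counter plus
per-cpu deltas that are only folded in once they cross a per-cpu
stat_threshold, so a cheap read may be stale by up to
num_online_cpus() * stat_threshold pages. The reverted patch bounded that
drift by shrinking the thresholds while kswapd is awake (see the removed
calculate_pressure_threshold() in mm/vmstat.c below); the revert instead goes
back to taking an exact snapshot whenever the estimate falls under
zone->percpu_drift_mark. A quick worst-case bound (standalone C; the CPU
count is an illustrative assumption):

	#include <stdio.h>

	/* Worst case: every CPU holds a delta just under the fold
	 * threshold, all in the same direction. */
	int main(void)
	{
		int cpus = 4;			/* hypothetical SMP SoC */
		int stat_threshold = 125;	/* kernel's documented maximum */
		long drift = (long)cpus * stat_threshold;

		printf("estimate may be off by %ld pages (%ld KiB)\n",
		       drift, drift * 4);	/* 4 KiB pages */
		return 0;
	}

With these numbers the estimate can overstate free memory by 500 pages
(~2 MiB): enough for a watermark check to pass while the real free count is
already below the min watermark.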
diff --git a/mm/mmzone.c b/mm/mmzone.c
index f5b7d1760213..e35bfb82c855 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pfn,
 	return 1;
 }
 #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
+
+#ifdef CONFIG_SMP
+/* Called when a more accurate view of NR_FREE_PAGES is needed */
+unsigned long zone_nr_free_pages(struct zone *zone)
+{
+	unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES);
+
+	/*
+	 * While kswapd is awake, it is considered the zone is under some
+	 * memory pressure. Under pressure, there is a risk that
+	 * per-cpu-counter-drift will allow the min watermark to be breached
+	 * potentially causing a live-lock. While kswapd is awake and
+	 * free pages are low, get a better estimate for free pages
+	 */
+	if (nr_free_pages < zone->percpu_drift_mark &&
+			!waitqueue_active(&zone->zone_pgdat->kswapd_wait))
+		return zone_page_state_snapshot(zone, NR_FREE_PAGES);
+
+	return nr_free_pages;
+}
+#endif /* CONFIG_SMP */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 98c699d9136f..61bd682fdcc1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1459,24 +1459,24 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
 #endif /* CONFIG_FAIL_PAGE_ALLOC */
 
 /*
- * Return true if free pages are above 'mark'. This takes into account the order
+ * Return 1 if free pages are above 'mark'. This takes into account the order
  * of the allocation.
  */
-static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int alloc_flags, long free_pages)
+int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+		      int classzone_idx, int alloc_flags)
 {
 	/* free_pages my go negative - that's OK */
 	long min = mark;
+	long free_pages = zone_nr_free_pages(z) - (1 << order) + 1;
 	int o;
 
-	free_pages -= (1 << order) + 1;
 	if (alloc_flags & ALLOC_HIGH)
 		min -= min / 2;
 	if (alloc_flags & ALLOC_HARDER)
 		min -= min / 4;
 
 	if (free_pages <= min + z->lowmem_reserve[classzone_idx])
-		return false;
+		return 0;
 	for (o = 0; o < order; o++) {
 		/* At the next order, this order's pages become unavailable */
 		free_pages -= z->free_area[o].nr_free << o;
@@ -1485,28 +1485,9 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
 		min >>= 1;
 
 		if (free_pages <= min)
-			return false;
+			return 0;
 	}
-	return true;
-}
-
-bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int alloc_flags)
-{
-	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
-					zone_page_state(z, NR_FREE_PAGES));
-}
-
-bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
-		      int classzone_idx, int alloc_flags)
-{
-	long free_pages = zone_page_state(z, NR_FREE_PAGES);
-
-	if (z->percpu_drift_mark && free_pages < z->percpu_drift_mark)
-		free_pages = zone_page_state_snapshot(z, NR_FREE_PAGES);
-
-	return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
-								free_pages);
+	return 1;
 }
 
 #ifdef CONFIG_NUMA
@@ -2449,7 +2430,7 @@ void show_free_areas(void)
 			" all_unreclaimable? %s"
 			"\n",
 			zone->name,
-			K(zone_page_state(zone, NR_FREE_PAGES)),
+			K(zone_nr_free_pages(zone)),
 			K(min_wmark_pages(zone)),
 			K(low_wmark_pages(zone)),
 			K(high_wmark_pages(zone)),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 22e56769e006..975362619e19 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2007,7 +2007,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
 		if (zone->all_unreclaimable)
 			continue;
 
-		if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
+		if (!zone_watermark_ok(zone, order, high_wmark_pages(zone),
 								0, 0))
 			return 1;
 	}
@@ -2104,7 +2104,7 @@ loop_again:
 				shrink_active_list(SWAP_CLUSTER_MAX, zone,
 							&sc, priority, 0);
 
-			if (!zone_watermark_ok_safe(zone, order,
+			if (!zone_watermark_ok(zone, order,
 					high_wmark_pages(zone), 0, 0)) {
 				end_zone = i;
 				break;
@@ -2155,7 +2155,7 @@ loop_again:
 			 * We put equal pressure on every zone, unless one
 			 * zone has way too many pages free already.
 			 */
-			if (!zone_watermark_ok_safe(zone, order,
+			if (!zone_watermark_ok(zone, order,
 					8*high_wmark_pages(zone), end_zone, 0))
 				shrink_zone(priority, zone, &sc);
 			reclaim_state->reclaimed_slab = 0;
@@ -2176,7 +2176,7 @@ loop_again:
 			    total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
 				sc.may_writepage = 1;
 
-			if (!zone_watermark_ok_safe(zone, order,
+			if (!zone_watermark_ok(zone, order,
 					high_wmark_pages(zone), end_zone, 0)) {
 				all_zones_ok = 0;
 				/*
@@ -2184,7 +2184,7 @@ loop_again:
 				 * means that we have a GFP_ATOMIC allocation
 				 * failure risk. Hurry up!
 				 */
-				if (!zone_watermark_ok_safe(zone, order,
+				if (!zone_watermark_ok(zone, order,
 					    min_wmark_pages(zone), end_zone, 0))
 					has_under_min_watermark_zone = 1;
 			}
@@ -2326,11 +2326,9 @@ static int kswapd(void *p)
 				 * premature sleep. If not, then go fully
 				 * to sleep until explicitly woken up
 				 */
-				if (!sleeping_prematurely(pgdat, order, remaining)) {
-					restore_pgdat_percpu_threshold(pgdat);
+				if (!sleeping_prematurely(pgdat, order, remaining))
 					schedule();
-					reduce_pgdat_percpu_threshold(pgdat);
-				} else {
+				else {
 					if (remaining)
 						count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
 					else
@@ -2366,16 +2364,15 @@ void wakeup_kswapd(struct zone *zone, int order)
 	if (!populated_zone(zone))
 		return;
 
-	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
-		return;
 	pgdat = zone->zone_pgdat;
+	if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
+		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
-	if (!waitqueue_active(&pgdat->kswapd_wait))
+	if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
 		return;
-	if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
+	if (!waitqueue_active(&pgdat->kswapd_wait))
 		return;
-
 	wake_up_interruptible(&pgdat->kswapd_wait);
 }
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 41dc8cd96a68..26d5716b1fce 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -81,30 +81,6 @@ EXPORT_SYMBOL(vm_stat);
 
 #ifdef CONFIG_SMP
 
-static int calculate_pressure_threshold(struct zone *zone)
-{
-	int threshold;
-	int watermark_distance;
-
-	/*
-	 * As vmstats are not up to date, there is drift between the estimated
-	 * and real values. For high thresholds and a high number of CPUs, it
-	 * is possible for the min watermark to be breached while the estimated
-	 * value looks fine. The pressure threshold is a reduced value such
-	 * that even the maximum amount of drift will not accidentally breach
-	 * the min watermark
-	 */
-	watermark_distance = low_wmark_pages(zone) - min_wmark_pages(zone);
-	threshold = max(1, (int)(watermark_distance / num_online_cpus()));
-
-	/*
-	 * Maximum threshold is 125
-	 */
-	threshold = min(125, threshold);
-
-	return threshold;
-}
-
 static int calculate_threshold(struct zone *zone)
 {
 	int threshold;
@@ -183,48 +159,6 @@ static void refresh_zone_stat_thresholds(void)
 	}
 }
 
-void reduce_pgdat_percpu_threshold(pg_data_t *pgdat)
-{
-	struct zone *zone;
-	int cpu;
-	int threshold;
-	int i;
-
-	get_online_cpus();
-	for (i = 0; i < pgdat->nr_zones; i++) {
-		zone = &pgdat->node_zones[i];
-		if (!zone->percpu_drift_mark)
-			continue;
-
-		threshold = calculate_pressure_threshold(zone);
-		for_each_online_cpu(cpu)
-			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
-							= threshold;
-	}
-	put_online_cpus();
-}
-
-void restore_pgdat_percpu_threshold(pg_data_t *pgdat)
-{
-	struct zone *zone;
-	int cpu;
-	int threshold;
-	int i;
-
-	get_online_cpus();
-	for (i = 0; i < pgdat->nr_zones; i++) {
-		zone = &pgdat->node_zones[i];
-		if (!zone->percpu_drift_mark)
-			continue;
-
-		threshold = calculate_threshold(zone);
-		for_each_online_cpu(cpu)
-			per_cpu_ptr(zone->pageset, cpu)->stat_threshold
-							= threshold;
-	}
-	put_online_cpus();
-}
-
 /*
  * For use when we know that interrupts are disabled.
  */
@@ -892,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
 		   "\n        scanned  %lu"
 		   "\n        spanned  %lu"
 		   "\n        present  %lu",
-		   zone_page_state(zone, NR_FREE_PAGES),
+		   zone_nr_free_pages(zone),
 		   min_wmark_pages(zone),
 		   low_wmark_pages(zone),
 		   high_wmark_pages(zone),
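
To make the estimate-versus-snapshot distinction in zone_nr_free_pages()
concrete, here is a single-threaded user-space model of the vmstat scheme
(a sketch under simplified assumptions; the kernel's real, per-cpu and
interrupt-safe version lives in mm/vmstat.c):

	#include <stdio.h>

	#define NCPU		4
	#define STAT_THRESHOLD	125

	static long vm_stat;		/* global counter, possibly stale */
	static int vm_stat_diff[NCPU];	/* per-cpu uncommitted deltas */

	/* cheap read: may drift by up to NCPU * STAT_THRESHOLD pages */
	static long page_state(void)
	{
		return vm_stat;
	}

	/* exact read: fold in every per-cpu delta, like *_snapshot() */
	static long page_state_snapshot(void)
	{
		long x = vm_stat;
		int cpu;

		for (cpu = 0; cpu < NCPU; cpu++)
			x += vm_stat_diff[cpu];
		return x < 0 ? 0 : x;	/* never report a negative count */
	}

	/* a cpu allocates/frees; commit to the global only past threshold */
	static void mod_state(int cpu, int delta)
	{
		vm_stat_diff[cpu] += delta;
		if (vm_stat_diff[cpu] > STAT_THRESHOLD ||
		    vm_stat_diff[cpu] < -STAT_THRESHOLD) {
			vm_stat += vm_stat_diff[cpu];
			vm_stat_diff[cpu] = 0;
		}
	}

	int main(void)
	{
		int cpu;

		vm_stat = 1000;			/* pretend 1000 pages free */
		for (cpu = 0; cpu < NCPU; cpu++)
			mod_state(cpu, -100);	/* each cpu takes 100 pages */

		printf("estimate=%ld snapshot=%ld\n",
		       page_state(), page_state_snapshot());
		return 0;
	}

This prints estimate=1000 snapshot=600: the cheap read still reports 1000
free pages after 400 have been taken. zone_nr_free_pages() switches to the
snapshot exactly when that gap could matter, i.e. when the estimate is under
percpu_drift_mark and kswapd is awake.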