diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 115 |
1 files changed, 80 insertions, 35 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index eee3efa58c91..83637dfba110 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -90,6 +90,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = { #ifdef CONFIG_HIGHMEM [N_HIGH_MEMORY] = { { [0] = 1UL } }, #endif +#ifdef CONFIG_MOVABLE_NODE + [N_MEMORY] = { { [0] = 1UL } }, +#endif [N_CPU] = { { [0] = 1UL } }, #endif /* NUMA */ }; @@ -732,6 +735,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) local_irq_restore(flags); } +/* + * Read access to zone->managed_pages is safe because it's unsigned long, + * but we still need to serialize writers. Currently all callers of + * __free_pages_bootmem() except put_page_bootmem() should only be used + * at boot time. So for shorter boot time, we shift the burden to + * put_page_bootmem() to serialize writers. + */ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) { unsigned int nr_pages = 1 << order; @@ -747,6 +757,7 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order) set_page_count(p, 0); } + page_zone(page)->managed_pages += 1 << order; set_page_refcounted(page); __free_pages(page, order); } @@ -1695,7 +1706,7 @@ bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark, * * If the zonelist cache is present in the passed in zonelist, then * returns a pointer to the allowed node mask (either the current - * tasks mems_allowed, or node_states[N_HIGH_MEMORY].) + * tasks mems_allowed, or node_states[N_MEMORY].) * * If the zonelist cache is not available for this zonelist, does * nothing and returns NULL. @@ -1724,7 +1735,7 @@ static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags) allowednodes = !in_interrupt() && (alloc_flags & ALLOC_CPUSET) ? &cpuset_current_mems_allowed : - &node_states[N_HIGH_MEMORY]; + &node_states[N_MEMORY]; return allowednodes; } @@ -2981,6 +2992,7 @@ void show_free_areas(unsigned int filter) " isolated(anon):%lukB" " isolated(file):%lukB" " present:%lukB" + " managed:%lukB" " mlocked:%lukB" " dirty:%lukB" " writeback:%lukB" @@ -3010,6 +3022,7 @@ void show_free_areas(unsigned int filter) K(zone_page_state(zone, NR_ISOLATED_ANON)), K(zone_page_state(zone, NR_ISOLATED_FILE)), K(zone->present_pages), + K(zone->managed_pages), K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_FILE_DIRTY)), K(zone_page_state(zone, NR_WRITEBACK)), @@ -3238,7 +3251,7 @@ static int find_next_best_node(int node, nodemask_t *used_node_mask) return node; } - for_each_node_state(n, N_HIGH_MEMORY) { + for_each_node_state(n, N_MEMORY) { /* Don't want a node to appear more than once */ if (node_isset(n, *used_node_mask)) @@ -3380,7 +3393,7 @@ static int default_zonelist_order(void) * local memory, NODE_ORDER may be suitable. */ average_size = total_size / - (nodes_weight(node_states[N_HIGH_MEMORY]) + 1); + (nodes_weight(node_states[N_MEMORY]) + 1); for_each_online_node(nid) { low_kmem_size = 0; total_size = 0; @@ -4476,6 +4489,26 @@ void __init set_pageblock_order(void) #endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ +static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages, + unsigned long present_pages) +{ + unsigned long pages = spanned_pages; + + /* + * Provide a more accurate estimation if there are holes within + * the zone and SPARSEMEM is in use. If there are holes within the + * zone, each populated memory region may cost us one or two extra + * memmap pages due to alignment because memmap pages for each + * populated regions may not naturally algined on page boundary. + * So the (present_pages >> 4) heuristic is a tradeoff for that. + */ + if (spanned_pages > present_pages + (present_pages >> 4) && + IS_ENABLED(CONFIG_SPARSEMEM)) + pages = present_pages; + + return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT; +} + /* * Set up the zone data structures: * - mark all pages reserved @@ -4499,48 +4532,56 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat, for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; - unsigned long size, realsize, memmap_pages; + unsigned long size, realsize, freesize, memmap_pages; size = zone_spanned_pages_in_node(nid, j, zones_size); - realsize = size - zone_absent_pages_in_node(nid, j, + realsize = freesize = size - zone_absent_pages_in_node(nid, j, zholes_size); /* - * Adjust realsize so that it accounts for how much memory + * Adjust freesize so that it accounts for how much memory * is used by this zone for memmap. This affects the watermark * and per-cpu initialisations */ - memmap_pages = - PAGE_ALIGN(size * sizeof(struct page)) >> PAGE_SHIFT; - if (realsize >= memmap_pages) { - realsize -= memmap_pages; + memmap_pages = calc_memmap_size(size, realsize); + if (freesize >= memmap_pages) { + freesize -= memmap_pages; if (memmap_pages) printk(KERN_DEBUG " %s zone: %lu pages used for memmap\n", zone_names[j], memmap_pages); } else printk(KERN_WARNING - " %s zone: %lu pages exceeds realsize %lu\n", - zone_names[j], memmap_pages, realsize); + " %s zone: %lu pages exceeds freesize %lu\n", + zone_names[j], memmap_pages, freesize); /* Account for reserved pages */ - if (j == 0 && realsize > dma_reserve) { - realsize -= dma_reserve; + if (j == 0 && freesize > dma_reserve) { + freesize -= dma_reserve; printk(KERN_DEBUG " %s zone: %lu pages reserved\n", zone_names[0], dma_reserve); } if (!is_highmem_idx(j)) - nr_kernel_pages += realsize; - nr_all_pages += realsize; + nr_kernel_pages += freesize; + /* Charge for highmem memmap if there are enough kernel pages */ + else if (nr_kernel_pages > memmap_pages * 2) + nr_kernel_pages -= memmap_pages; + nr_all_pages += freesize; zone->spanned_pages = size; - zone->present_pages = realsize; + zone->present_pages = freesize; + /* + * Set an approximate value for lowmem here, it will be adjusted + * when the bootmem allocator frees pages into the buddy system. + * And all highmem pages will be managed by the buddy system. + */ + zone->managed_pages = is_highmem_idx(j) ? realsize : freesize; #ifdef CONFIG_NUMA zone->node = nid; - zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio) + zone->min_unmapped_pages = (freesize*sysctl_min_unmapped_ratio) / 100; - zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100; + zone->min_slab_pages = (freesize * sysctl_min_slab_ratio) / 100; #endif zone->name = zone_names[j]; spin_lock_init(&zone->lock); @@ -4731,7 +4772,7 @@ unsigned long __init find_min_pfn_with_active_regions(void) /* * early_calculate_totalpages() * Sum pages in active regions for movable zone. - * Populate N_HIGH_MEMORY for calculating usable_nodes. + * Populate N_MEMORY for calculating usable_nodes. */ static unsigned long __init early_calculate_totalpages(void) { @@ -4744,7 +4785,7 @@ static unsigned long __init early_calculate_totalpages(void) totalpages += pages; if (pages) - node_set_state(nid, N_HIGH_MEMORY); + node_set_state(nid, N_MEMORY); } return totalpages; } @@ -4761,9 +4802,9 @@ static void __init find_zone_movable_pfns_for_nodes(void) unsigned long usable_startpfn; unsigned long kernelcore_node, kernelcore_remaining; /* save the state before borrow the nodemask */ - nodemask_t saved_node_state = node_states[N_HIGH_MEMORY]; + nodemask_t saved_node_state = node_states[N_MEMORY]; unsigned long totalpages = early_calculate_totalpages(); - int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); + int usable_nodes = nodes_weight(node_states[N_MEMORY]); /* * If movablecore was specified, calculate what size of @@ -4798,7 +4839,7 @@ static void __init find_zone_movable_pfns_for_nodes(void) restart: /* Spread kernelcore memory as evenly as possible throughout nodes */ kernelcore_node = required_kernelcore / usable_nodes; - for_each_node_state(nid, N_HIGH_MEMORY) { + for_each_node_state(nid, N_MEMORY) { unsigned long start_pfn, end_pfn; /* @@ -4890,23 +4931,27 @@ restart: out: /* restore the node_state */ - node_states[N_HIGH_MEMORY] = saved_node_state; + node_states[N_MEMORY] = saved_node_state; } -/* Any regular memory on that node ? */ -static void __init check_for_regular_memory(pg_data_t *pgdat) +/* Any regular or high memory on that node ? */ +static void check_for_memory(pg_data_t *pgdat, int nid) { -#ifdef CONFIG_HIGHMEM enum zone_type zone_type; - for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) { + if (N_MEMORY == N_NORMAL_MEMORY) + return; + + for (zone_type = 0; zone_type <= ZONE_MOVABLE - 1; zone_type++) { struct zone *zone = &pgdat->node_zones[zone_type]; if (zone->present_pages) { - node_set_state(zone_to_nid(zone), N_NORMAL_MEMORY); + node_set_state(nid, N_HIGH_MEMORY); + if (N_NORMAL_MEMORY != N_HIGH_MEMORY && + zone_type <= ZONE_NORMAL) + node_set_state(nid, N_NORMAL_MEMORY); break; } } -#endif } /** @@ -4989,8 +5034,8 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) /* Any memory on that node */ if (pgdat->node_present_pages) - node_set_state(nid, N_HIGH_MEMORY); - check_for_regular_memory(pgdat); + node_set_state(nid, N_MEMORY); + check_for_memory(pgdat, nid); } } @@ -5727,7 +5772,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, unsigned int tries = 0; int ret = 0; - migrate_prep_local(); + migrate_prep(); while (pfn < end || !list_empty(&cc->migratepages)) { if (fatal_signal_pending(current)) { |