From 83285c72e08c42848808039ef2d3b67a1bb88832 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Tue, 12 Nov 2013 15:07:19 -0800 Subject: mm: use pgdat_end_pfn() to simplify the code in others Use "pgdat_end_pfn()" instead of "pgdat->node_start_pfn + pgdat->node_spanned_pages". Simplify the code, no functional change. Signed-off-by: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index ed85fe3870e2..375a42d76b2c 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -365,8 +365,7 @@ out_fail: static void grow_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, unsigned long end_pfn) { - unsigned long old_pgdat_end_pfn = - pgdat->node_start_pfn + pgdat->node_spanned_pages; + unsigned long old_pgdat_end_pfn = pgdat_end_pfn(pgdat); if (!pgdat->node_spanned_pages || start_pfn < pgdat->node_start_pfn) pgdat->node_start_pfn = start_pfn; @@ -579,9 +578,9 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, static void shrink_pgdat_span(struct pglist_data *pgdat, unsigned long start_pfn, unsigned long end_pfn) { - unsigned long pgdat_start_pfn = pgdat->node_start_pfn; - unsigned long pgdat_end_pfn = - pgdat->node_start_pfn + pgdat->node_spanned_pages; + unsigned long pgdat_start_pfn = pgdat->node_start_pfn; + unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */ + unsigned long pgdat_end_pfn = p; unsigned long pfn; struct mem_section *ms; int nid = pgdat->node_id; -- cgit v1.2.3 From d6de9d5349db61e134ab7fb6b2436a4c7938714c Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Tue, 12 Nov 2013 15:07:20 -0800 Subject: mm/memory_hotplug.c: rename the function is_memblock_offlined_cb() A is_memblock_offlined() return or 1 means memory block is offlined, but is_memblock_offlined_cb() returning 1 means memory block is not offlined, this will confuse somebody, so rename the function. Signed-off-by: Xishi Qiu Acked-by: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 375a42d76b2c..133a4e132632 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1701,7 +1701,7 @@ int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, } #ifdef CONFIG_MEMORY_HOTREMOVE -static int is_memblock_offlined_cb(struct memory_block *mem, void *arg) +static int check_memblock_offlined_cb(struct memory_block *mem, void *arg) { int ret = !is_memblock_offlined(mem); @@ -1853,7 +1853,7 @@ void __ref remove_memory(int nid, u64 start, u64 size) * if this is not the case. */ ret = walk_memory_range(PFN_DOWN(start), PFN_UP(start + size - 1), NULL, - is_memblock_offlined_cb); + check_memblock_offlined_cb); if (ret) { unlock_memory_hotplug(); BUG(); -- cgit v1.2.3 From 9c2606b77d6bffb422928bca66c8dc84d85089be Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Tue, 12 Nov 2013 15:07:21 -0800 Subject: mm/memory_hotplug.c: use pfn_to_nid() instead of page_to_nid(pfn_to_page()) Use "pfn_to_nid(pfn)" instead of "page_to_nid(pfn_to_page(pfn))". Signed-off-by: Xishi Qiu Acked-by: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 133a4e132632..5118028468eb 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -934,7 +934,7 @@ int __ref online_pages(unsigned long pfn, unsigned long nr_pages, int online_typ arg.nr_pages = nr_pages; node_states_check_changes_online(nr_pages, zone, &arg); - nid = page_to_nid(pfn_to_page(pfn)); + nid = pfn_to_nid(pfn); ret = memory_notify(MEM_GOING_ONLINE, &arg); ret = notifier_to_errno(ret); -- cgit v1.2.3 From 01b0f19707c51ef247404e6af1d4a97a11ba34f7 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 12 Nov 2013 15:07:25 -0800 Subject: cpu/mem hotplug: add try_online_node() for cpu_up() cpu_up() has #ifdef CONFIG_MEMORY_HOTPLUG code blocks, which call mem_online_node() to put its node online if offlined and then call build_all_zonelists() to initialize the zone list. These steps are specific to memory hotplug, and should be managed in mm/memory_hotplug.c. lock_memory_hotplug() should also be held for the whole steps. For this reason, this patch replaces mem_online_node() with try_online_node(), which performs the whole steps with lock_memory_hotplug() held. try_online_node() is named after try_offline_node() as they have similar purpose. There is no functional change in this patch. Signed-off-by: Toshi Kani Reviewed-by: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5118028468eb..8285346be663 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1043,17 +1043,23 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat) } -/* +/** + * try_online_node - online a node if offlined + * * called by cpu_up() to online a node without onlined memory. */ -int mem_online_node(int nid) +int try_online_node(int nid) { pg_data_t *pgdat; int ret; + if (node_online(nid)) + return 0; + lock_memory_hotplug(); pgdat = hotadd_new_pgdat(nid, 0); if (!pgdat) { + pr_err("Cannot online node %d due to NULL pgdat\n", nid); ret = -ENOMEM; goto out; } @@ -1061,6 +1067,12 @@ int mem_online_node(int nid) ret = register_one_node(nid); BUG_ON(ret); + if (pgdat->node_zonelists->_zonerefs->zone == NULL) { + mutex_lock(&zonelists_mutex); + build_all_zonelists(NULL, NULL); + mutex_unlock(&zonelists_mutex); + } + out: unlock_memory_hotplug(); return ret; -- cgit v1.2.3 From 85b35feaecd4d2284505b22708795bc1f03fc897 Mon Sep 17 00:00:00 2001 From: Zhang Yanfei Date: Tue, 12 Nov 2013 15:07:42 -0800 Subject: mm/sparsemem: use PAGES_PER_SECTION to remove redundant nr_pages parameter For below functions, - sparse_add_one_section() - kmalloc_section_memmap() - __kmalloc_section_memmap() - __kfree_section_memmap() they are always invoked to operate on one memory section, so it is redundant to always pass a nr_pages parameter, which is the page numbers in one section. So we can directly use predefined macro PAGES_PER_SECTION instead of passing the parameter. Signed-off-by: Zhang Yanfei Cc: Wen Congyang Cc: Tang Chen Cc: Toshi Kani Cc: Yasuaki Ishimatsu Cc: Yinghai Lu Cc: Yasunori Goto Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 8285346be663..1b6fe8ca71e6 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -401,13 +401,12 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn) static int __meminit __add_section(int nid, struct zone *zone, unsigned long phys_start_pfn) { - int nr_pages = PAGES_PER_SECTION; int ret; if (pfn_valid(phys_start_pfn)) return -EEXIST; - ret = sparse_add_one_section(zone, phys_start_pfn, nr_pages); + ret = sparse_add_one_section(zone, phys_start_pfn); if (ret < 0) return ret; -- cgit v1.2.3 From c5320926e370b4cfb8f10c2169e26f960079cf67 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Tue, 12 Nov 2013 15:08:10 -0800 Subject: mem-hotplug: introduce movable_node boot option The hot-Pluggable field in SRAT specifies which memory is hotpluggable. As we mentioned before, if hotpluggable memory is used by the kernel, it cannot be hot-removed. So memory hotplug users may want to set all hotpluggable memory in ZONE_MOVABLE so that the kernel won't use it. Memory hotplug users may also set a node as movable node, which has ZONE_MOVABLE only, so that the whole node can be hot-removed. But the kernel cannot use memory in ZONE_MOVABLE. By doing this, the kernel cannot use memory in movable nodes. This will cause NUMA performance down. And other users may be unhappy. So we need a way to allow users to enable and disable this functionality. In this patch, we introduce movable_node boot option to allow users to choose to not to consume hotpluggable memory at early boot time and later we can set it as ZONE_MOVABLE. To achieve this, the movable_node boot option will control the memblock allocation direction. That said, after memblock is ready, before SRAT is parsed, we should allocate memory near the kernel image as we explained in the previous patches. So if movable_node boot option is set, the kernel does the following: 1. After memblock is ready, make memblock allocate memory bottom up. 2. After SRAT is parsed, make memblock behave as default, allocate memory top down. Users can specify "movable_node" in kernel commandline to enable this functionality. For those who don't use memory hotplug or who don't want to lose their NUMA performance, just don't specify anything. The kernel will work as before. Signed-off-by: Tang Chen Signed-off-by: Zhang Yanfei Suggested-by: Kamezawa Hiroyuki Suggested-by: Ingo Molnar Acked-by: Tejun Heo Acked-by: Toshi Kani Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Wanpeng Li Cc: Thomas Renninger Cc: Yinghai Lu Cc: Jiang Liu Cc: Wen Congyang Cc: Lai Jiangshan Cc: Yasuaki Ishimatsu Cc: Taku Izumi Cc: Mel Gorman Cc: Michal Nazarewicz Cc: Minchan Kim Cc: Rik van Riel Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'mm/memory_hotplug.c') diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1b6fe8ca71e6..489f235502db 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -1422,6 +1423,36 @@ static bool can_offline_normal(struct zone *zone, unsigned long nr_pages) } #endif /* CONFIG_MOVABLE_NODE */ +static int __init cmdline_parse_movable_node(char *p) +{ +#ifdef CONFIG_MOVABLE_NODE + /* + * Memory used by the kernel cannot be hot-removed because Linux + * cannot migrate the kernel pages. When memory hotplug is + * enabled, we should prevent memblock from allocating memory + * for the kernel. + * + * ACPI SRAT records all hotpluggable memory ranges. But before + * SRAT is parsed, we don't know about it. + * + * The kernel image is loaded into memory at very early time. We + * cannot prevent this anyway. So on NUMA system, we set any + * node the kernel resides in as un-hotpluggable. + * + * Since on modern servers, one node could have double-digit + * gigabytes memory, we can assume the memory around the kernel + * image is also un-hotpluggable. So before SRAT is parsed, just + * allocate memory near the kernel image to try the best to keep + * the kernel away from hotpluggable memory. + */ + memblock_set_bottom_up(true); +#else + pr_warn("movable_node option not supported\n"); +#endif + return 0; +} +early_param("movable_node", cmdline_parse_movable_node); + /* check which state of node_states will be changed when offline memory */ static void node_states_check_changes_offline(unsigned long nr_pages, struct zone *zone, struct memory_notify *arg) -- cgit v1.2.3