From 802f192e4a600f7ef84ca25c8b818c8830acef5a Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Sat, 3 Sep 2005 15:54:26 -0700 Subject: [PATCH] SPARSEMEM EXTREME A new option for SPARSEMEM is ARCH_SPARSEMEM_EXTREME. Architecture platforms with a very sparse physical address space would likely want to select this option. For those architecture platforms that don't select the option, the code generated is equivalent to SPARSEMEM currently in -mm. I'll be posting a patch on ia64 ml which uses this new SPARSEMEM feature. ARCH_SPARSEMEM_EXTREME makes mem_section a one dimensional array of pointers to mem_sections. This two level layout scheme is able to achieve smaller memory requirements for SPARSEMEM with the tradeoff of an additional shift and load when fetching the memory section. The current SPARSEMEM -mm implementation is a one dimensional array of mem_sections which is the default SPARSEMEM configuration. The patch attempts isolates the implementation details of the physical layout of the sparsemem section array. ARCH_SPARSEMEM_EXTREME depends on 64BIT and is by default boolean false. I've boot tested under aim load ia64 configured for ARCH_SPARSEMEM_EXTREME. I've also boot tested a 4 way Opteron machine with !ARCH_SPARSEMEM_EXTREME and tested with aim. Signed-off-by: Andy Whitcroft Signed-off-by: Bob Picco Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index b54e304df4a7..b2b456bf0a5d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -13,7 +13,26 @@ * * 1) mem_section - memory sections, mem_map's for valid memory */ -struct mem_section mem_section[NR_MEM_SECTIONS]; +#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +struct mem_section *mem_section[NR_SECTION_ROOTS] + ____cacheline_maxaligned_in_smp; + +static void sparse_index_init(unsigned long section, int nid) +{ + unsigned long root = SECTION_TO_ROOT(section); + + if (mem_section[root]) + return; + mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); + if (mem_section[root]) + memset(mem_section[root], 0, PAGE_SIZE); + else + panic("memory_present: NO MEMORY\n"); +} +#else +struct mem_section mem_section[NR_MEM_SECTIONS] + ____cacheline_maxaligned_in_smp; +#endif EXPORT_SYMBOL(mem_section); /* Record a memory area against a node. */ @@ -24,8 +43,13 @@ void memory_present(int nid, unsigned long start, unsigned long end) start &= PAGE_SECTION_MASK; for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { unsigned long section = pfn_to_section_nr(pfn); - if (!mem_section[section].section_mem_map) - mem_section[section].section_mem_map = SECTION_MARKED_PRESENT; + struct mem_section *ms; + + sparse_index_init(section, nid); + + ms = __nr_to_section(section); + if (!ms->section_mem_map) + ms->section_mem_map = SECTION_MARKED_PRESENT; } } @@ -85,6 +109,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum) { struct page *map; int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); + struct mem_section *ms = __nr_to_section(pnum); map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); if (map) @@ -96,7 +121,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum) return map; printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); - mem_section[pnum].section_mem_map = 0; + ms->section_mem_map = 0; return NULL; } @@ -114,8 +139,9 @@ void sparse_init(void) continue; map = sparse_early_mem_map_alloc(pnum); - if (map) - sparse_init_one_section(&mem_section[pnum], pnum, map); + if (!map) + continue; + sparse_init_one_section(__nr_to_section(pnum), pnum, map); } } -- cgit v1.2.3 From 3e347261a80b57df792ab9464b5f0ed59add53a8 Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Sat, 3 Sep 2005 15:54:28 -0700 Subject: [PATCH] sparsemem extreme implementation With cleanups from Dave Hansen SPARSEMEM_EXTREME makes mem_section a one dimensional array of pointers to mem_sections. This two level layout scheme is able to achieve smaller memory requirements for SPARSEMEM with the tradeoff of an additional shift and load when fetching the memory section. The current SPARSEMEM implementation is a one dimensional array of mem_sections which is the default SPARSEMEM configuration. The patch attempts isolates the implementation details of the physical layout of the sparsemem section array. SPARSEMEM_EXTREME requires bootmem to be functioning at the time of memory_present() calls. This is not always feasible, so architectures which do not need it may allocate everything statically by using SPARSEMEM_STATIC. Signed-off-by: Andy Whitcroft Signed-off-by: Bob Picco Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index b2b456bf0a5d..fa01292157a9 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -13,28 +13,36 @@ * * 1) mem_section - memory sections, mem_map's for valid memory */ -#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +#ifdef CONFIG_SPARSEMEM_EXTREME struct mem_section *mem_section[NR_SECTION_ROOTS] ____cacheline_maxaligned_in_smp; +#else +struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] + ____cacheline_maxaligned_in_smp; +#endif +EXPORT_SYMBOL(mem_section); + +static void sparse_alloc_root(unsigned long root, int nid) +{ +#ifdef CONFIG_SPARSEMEM_EXTREME + mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); +#endif +} static void sparse_index_init(unsigned long section, int nid) { - unsigned long root = SECTION_TO_ROOT(section); + unsigned long root = SECTION_NR_TO_ROOT(section); if (mem_section[root]) return; - mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); + + sparse_alloc_root(root, nid); + if (mem_section[root]) memset(mem_section[root], 0, PAGE_SIZE); else panic("memory_present: NO MEMORY\n"); } -#else -struct mem_section mem_section[NR_MEM_SECTIONS] - ____cacheline_maxaligned_in_smp; -#endif -EXPORT_SYMBOL(mem_section); - /* Record a memory area against a node. */ void memory_present(int nid, unsigned long start, unsigned long end) { -- cgit v1.2.3 From 28ae55c98e4d16eac9a05a8a259d7763ef3aeb18 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 3 Sep 2005 15:54:29 -0700 Subject: [PATCH] sparsemem extreme: hotplug preparation This splits up sparse_index_alloc() into two pieces. This is needed because we'll allocate the memory for the second level in a different place from where we actually consume it to keep the allocation from happening underneath a lock Signed-off-by: Dave Hansen Signed-off-by: Bob Picco Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/sparse.c | 53 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) (limited to 'mm/sparse.c') diff --git a/mm/sparse.c b/mm/sparse.c index fa01292157a9..347249a4917a 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -6,6 +6,7 @@ #include #include #include +#include #include /* @@ -22,27 +23,55 @@ struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] #endif EXPORT_SYMBOL(mem_section); -static void sparse_alloc_root(unsigned long root, int nid) -{ #ifdef CONFIG_SPARSEMEM_EXTREME - mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); -#endif +static struct mem_section *sparse_index_alloc(int nid) +{ + struct mem_section *section = NULL; + unsigned long array_size = SECTIONS_PER_ROOT * + sizeof(struct mem_section); + + section = alloc_bootmem_node(NODE_DATA(nid), array_size); + + if (section) + memset(section, 0, array_size); + + return section; } -static void sparse_index_init(unsigned long section, int nid) +static int sparse_index_init(unsigned long section_nr, int nid) { - unsigned long root = SECTION_NR_TO_ROOT(section); + static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED; + unsigned long root = SECTION_NR_TO_ROOT(section_nr); + struct mem_section *section; + int ret = 0; if (mem_section[root]) - return; + return -EEXIST; - sparse_alloc_root(root, nid); + section = sparse_index_alloc(nid); + /* + * This lock keeps two different sections from + * reallocating for the same index + */ + spin_lock(&index_init_lock); - if (mem_section[root]) - memset(mem_section[root], 0, PAGE_SIZE); - else - panic("memory_present: NO MEMORY\n"); + if (mem_section[root]) { + ret = -EEXIST; + goto out; + } + + mem_section[root] = section; +out: + spin_unlock(&index_init_lock); + return ret; } +#else /* !SPARSEMEM_EXTREME */ +static inline int sparse_index_init(unsigned long section_nr, int nid) +{ + return 0; +} +#endif + /* Record a memory area against a node. */ void memory_present(int nid, unsigned long start, unsigned long end) { -- cgit v1.2.3