summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2009-08-14 15:00:52 +0900
committerTejun Heo <tj@kernel.org>2009-08-14 15:00:52 +0900
commit4518e6a0c038b98be4c480e6f4481e8676bd15dd (patch)
tree81b222a74951186aa85b5dfddae1fcae3b7e8ea9
parentc8826dd538602d730ed2c18c6753f1bbfa6c4933 (diff)
x86,percpu: use embedding for 64bit NUMA and page for 32bit NUMA
Embedding percpu first chunk allocator can now handle very sparse unit mapping. Use embedding allocator instead of lpage for 64bit NUMA. This removes extra TLB pressure and the need to do complex and fragile dancing when changing page attributes. For 32bit, using very sparse unit mapping isn't a good idea because the vmalloc space is very constrained. 32bit NUMA machines aren't exactly the focus of optimization and it isn't very clear whether lpage performs better than page. Use page first chunk allocator for 32bit NUMAs. As this leaves setup_pcpu_*() functions pretty much empty, fold them into setup_per_cpu_areas(). Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Andi Kleen <andi@firstfloor.org>
-rw-r--r--arch/x86/Kconfig4
-rw-r--r--arch/x86/kernel/setup_percpu.c155
2 files changed, 28 insertions, 131 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f7ac27215512..869d7d301448 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -156,10 +156,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
config NEED_PER_CPU_PAGE_FIRST_CHUNK
def_bool y
-config NEED_PER_CPU_LPAGE_FIRST_CHUNK
- def_bool y
- depends on NEED_MULTIPLE_NODES
-
config HAVE_CPUMASK_OF_CPU_MAP
def_bool X86_64_SMP
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 67f6314de9f1..d559af913e1f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
#define PERCPU_FIRST_CHUNK_RESERVE 0
#endif
+#ifdef CONFIG_X86_32
/**
* pcpu_need_numa - determine percpu allocation needs to consider NUMA
*
@@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void)
#endif
return false;
}
+#endif
/**
* pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
@@ -136,128 +138,23 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
free_bootmem(__pa(ptr), size);
}
-/*
- * Large page remapping allocator
- */
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-static void __init pcpul_map(void *ptr, size_t size, void *addr)
-{
- pmd_t *pmd, pmd_v;
-
- pmd = populate_extra_pmd((unsigned long)addr);
- pmd_v = pfn_pmd(page_to_pfn(virt_to_page(ptr)), PAGE_KERNEL_LARGE);
- set_pmd(pmd, pmd_v);
-}
-
-static int pcpu_lpage_cpu_distance(unsigned int from, unsigned int to)
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
if (early_cpu_to_node(from) == early_cpu_to_node(to))
return LOCAL_DISTANCE;
else
return REMOTE_DISTANCE;
-}
-
-static int __init setup_pcpu_lpage(bool chosen)
-{
- size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
- size_t dyn_size = reserve - PERCPU_FIRST_CHUNK_RESERVE;
- struct pcpu_alloc_info *ai;
- int rc;
-
- /* on non-NUMA, embedding is better */
- if (!chosen && !pcpu_need_numa())
- return -EINVAL;
-
- /* need PSE */
- if (!cpu_has_pse) {
- pr_warning("PERCPU: lpage allocator requires PSE\n");
- return -EINVAL;
- }
-
- /* allocate and build unit_map */
- ai = pcpu_build_alloc_info(PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
- PMD_SIZE, pcpu_lpage_cpu_distance);
- if (IS_ERR(ai)) {
- pr_warning("PERCPU: failed to build unit_map (%ld)\n",
- PTR_ERR(ai));
- return PTR_ERR(ai);
- }
-
- /* do the parameters look okay? */
- if (!chosen) {
- size_t vm_size = VMALLOC_END - VMALLOC_START;
- size_t tot_size = 0;
- int group;
-
- for (group = 0; group < ai->nr_groups; group++)
- tot_size += ai->unit_size * ai->groups[group].nr_units;
-
- /* don't consume more than 20% of vmalloc area */
- if (tot_size > vm_size / 5) {
- pr_info("PERCPU: too large chunk size %zuMB for "
- "large page remap\n", tot_size >> 20);
- rc = -EINVAL;
- goto out_free;
- }
- }
-
- rc = pcpu_lpage_first_chunk(ai, pcpu_fc_alloc, pcpu_fc_free, pcpul_map);
-out_free:
- pcpu_free_alloc_info(ai);
- return rc;
-}
#else
-static int __init setup_pcpu_lpage(bool chosen)
-{
- return -EINVAL;
-}
+ return LOCAL_DISTANCE;
#endif
-
-/*
- * Embedding allocator
- *
- * The first chunk is sized to just contain the static area plus
- * module and dynamic reserves and embedded into linear physical
- * mapping so that it can use PMD mapping without additional TLB
- * pressure.
- */
-static int __init setup_pcpu_embed(bool chosen)
-{
- size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
-
- /*
- * If large page isn't supported, there's no benefit in doing
- * this. Also, embedding allocation doesn't play well with
- * NUMA.
- */
- if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
- return -EINVAL;
-
- return pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
- reserve - PERCPU_FIRST_CHUNK_RESERVE,
- PAGE_SIZE, NULL, pcpu_fc_alloc,
- pcpu_fc_free);
}
-/*
- * Page allocator
- *
- * Boring fallback 4k page allocator. This allocator puts more
- * pressure on PTE TLBs but other than that behaves nicely on both UMA
- * and NUMA.
- */
static void __init pcpup_populate_pte(unsigned long addr)
{
populate_extra_pte(addr);
}
-static int __init setup_pcpu_page(void)
-{
- return pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
- pcpu_fc_alloc, pcpu_fc_free,
- pcpup_populate_pte);
-}
-
static inline void setup_percpu_segment(int cpu)
{
#ifdef CONFIG_X86_32
@@ -281,30 +178,34 @@ void __init setup_per_cpu_areas(void)
NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
/*
- * Allocate percpu area. If PSE is supported, try to make use
- * of large page mappings. Please read comments on top of
- * each allocator for details.
+ * Allocate percpu area. Embedding allocator is our favorite;
+ * however, on NUMA configurations, it can result in very
+ * sparse unit mapping and vmalloc area isn't spacious enough
+ * on 32bit. Use page in that case.
*/
+#ifdef CONFIG_X86_32
+ if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
+ pcpu_chosen_fc = PCPU_FC_PAGE;
+#endif
rc = -EINVAL;
- if (pcpu_chosen_fc != PCPU_FC_AUTO) {
- if (pcpu_chosen_fc != PCPU_FC_PAGE) {
- if (pcpu_chosen_fc == PCPU_FC_LPAGE)
- rc = setup_pcpu_lpage(true);
- else
- rc = setup_pcpu_embed(true);
-
- if (rc < 0)
- pr_warning("PERCPU: %s allocator failed (%d), "
- "falling back to page size\n",
- pcpu_fc_names[pcpu_chosen_fc], rc);
- }
- } else {
- rc = setup_pcpu_lpage(false);
+ if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+ const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE;
+ const size_t dyn_size = PERCPU_MODULE_RESERVE +
+ PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
+
+ rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
+ dyn_size, atom_size,
+ pcpu_cpu_distance,
+ pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
- rc = setup_pcpu_embed(false);
+ pr_warning("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
- rc = setup_pcpu_page();
+ rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
+ pcpu_fc_alloc, pcpu_fc_free,
+ pcpup_populate_pte);
if (rc < 0)
panic("cannot initialize percpu area (err=%d)", rc);