From e74e396204bfcb67570ba4517b08f5918e69afea Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 30 Mar 2009 19:07:44 +0900 Subject: percpu: use dynamic percpu allocator as the default percpu allocator This patch makes most !CONFIG_HAVE_SETUP_PER_CPU_AREA archs use dynamic percpu allocator. The first chunk is allocated using embedding helper and 8k is reserved for modules. This ensures that the new allocator behaves almost identically to the original allocator as long as static percpu variables are concerned, so it shouldn't introduce much breakage. s390 and alpha use custom SHIFT_PERCPU_PTR() to work around addressing range limit the addressing model imposes. Unfortunately, this breaks if the address is specified using a variable, so for now, the two archs aren't converted. The following architectures are affected by this change. * sh * arm * cris * mips * sparc(32) * blackfin * avr32 * parisc (broken, under investigation) * m32r * powerpc(32) As this change makes the dynamic allocator the default one, CONFIG_HAVE_DYNAMIC_PER_CPU_AREA is replaced with its invert - CONFIG_HAVE_LEGACY_PER_CPU_AREA, which is added to yet-to-be converted archs. These archs implement their own setup_per_cpu_areas() and the conversion is not trivial. * powerpc(64) * sparc(64) * ia64 * alpha * s390 Boot and batch alloc/free tests on x86_32 with debug code (x86_32 doesn't use default first chunk initialization). Compile tested on sparc(32), powerpc(32), arm and alpha. Kyle McMartin reported that this change breaks parisc. The problem is still under investigation and he is okay with pushing this patch forward and fixing parisc later. [ Impact: use dynamic allocator for most archs w/o custom percpu setup ] Signed-off-by: Tejun Heo Acked-by: Rusty Russell Acked-by: David S. Miller Acked-by: Benjamin Herrenschmidt Acked-by: Martin Schwidefsky Reviewed-by: Christoph Lameter Cc: Paul Mundt Cc: Russell King Cc: Mikael Starvik Cc: Ralf Baechle Cc: Bryan Wu Cc: Kyle McMartin Cc: Matthew Wilcox Cc: Grant Grundler Cc: Hirokazu Takata Cc: Richard Henderson Cc: Ivan Kokshaysky Cc: Heiko Carstens Cc: Ingo Molnar --- kernel/module.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 38928fcaff2b..f5934954fa99 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -364,7 +364,7 @@ EXPORT_SYMBOL_GPL(find_module); #ifdef CONFIG_SMP -#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA +#ifndef CONFIG_HAVE_LEGACY_PER_CPU_AREA static void *percpu_modalloc(unsigned long size, unsigned long align, const char *name) @@ -389,7 +389,7 @@ static void percpu_modfree(void *freeme) free_percpu(freeme); } -#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#else /* ... CONFIG_HAVE_LEGACY_PER_CPU_AREA */ /* Number of blocks used and allocated. */ static unsigned int pcpu_num_used, pcpu_num_allocated; @@ -535,7 +535,7 @@ static int percpu_modinit(void) } __initcall(percpu_modinit); -#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ +#endif /* CONFIG_HAVE_LEGACY_PER_CPU_AREA */ static unsigned int find_pcpusec(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, -- cgit v1.2.3 From fc5377668c3d808e1d53c4aee152c836f55c3490 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Sep 2009 19:35:28 +0200 Subject: tracing: Remove markers Now that the last users of markers have migrated to the event tracer we can kill off the (now orphan) support code. Signed-off-by: Christoph Hellwig Acked-by: Mathieu Desnoyers Cc: Steven Rostedt Cc: Frederic Weisbecker LKML-Reference: <20090917173527.GA1699@lst.de> Signed-off-by: Ingo Molnar --- kernel/module.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 05ce49ced8f6..b6ee424245dd 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2237,10 +2237,6 @@ static noinline struct module *load_module(void __user *umod, sizeof(*mod->ctors), &mod->num_ctors); #endif -#ifdef CONFIG_MARKERS - mod->markers = section_objs(hdr, sechdrs, secstrings, "__markers", - sizeof(*mod->markers), &mod->num_markers); -#endif #ifdef CONFIG_TRACEPOINTS mod->tracepoints = section_objs(hdr, sechdrs, secstrings, "__tracepoints", @@ -2958,20 +2954,6 @@ void module_layout(struct module *mod, EXPORT_SYMBOL(module_layout); #endif -#ifdef CONFIG_MARKERS -void module_update_markers(void) -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers); - mutex_unlock(&module_mutex); -} -#endif - #ifdef CONFIG_TRACEPOINTS void module_update_tracepoints(void) { -- cgit v1.2.3 From eb8cdec4a984fde123a91250dcc9e0bddf5eafdc Mon Sep 17 00:00:00 2001 From: Bernd Schmidt Date: Mon, 21 Sep 2009 17:03:57 -0700 Subject: nommu: add support for Memory Protection Units (MPU) Some architectures (like the Blackfin arch) implement some of the "simpler" features that one would expect out of a MMU such as memory protection. In our case, we actually get read/write/exec protection down to the page boundary so processes can't stomp on each other let alone the kernel. There is a performance decrease (which depends greatly on the workload) however as the hardware/software interaction was not optimized at design time. Signed-off-by: Bernd Schmidt Signed-off-by: Bryan Wu Signed-off-by: Mike Frysinger Acked-by: David Howells Acked-by: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/module.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index b6ee424245dd..e6bc4b28aa62 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1535,6 +1536,10 @@ static void free_module(struct module *mod) /* Finally, free the core (containing the module structure) */ module_free(mod, mod->module_core); + +#ifdef CONFIG_MPU + update_protections(current->mm); +#endif } void *__symbol_get(const char *symbol) -- cgit v1.2.3 From 4a4962263f07d14660849ec134ee42b63e95ea9a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:50:42 +0100 Subject: module: reduce symbol table for loaded modules (v2) Discard all symbols not interesting for kallsyms use: absolute, section, and in the common case (!KALLSYMS_ALL) data ones. Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- kernel/module.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 87 insertions(+), 4 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index e6bc4b28aa62..97f4d5e15535 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1862,13 +1862,68 @@ static char elf_type(const Elf_Sym *sym, return '?'; } +static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, + unsigned int shnum) +{ + const Elf_Shdr *sec; + + if (src->st_shndx == SHN_UNDEF + || src->st_shndx >= shnum + || !src->st_name) + return false; + + sec = sechdrs + src->st_shndx; + if (!(sec->sh_flags & SHF_ALLOC) +#ifndef CONFIG_KALLSYMS_ALL + || !(sec->sh_flags & SHF_EXECINSTR) +#endif + || (sec->sh_entsize & INIT_OFFSET_MASK)) + return false; + + return true; +} + +static unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + const Elf_Ehdr *hdr, + const char *secstrings) +{ + unsigned long symoffs; + Elf_Shdr *symsect = sechdrs + symindex; + const Elf_Sym *src; + unsigned int i, nsrc, ndst; + + /* Put symbol section at end of init part of module. */ + symsect->sh_flags |= SHF_ALLOC; + symsect->sh_entsize = get_offset(mod, &mod->init_size, symsect, + symindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + symsect->sh_name); + + src = (void *)hdr + symsect->sh_offset; + nsrc = symsect->sh_size / sizeof(*src); + for (ndst = i = 1; i < nsrc; ++i, ++src) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) + ++ndst; + + /* Append room for core symbols at end of core part. */ + symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); + mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + + return symoffs; +} + static void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, + unsigned long symoffs, const char *secstrings) { - unsigned int i; + unsigned int i, ndst; + const Elf_Sym *src; + Elf_Sym *dst; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1878,12 +1933,32 @@ static void add_kallsyms(struct module *mod, for (i = 0; i < mod->num_symtab; i++) mod->symtab[i].st_info = elf_type(&mod->symtab[i], sechdrs, secstrings, mod); + + mod->core_symtab = dst = mod->module_core + symoffs; + src = mod->symtab; + *dst = *src; + for (ndst = i = 1; i < mod->num_symtab; ++i, ++src) { + if (!is_core_symbol(src, sechdrs, shnum)) + continue; + dst[ndst] = *src; + ++ndst; + } + mod->core_num_syms = ndst; } #else +static inline unsigned long layout_symtab(struct module *mod, + Elf_Shdr *sechdrs, + unsigned int symindex, + const Elf_Hdr *hdr, + const char *secstrings) +{ +} static inline void add_kallsyms(struct module *mod, Elf_Shdr *sechdrs, + unsigned int shnum, unsigned int symindex, unsigned int strindex, + unsigned long symoffs, const char *secstrings) { } @@ -1959,6 +2034,9 @@ static noinline struct module *load_module(void __user *umod, struct module *mod; long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ +#ifdef CONFIG_KALLSYMS + unsigned long symoffs; +#endif mm_segment_t old_fs; DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n", @@ -2041,8 +2119,7 @@ static noinline struct module *load_module(void __user *umod, sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; #ifdef CONFIG_KALLSYMS - /* Keep symbol and string tables for decoding later. */ - sechdrs[symindex].sh_flags |= SHF_ALLOC; + /* Keep string table for decoding later. */ sechdrs[strindex].sh_flags |= SHF_ALLOC; #endif @@ -2109,6 +2186,7 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, hdr, secstrings); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2313,7 +2391,8 @@ static noinline struct module *load_module(void __user *umod, percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr, sechdrs[pcpuindex].sh_size); - add_kallsyms(mod, sechdrs, symindex, strindex, secstrings); + add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, + symoffs, secstrings); if (!mod->taints) { struct _ddebug *debug; @@ -2491,6 +2570,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, /* Drop initial reference. */ module_put(mod); trim_init_extable(mod); +#ifdef CONFIG_KALLSYMS + mod->num_symtab = mod->core_num_syms; + mod->symtab = mod->core_symtab; +#endif module_free(mod, mod->module_init); mod->module_init = NULL; mod->init_size = 0; -- cgit v1.2.3 From 554bdfe5acf3715e87c8d5e25a4f9a896ac9f014 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 6 Jul 2009 14:51:44 +0100 Subject: module: reduce string table for loaded modules (v2) Also remove all parts of the string table (referenced by the symbol table) that are not needed for kallsyms use (i.e. which were only referenced by symbols discarded by the previous patch, or not referenced at all for whatever reason). Signed-off-by: Jan Beulich Signed-off-by: Rusty Russell --- kernel/module.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 56 insertions(+), 12 deletions(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 97f4d5e15535..39827c3d9484 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1886,12 +1886,17 @@ static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs, static unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, + unsigned int strindex, const Elf_Ehdr *hdr, - const char *secstrings) + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) { unsigned long symoffs; Elf_Shdr *symsect = sechdrs + symindex; + Elf_Shdr *strsect = sechdrs + strindex; const Elf_Sym *src; + const char *strtab; unsigned int i, nsrc, ndst; /* Put symbol section at end of init part of module. */ @@ -1902,14 +1907,31 @@ static unsigned long layout_symtab(struct module *mod, src = (void *)hdr + symsect->sh_offset; nsrc = symsect->sh_size / sizeof(*src); + strtab = (void *)hdr + strsect->sh_offset; for (ndst = i = 1; i < nsrc; ++i, ++src) - if (is_core_symbol(src, sechdrs, hdr->e_shnum)) + if (is_core_symbol(src, sechdrs, hdr->e_shnum)) { + unsigned int j = src->st_name; + + while(!__test_and_set_bit(j, strmap) && strtab[j]) + ++j; ++ndst; + } /* Append room for core symbols at end of core part. */ symoffs = ALIGN(mod->core_size, symsect->sh_addralign ?: 1); mod->core_size = symoffs + ndst * sizeof(Elf_Sym); + /* Put string table section at end of init part of module. */ + strsect->sh_flags |= SHF_ALLOC; + strsect->sh_entsize = get_offset(mod, &mod->init_size, strsect, + strindex) | INIT_OFFSET_MASK; + DEBUGP("\t%s\n", secstrings + strsect->sh_name); + + /* Append room for core symbols' strings at end of core part. */ + *pstroffs = mod->core_size; + __set_bit(0, strmap); + mod->core_size += bitmap_weight(strmap, strsect->sh_size); + return symoffs; } @@ -1919,11 +1941,14 @@ static void add_kallsyms(struct module *mod, unsigned int symindex, unsigned int strindex, unsigned long symoffs, - const char *secstrings) + unsigned long stroffs, + const char *secstrings, + unsigned long *strmap) { unsigned int i, ndst; const Elf_Sym *src; Elf_Sym *dst; + char *s; mod->symtab = (void *)sechdrs[symindex].sh_addr; mod->num_symtab = sechdrs[symindex].sh_size / sizeof(Elf_Sym); @@ -1941,16 +1966,25 @@ static void add_kallsyms(struct module *mod, if (!is_core_symbol(src, sechdrs, shnum)) continue; dst[ndst] = *src; + dst[ndst].st_name = bitmap_weight(strmap, dst[ndst].st_name); ++ndst; } mod->core_num_syms = ndst; + + mod->core_strtab = s = mod->module_core + stroffs; + for (*s = 0, i = 1; i < sechdrs[strindex].sh_size; ++i) + if (test_bit(i, strmap)) + *++s = mod->strtab[i]; } #else static inline unsigned long layout_symtab(struct module *mod, Elf_Shdr *sechdrs, unsigned int symindex, + unsigned int strindex, const Elf_Hdr *hdr, - const char *secstrings) + const char *secstrings, + unsigned long *pstroffs, + unsigned long *strmap) { } static inline void add_kallsyms(struct module *mod, @@ -1959,7 +1993,9 @@ static inline void add_kallsyms(struct module *mod, unsigned int symindex, unsigned int strindex, unsigned long symoffs, - const char *secstrings) + unsigned long stroffs, + const char *secstrings, + const unsigned long *strmap) { } #endif /* CONFIG_KALLSYMS */ @@ -2035,7 +2071,7 @@ static noinline struct module *load_module(void __user *umod, long err = 0; void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */ #ifdef CONFIG_KALLSYMS - unsigned long symoffs; + unsigned long symoffs, stroffs, *strmap; #endif mm_segment_t old_fs; @@ -2118,10 +2154,6 @@ static noinline struct module *load_module(void __user *umod, /* Don't keep modinfo and version sections. */ sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC; sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC; -#ifdef CONFIG_KALLSYMS - /* Keep string table for decoding later. */ - sechdrs[strindex].sh_flags |= SHF_ALLOC; -#endif /* Check module struct version now, before we try to use module. */ if (!check_modstruct_version(sechdrs, versindex, mod)) { @@ -2157,6 +2189,13 @@ static noinline struct module *load_module(void __user *umod, goto free_hdr; } + strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size) + * sizeof(long), GFP_KERNEL); + if (!strmap) { + err = -ENOMEM; + goto free_mod; + } + if (find_module(mod->name)) { err = -EEXIST; goto free_mod; @@ -2186,7 +2225,8 @@ static noinline struct module *load_module(void __user *umod, this is done generically; there doesn't appear to be any special cases for the architectures. */ layout_sections(mod, hdr, sechdrs, secstrings); - symoffs = layout_symtab(mod, sechdrs, symindex, hdr, secstrings); + symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr, + secstrings, &stroffs, strmap); /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); @@ -2392,7 +2432,9 @@ static noinline struct module *load_module(void __user *umod, sechdrs[pcpuindex].sh_size); add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex, - symoffs, secstrings); + symoffs, stroffs, secstrings, strmap); + kfree(strmap); + strmap = NULL; if (!mod->taints) { struct _ddebug *debug; @@ -2481,6 +2523,7 @@ static noinline struct module *load_module(void __user *umod, percpu_modfree(percpu); free_mod: kfree(args); + kfree(strmap); free_hdr: vfree(hdr); return ERR_PTR(err); @@ -2573,6 +2616,7 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, #ifdef CONFIG_KALLSYMS mod->num_symtab = mod->core_num_syms; mod->symtab = mod->core_symtab; + mod->strtab = mod->core_strtab; #endif module_free(mod, mod->module_init); mod->module_init = NULL; -- cgit v1.2.3 From a263f7763c364015f92e7c097fa46c6673f6fcb0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Sep 2009 00:32:58 -0600 Subject: module: fix memory leak when load fails after srcversion/version allocated Normally the twisty paths of sysfs will free the attributes, but not if we fail before we hook it into sysfs (which is the last thing we do in load_module). (This sysfs code is a turd, no doubt there are other issues lurking too). Reported-by: Tetsuo Handa Signed-off-by: Rusty Russell Cc: Catalin Marinas Tested-by: Tetsuo Handa --- kernel/module.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index 39827c3d9484..c54f10d90e1c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1797,6 +1797,17 @@ static void setup_modinfo(struct module *mod, Elf_Shdr *sechdrs, } } +static void free_modinfo(struct module *mod) +{ + struct module_attribute *attr; + int i; + + for (i = 0; (attr = modinfo_attrs[i]); i++) { + if (attr->free) + attr->free(mod); + } +} + #ifdef CONFIG_KALLSYMS /* lookup symbol in given range of kernel_symbols */ @@ -2506,6 +2517,7 @@ static noinline struct module *load_module(void __user *umod, synchronize_sched(); module_arch_cleanup(mod); cleanup: + free_modinfo(mod); kobject_del(&mod->mkobj.kobj); kobject_put(&mod->mkobj.kobj); free_unload: -- cgit v1.2.3 From ffa9f12a41ec117207e8d953f90b9c179546c8d7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 25 Sep 2009 00:32:59 -0600 Subject: module: don't call percpu_modfree on NULL pointer. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The general one handles NULL, the static obsolescent (CONFIG_HAVE_LEGACY_PER_CPU_AREA) one in module.c doesn't; Eric's commit 720eba31 assumed it did, and various frobbings since then kept that assumption. All other callers in module.c all protect it with an if; this effectively does the same as free_init is only goto if we fail percpu_modalloc(). Reported-by: Kamalesh Babulal Signed-off-by: Rusty Russell Cc: Eric Dumazet Cc: Masami Hiramatsu Cc: Américo Wang Tested-by: Kamalesh Babulal --- kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module.c') diff --git a/kernel/module.c b/kernel/module.c index c54f10d90e1c..5a29397ca4b6 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2523,8 +2523,8 @@ static noinline struct module *load_module(void __user *umod, free_unload: module_unload_free(mod); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) - free_init: percpu_modfree(mod->refptr); + free_init: #endif module_free(mod, mod->module_init); free_core: -- cgit v1.2.3