From 66bc1a173328dec3e37c203a999f2a2914c96b56 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 6 Apr 2024 15:52:02 +0200 Subject: treewide: Use sysfs_bin_attr_simple_read() helper Deduplicate ->read() callbacks of bin_attributes which are backed by a simple buffer in memory: Use the newly introduced sysfs_bin_attr_simple_read() helper instead, either by referencing it directly or by declaring such bin_attributes with BIN_ATTR_SIMPLE_RO() or BIN_ATTR_SIMPLE_ADMIN_RO(). Aside from a reduction of LoC, this shaves off a few bytes from vmlinux (304 bytes on an x86_64 allyesconfig). No functional change intended. Signed-off-by: Lukas Wunner Acked-by: Zhi Wang Acked-by: Michael Ellerman Acked-by: Rafael J. Wysocki Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/92ee0a0e83a5a3f3474845db6c8575297698933a.1712410202.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- kernel/module/sysfs.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/sysfs.c b/kernel/module/sysfs.c index d964167c6658..26efe1305c12 100644 --- a/kernel/module/sysfs.c +++ b/kernel/module/sysfs.c @@ -146,17 +146,6 @@ struct module_notes_attrs { struct bin_attribute attrs[] __counted_by(notes); }; -static ssize_t module_notes_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, - char *buf, loff_t pos, size_t count) -{ - /* - * The caller checked the pos and count against our size. 
- */ - memcpy(buf, bin_attr->private + pos, count); - return count; -} - static void free_notes_attrs(struct module_notes_attrs *notes_attrs, unsigned int i) { @@ -205,7 +194,7 @@ static void add_notes_attrs(struct module *mod, const struct load_info *info) nattr->attr.mode = 0444; nattr->size = info->sechdrs[i].sh_size; nattr->private = (void *)info->sechdrs[i].sh_addr; - nattr->read = module_notes_read; + nattr->read = sysfs_bin_attr_simple_read; ++nattr; } ++loaded; -- cgit v1.2.3 From a473573964e51dcb6efc182f773cd3924be4a184 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:33 -0700 Subject: lib: code tagging module support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for code tagging from dynamically loaded modules. Link: https://lkml.kernel.org/r/20240321163705.3067592-12-surenb@google.com Signed-off-by: Suren Baghdasaryan Co-developed-by: Kent Overstreet Signed-off-by: Kent Overstreet Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- kernel/module/main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index e1e8a7a9d6c1..ffa6b3e9cb43 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -1242,6 +1243,7 @@ static void free_module(struct module *mod) { trace_module_free(mod); + codetag_unload_module(mod); mod_sysfs_teardown(mod); /* @@ -2995,6 +2997,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Get rid of temporary copy. 
*/ free_copy(info, flags); + codetag_load_module(mod); + /* Done! */ trace_module_load(mod); -- cgit v1.2.3 From 47a92dfbe01f41bcbf359250ccb3caa589763abf Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:34 -0700 Subject: lib: prevent module unloading if memory is not freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip freeing module's data section if there are non-zero allocation tags because otherwise, once these allocations are freed, the access to their code tag would cause UAF. Link: https://lkml.kernel.org/r/20240321163705.3067592-13-surenb@google.com Signed-off-by: Suren Baghdasaryan Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Kent Overstreet Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- kernel/module/main.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index ffa6b3e9cb43..2d25eebc549d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1211,15 +1211,19 @@ static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) return module_alloc(size); } -static void module_memory_free(void *ptr, enum mod_mem_type type) +static void module_memory_free(void *ptr, enum mod_mem_type type, + bool unload_codetags) { + if (!unload_codetags && mod_mem_type_is_core_data(type)) + return; + if (mod_mem_use_vmalloc(type)) vfree(ptr); else module_memfree(ptr); } -static void free_mod_mem(struct module *mod) +static void free_mod_mem(struct module *mod, bool unload_codetags) { for_each_mod_mem_type(type) { struct module_memory *mod_mem = &mod->mem[type]; @@ -1230,20 +1234,27 @@ static void 
free_mod_mem(struct module *mod) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod_mem->base, type); + module_memory_free(mod_mem->base, type, + unload_codetags); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); + module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA, unload_codetags); } /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { + bool unload_codetags; + trace_module_free(mod); - codetag_unload_module(mod); + unload_codetags = codetag_unload_module(mod); + if (!unload_codetags) + pr_warn("%s: memory allocation(s) from the module still alive, cannot unload cleanly\n", + mod->name); + mod_sysfs_teardown(mod); /* @@ -1285,7 +1296,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - free_mod_mem(mod); + free_mod_mem(mod, unload_codetags); } void *__symbol_get(const char *symbol) @@ -2298,7 +2309,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_enomem: for (t--; t >= 0; t--) - module_memory_free(mod->mem[t].base, t); + module_memory_free(mod->mem[t].base, t, true); return ret; } @@ -2428,7 +2439,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); - free_mod_mem(mod); + free_mod_mem(mod, true); } int __weak module_finalize(const Elf_Ehdr *hdr, -- cgit v1.2.3 From 8d0b728840fdcfd0f0bc814c8ac9ef7c677839da Mon Sep 17 00:00:00 2001 From: Yifan Hong Date: Wed, 10 Apr 2024 19:48:02 +0000 Subject: module: allow UNUSED_KSYMS_WHITELIST to be relative against objtree. 
If UNUSED_KSYMS_WHITELIST is a file generated before Kbuild runs, and the source tree is in a read-only filesystem, the developer must put the file somewhere and specify an absolute path to UNUSED_KSYMS_WHITELIST. This worked, but if IKCONFIG=y, an absolute path is embedded into .config and eventually into vmlinux, causing the build to be less reproducible when building on a different machine. This patch makes the handling of UNUSED_KSYMS_WHITELIST similar to that of MODULE_SIG_KEY. First, check if UNUSED_KSYMS_WHITELIST is an absolute path, just as before this patch. If so, use the path as is. If it is a relative path, use wildcard to check the existence of the file below objtree first. If it does not exist, fall back to the original behavior of adding $(srctree)/ before the value. After this patch, the developer can put the generated file in objtree, then use a relative path against objtree in .config, eradicating any absolute paths that may be evaluated differently on different machines. Signed-off-by: Yifan Hong Reviewed-by: Elliot Berman Signed-off-by: Luis Chamberlain --- kernel/module/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module') diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index f3e0329337f6..cb8377a18927 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -392,7 +392,7 @@ config UNUSED_KSYMS_WHITELIST exported at all times, even in absence of in-tree users. The value to set here is the path to a text file containing the list of symbols, one per line. The path can be absolute, or relative to the kernel - source tree. + source or obj tree. 
config MODULES_TREE_LOOKUP def_bool y -- cgit v1.2.3 From 086437d94aa3591b459e64bffed657b88dcc46a7 Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Fri, 12 Apr 2024 18:53:47 +0000 Subject: kallsyms: replace deprecated strncpy with strscpy strncpy() is deprecated for use on NUL-terminated destination strings [1] and as such we should prefer more robust and less ambiguous string interfaces. The goal is to remove its use completely [2]. namebuf is eventually cleaned of any trailing llvm suffixes using strstr(). This hints that namebuf should be NUL-terminated. static void cleanup_symbol_name(char *s) { char *res; ... res = strstr(s, ".llvm."); ... } Due to this, use strscpy() over strncpy() as it guarantees NUL-termination on the destination buffer. Drop the -1 from the length calculation as it is no longer needed to ensure NUL-termination. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://manpages.debian.org/testing/linux-manual-4.8/strscpy.9.en.html Link: https://github.com/KSPP/linux/issues/90 [2] Cc: linux-hardening@vger.kernel.org Signed-off-by: Justin Stitt Signed-off-by: Luis Chamberlain --- kernel/module/kallsyms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/module') diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index ef73ae7c8909..62fb57bb9f16 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -348,7 +348,7 @@ const char *module_address_lookup(unsigned long addr, } /* Make a copy in here where it's safe */ if (ret) { - strncpy(namebuf, ret, KSYM_NAME_LEN - 1); + strscpy(namebuf, ret, KSYM_NAME_LEN); ret = namebuf; } preempt_enable(); -- cgit v1.2.3 From bc6b94d3ea062454ca889884db99e145efffcb93 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:17 +0300 Subject: module: make module_memory_{alloc,free} more self-contained MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Move the logic related to the memory allocation and freeing into module_memory_alloc() and module_memory_free(). Signed-off-by: Mike Rapoport (IBM) Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Masami Hiramatsu (Google) Acked-by: Song Liu Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 64 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index e1e8a7a9d6c1..5b82b069e0d3 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type) mod_mem_type_is_core_data(type); } -static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) +static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { + unsigned int size = PAGE_ALIGN(mod->mem[type].size); + void *ptr; + + mod->mem[type].size = size; + if (mod_mem_use_vmalloc(type)) - return vzalloc(size); - return module_alloc(size); + ptr = vmalloc(size); + else + ptr = module_alloc(size); + + if (!ptr) + return -ENOMEM; + + /* + * The pointer to these blocks of memory are stored on the module + * structure and we keep that around so long as the module is + * around. We only free that memory when we unload the module. + * Just mark them as not being a leak then. The .init* ELF + * sections *do* get freed after boot so we *could* treat them + * slightly differently with kmemleak_ignore() and only grey + * them out as they work as typical memory allocations which + * *do* eventually get freed, but let's just keep things simple + * and avoid *any* false positives. 
+ */ + kmemleak_not_leak(ptr); + + memset(ptr, 0, size); + mod->mem[type].base = ptr; + + return 0; } -static void module_memory_free(void *ptr, enum mod_mem_type type) +static void module_memory_free(struct module *mod, enum mod_mem_type type) { + void *ptr = mod->mem[type].base; + if (mod_mem_use_vmalloc(type)) vfree(ptr); else @@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod_mem->base, type); + module_memory_free(mod, type); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); + module_memory_free(mod, MOD_DATA); } /* Free a module, remove from lists, etc. */ @@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, struct load_info *info) static int move_module(struct module *mod, struct load_info *info) { int i; - void *ptr; enum mod_mem_type t = 0; int ret = -ENOMEM; @@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct load_info *info) mod->mem[type].base = NULL; continue; } - mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size); - ptr = module_memory_alloc(mod->mem[type].size, type); - /* - * The pointer to these blocks of memory are stored on the module - * structure and we keep that around so long as the module is - * around. We only free that memory when we unload the module. - * Just mark them as not being a leak then. The .init* ELF - * sections *do* get freed after boot so we *could* treat them - * slightly differently with kmemleak_ignore() and only grey - * them out as they work as typical memory allocations which - * *do* eventually get freed, but let's just keep things simple - * and avoid *any* false positives. 
- */ - kmemleak_not_leak(ptr); - if (!ptr) { + + ret = module_memory_alloc(mod, type); + if (ret) { t = type; goto out_enomem; } - memset(ptr, 0, mod->mem[type].size); - mod->mem[type].base = ptr; } /* Transfer each section which specifies SHF_ALLOC */ @@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_enomem: for (t--; t >= 0; t--) - module_memory_free(mod->mem[t].base, t); + module_memory_free(mod, t); return ret; } -- cgit v1.2.3 From 12af2b83d0b17ec8b379b721dd4a8fbcd5d791f3 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:18 +0300 Subject: mm: introduce execmem_alloc() and execmem_free() module_alloc() is used everywhere as a means to allocate memory for code. Besides being semantically wrong, this unnecessarily ties all subsystems that need to allocate code, such as ftrace, kprobes and BPF, to modules and puts the burden of code allocation on the modules code. Several architectures override module_alloc() because of various constraints on where the executable memory can be located and this causes additional obstacles for improvements of code allocation. Start splitting code allocation from modules by introducing execmem_alloc() and execmem_free() APIs. Initially, execmem_alloc() is a wrapper for module_alloc() and execmem_free() is a replacement of module_memfree() to allow updating all call sites to use the new APIs. Since architectures define different restrictions on placement, permissions, alignment and other parameters for memory that can be used by different subsystems that allocate executable memory, execmem_alloc() takes a type argument that will be used to identify the calling subsystem and to allow architectures to define parameters for ranges suitable for that subsystem. No functional changes. 
Signed-off-by: Mike Rapoport (IBM) Acked-by: Masami Hiramatsu (Google) Acked-by: Song Liu Acked-by: Steven Rostedt (Google) Signed-off-by: Luis Chamberlain --- kernel/module/Kconfig | 1 + kernel/module/main.c | 25 ++++++++----------------- 2 files changed, 9 insertions(+), 17 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/Kconfig b/kernel/module/Kconfig index cb8377a18927..4047b6d48255 100644 --- a/kernel/module/Kconfig +++ b/kernel/module/Kconfig @@ -2,6 +2,7 @@ menuconfig MODULES bool "Enable loadable module support" modules + select EXECMEM help Kernel modules are small pieces of compiled code which can be inserted in the running kernel, rather than being diff --git a/kernel/module/main.c b/kernel/module/main.c index 5b82b069e0d3..d56b7df0cbb6 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1179,16 +1180,6 @@ resolve_symbol_wait(struct module *mod, return ksym; } -void __weak module_memfree(void *module_region) -{ - /* - * This memory may be RO, and freeing RO memory in an interrupt is not - * supported by vmalloc. 
- */ - WARN_ON(in_interrupt()); - vfree(module_region); -} - void __weak module_arch_cleanup(struct module *mod) { } @@ -1213,7 +1204,7 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) if (mod_mem_use_vmalloc(type)) ptr = vmalloc(size); else - ptr = module_alloc(size); + ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size); if (!ptr) return -ENOMEM; @@ -1244,7 +1235,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type) if (mod_mem_use_vmalloc(type)) vfree(ptr); else - module_memfree(ptr); + execmem_free(ptr); } static void free_mod_mem(struct module *mod) @@ -2496,9 +2487,9 @@ static void do_free_init(struct work_struct *w) llist_for_each_safe(pos, n, list) { initfree = container_of(pos, struct mod_initfree, node); - module_memfree(initfree->init_text); - module_memfree(initfree->init_data); - module_memfree(initfree->init_rodata); + execmem_free(initfree->init_text); + execmem_free(initfree->init_data); + execmem_free(initfree->init_rodata); kfree(initfree); } } @@ -2608,10 +2599,10 @@ static noinline int do_init_module(struct module *mod) * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we * call synchronize_rcu(), but we don't want to slow down the success - * path. module_memfree() cannot be called in an interrupt, so do the + * path. execmem_free() cannot be called in an interrupt, so do the * work and call synchronize_rcu() in a work queue. * - * Note that module_alloc() on most architectures creates W+X page + * Note that execmem_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. 
Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work by invoking -- cgit v1.2.3 From 223b5e57d0d50b0c07b933350dbcde92018d3080 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:20 +0300 Subject: mm/execmem, arch: convert remaining overrides of module_alloc to execmem Extend execmem parameters to accommodate more complex overrides of module_alloc() by architectures. This includes specification of a fallback range required by arm, arm64 and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for allocation of KASAN shadow required by s390 and x86 and support for late initialization of execmem required by arm64. The core implementation of execmem_alloc() takes care of suppressing warnings when the initial allocation fails but there is a fallback range defined. Signed-off-by: Mike Rapoport (IBM) Acked-by: Will Deacon Acked-by: Song Liu Tested-by: Liviu Dudau Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index d56b7df0cbb6..91e185607d4b 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1188,24 +1188,20 @@ void __weak module_arch_freeing_init(struct module *mod) { } -static bool mod_mem_use_vmalloc(enum mod_mem_type type) -{ - return IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC) && - mod_mem_type_is_core_data(type); -} - static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { unsigned int size = PAGE_ALIGN(mod->mem[type].size); + enum execmem_type execmem_type; void *ptr; mod->mem[type].size = size; - if (mod_mem_use_vmalloc(type)) - ptr = vmalloc(size); + if (mod_mem_type_is_data(type)) + execmem_type = EXECMEM_MODULE_DATA; else - ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size); + execmem_type = EXECMEM_MODULE_TEXT; + ptr = 
execmem_alloc(execmem_type, size); if (!ptr) return -ENOMEM; @@ -1232,10 +1228,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type) { void *ptr = mod->mem[type].base; - if (mod_mem_use_vmalloc(type)) - vfree(ptr); - else - execmem_free(ptr); + execmem_free(ptr); } static void free_mod_mem(struct module *mod) @@ -1630,13 +1623,6 @@ static void free_modinfo(struct module *mod) } } -void * __weak module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - bool __weak module_init_section(const char *name) { return strstarts(name, ".init"); -- cgit v1.2.3