From a473573964e51dcb6efc182f773cd3924be4a184 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:33 -0700 Subject: lib: code tagging module support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for code tagging from dynamically loaded modules. Link: https://lkml.kernel.org/r/20240321163705.3067592-12-surenb@google.com Signed-off-by: Suren Baghdasaryan Co-developed-by: Kent Overstreet Signed-off-by: Kent Overstreet Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- kernel/module/main.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'kernel/module/main.c') diff --git a/kernel/module/main.c b/kernel/module/main.c index e1e8a7a9d6c1..ffa6b3e9cb43 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -1242,6 +1243,7 @@ static void free_module(struct module *mod) { trace_module_free(mod); + codetag_unload_module(mod); mod_sysfs_teardown(mod); /* @@ -2995,6 +2997,8 @@ static int load_module(struct load_info *info, const char __user *uargs, /* Get rid of temporary copy. */ free_copy(info, flags); + codetag_load_module(mod); + /* Done! */ trace_module_load(mod); -- cgit v1.2.3 From 47a92dfbe01f41bcbf359250ccb3caa589763abf Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Thu, 21 Mar 2024 09:36:34 -0700 Subject: lib: prevent module unloading if memory is not freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip freeing module's data section if there are non-zero allocation tags because otherwise, once these allocations are freed, the access to their code tag would cause UAF. Link: https://lkml.kernel.org/r/20240321163705.3067592-13-surenb@google.com Signed-off-by: Suren Baghdasaryan Tested-by: Kees Cook Cc: Alexander Viro Cc: Alex Gaynor Cc: Alice Ryhl Cc: Andreas Hindborg Cc: Benno Lossin Cc: "Björn Roy Baron" Cc: Boqun Feng Cc: Christoph Lameter Cc: Dennis Zhou Cc: Gary Guo Cc: Kent Overstreet Cc: Miguel Ojeda Cc: Pasha Tatashin Cc: Peter Zijlstra Cc: Tejun Heo Cc: Vlastimil Babka Cc: Wedson Almeida Filho Signed-off-by: Andrew Morton --- kernel/module/main.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) (limited to 'kernel/module/main.c') diff --git a/kernel/module/main.c b/kernel/module/main.c index ffa6b3e9cb43..2d25eebc549d 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1211,15 +1211,19 @@ static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) return module_alloc(size); } -static void module_memory_free(void *ptr, enum mod_mem_type type) +static void module_memory_free(void *ptr, enum mod_mem_type type, + bool unload_codetags) { + if (!unload_codetags && mod_mem_type_is_core_data(type)) + return; + if (mod_mem_use_vmalloc(type)) vfree(ptr); else module_memfree(ptr); } -static void free_mod_mem(struct module *mod) +static void free_mod_mem(struct module *mod, bool unload_codetags) { for_each_mod_mem_type(type) { struct module_memory *mod_mem = &mod->mem[type]; @@ -1230,20 +1234,27 @@ static void free_mod_mem(struct module *mod) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod_mem->base, type); + module_memory_free(mod_mem->base, type, + unload_codetags); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); + module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA, unload_codetags); } /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { + bool unload_codetags; + trace_module_free(mod); - codetag_unload_module(mod); + unload_codetags = codetag_unload_module(mod); + if (!unload_codetags) + pr_warn("%s: memory allocation(s) from the module still alive, cannot unload cleanly\n", + mod->name); + mod_sysfs_teardown(mod); /* @@ -1285,7 +1296,7 @@ static void free_module(struct module *mod) kfree(mod->args); percpu_modfree(mod); - free_mod_mem(mod); + free_mod_mem(mod, unload_codetags); } void *__symbol_get(const char *symbol) @@ -2298,7 +2309,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_enomem: for (t--; t >= 0; t--) - module_memory_free(mod->mem[t].base, t); + module_memory_free(mod->mem[t].base, t, true); return ret; } @@ -2428,7 +2439,7 @@ static void module_deallocate(struct module *mod, struct load_info *info) percpu_modfree(mod); module_arch_freeing_init(mod); - free_mod_mem(mod); + free_mod_mem(mod, true); } int __weak module_finalize(const Elf_Ehdr *hdr, -- cgit v1.2.3 From bc6b94d3ea062454ca889884db99e145efffcb93 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:17 +0300 Subject: module: make module_memory_{alloc,free} more self-contained MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the logic related to the memory allocation and freeing into module_memory_alloc() and module_memory_free(). Signed-off-by: Mike Rapoport (IBM) Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Masami Hiramatsu (Google) Acked-by: Song Liu Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 64 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 25 deletions(-) (limited to 'kernel/module/main.c') diff --git a/kernel/module/main.c b/kernel/module/main.c index e1e8a7a9d6c1..5b82b069e0d3 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1203,15 +1203,44 @@ static bool mod_mem_use_vmalloc(enum mod_mem_type type) mod_mem_type_is_core_data(type); } -static void *module_memory_alloc(unsigned int size, enum mod_mem_type type) +static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { + unsigned int size = PAGE_ALIGN(mod->mem[type].size); + void *ptr; + + mod->mem[type].size = size; + if (mod_mem_use_vmalloc(type)) - return vzalloc(size); - return module_alloc(size); + ptr = vmalloc(size); + else + ptr = module_alloc(size); + + if (!ptr) + return -ENOMEM; + + /* + * The pointer to these blocks of memory are stored on the module + * structure and we keep that around so long as the module is + * around. We only free that memory when we unload the module. + * Just mark them as not being a leak then. The .init* ELF + * sections *do* get freed after boot so we *could* treat them + * slightly differently with kmemleak_ignore() and only grey + * them out as they work as typical memory allocations which + * *do* eventually get freed, but let's just keep things simple + * and avoid *any* false positives. + */ + kmemleak_not_leak(ptr); + + memset(ptr, 0, size); + mod->mem[type].base = ptr; + + return 0; } -static void module_memory_free(void *ptr, enum mod_mem_type type) +static void module_memory_free(struct module *mod, enum mod_mem_type type) { + void *ptr = mod->mem[type].base; + if (mod_mem_use_vmalloc(type)) vfree(ptr); else @@ -1229,12 +1258,12 @@ static void free_mod_mem(struct module *mod) /* Free lock-classes; relies on the preceding sync_rcu(). */ lockdep_free_key_range(mod_mem->base, mod_mem->size); if (mod_mem->size) - module_memory_free(mod_mem->base, type); + module_memory_free(mod, type); } /* MOD_DATA hosts mod, so free it at last */ lockdep_free_key_range(mod->mem[MOD_DATA].base, mod->mem[MOD_DATA].size); - module_memory_free(mod->mem[MOD_DATA].base, MOD_DATA); + module_memory_free(mod, MOD_DATA); } /* Free a module, remove from lists, etc. */ @@ -2225,7 +2254,6 @@ static int find_module_sections(struct module *mod, struct load_info *info) static int move_module(struct module *mod, struct load_info *info) { int i; - void *ptr; enum mod_mem_type t = 0; int ret = -ENOMEM; @@ -2234,26 +2262,12 @@ static int move_module(struct module *mod, struct load_info *info) mod->mem[type].base = NULL; continue; } - mod->mem[type].size = PAGE_ALIGN(mod->mem[type].size); - ptr = module_memory_alloc(mod->mem[type].size, type); - /* - * The pointer to these blocks of memory are stored on the module - * structure and we keep that around so long as the module is - * around. We only free that memory when we unload the module. - * Just mark them as not being a leak then. The .init* ELF - * sections *do* get freed after boot so we *could* treat them - * slightly differently with kmemleak_ignore() and only grey - * them out as they work as typical memory allocations which - * *do* eventually get freed, but let's just keep things simple - * and avoid *any* false positives. - */ - kmemleak_not_leak(ptr); - if (!ptr) { + + ret = module_memory_alloc(mod, type); + if (ret) { t = type; goto out_enomem; } - memset(ptr, 0, mod->mem[type].size); - mod->mem[type].base = ptr; } /* Transfer each section which specifies SHF_ALLOC */ @@ -2296,7 +2310,7 @@ static int move_module(struct module *mod, struct load_info *info) return 0; out_enomem: for (t--; t >= 0; t--) - module_memory_free(mod->mem[t].base, t); + module_memory_free(mod, t); return ret; } -- cgit v1.2.3 From 12af2b83d0b17ec8b379b721dd4a8fbcd5d791f3 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:18 +0300 Subject: mm: introduce execmem_alloc() and execmem_free() module_alloc() is used everywhere as a mean to allocate memory for code. Beside being semantically wrong, this unnecessarily ties all subsystems that need to allocate code, such as ftrace, kprobes and BPF to modules and puts the burden of code allocation to the modules code. Several architectures override module_alloc() because of various constraints where the executable memory can be located and this causes additional obstacles for improvements of code allocation. Start splitting code allocation from modules by introducing execmem_alloc() and execmem_free() APIs. Initially, execmem_alloc() is a wrapper for module_alloc() and execmem_free() is a replacement of module_memfree() to allow updating all call sites to use the new APIs. Since architectures define different restrictions on placement, permissions, alignment and other parameters for memory that can be used by different subsystems that allocate executable memory, execmem_alloc() takes a type argument, that will be used to identify the calling subsystem and to allow architectures define parameters for ranges suitable for that subsystem. No functional changes. Signed-off-by: Mike Rapoport (IBM) Acked-by: Masami Hiramatsu (Google) Acked-by: Song Liu Acked-by: Steven Rostedt (Google) Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) (limited to 'kernel/module/main.c') diff --git a/kernel/module/main.c b/kernel/module/main.c index 5b82b069e0d3..d56b7df0cbb6 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1179,16 +1180,6 @@ resolve_symbol_wait(struct module *mod, return ksym; } -void __weak module_memfree(void *module_region) -{ - /* - * This memory may be RO, and freeing RO memory in an interrupt is not - * supported by vmalloc. - */ - WARN_ON(in_interrupt()); - vfree(module_region); -} - void __weak module_arch_cleanup(struct module *mod) { } @@ -1213,7 +1204,7 @@ static int module_memory_alloc(struct module *mod, enum mod_mem_type type) if (mod_mem_use_vmalloc(type)) ptr = vmalloc(size); else - ptr = module_alloc(size); + ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size); if (!ptr) return -ENOMEM; @@ -1244,7 +1235,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type) if (mod_mem_use_vmalloc(type)) vfree(ptr); else - module_memfree(ptr); + execmem_free(ptr); } static void free_mod_mem(struct module *mod) @@ -2496,9 +2487,9 @@ static void do_free_init(struct work_struct *w) llist_for_each_safe(pos, n, list) { initfree = container_of(pos, struct mod_initfree, node); - module_memfree(initfree->init_text); - module_memfree(initfree->init_data); - module_memfree(initfree->init_rodata); + execmem_free(initfree->init_text); + execmem_free(initfree->init_data); + execmem_free(initfree->init_rodata); kfree(initfree); } } @@ -2608,10 +2599,10 @@ static noinline int do_init_module(struct module *mod) * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we * call synchronize_rcu(), but we don't want to slow down the success - * path. module_memfree() cannot be called in an interrupt, so do the + * path. execmem_free() cannot be called in an interrupt, so do the * work and call synchronize_rcu() in a work queue. * - * Note that module_alloc() on most architectures creates W+X page + * Note that execmem_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work by invoking -- cgit v1.2.3 From 223b5e57d0d50b0c07b933350dbcde92018d3080 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Sun, 5 May 2024 19:06:20 +0300 Subject: mm/execmem, arch: convert remaining overrides of module_alloc to execmem Extend execmem parameters to accommodate more complex overrides of module_alloc() by architectures. This includes specification of a fallback range required by arm, arm64 and powerpc, EXECMEM_MODULE_DATA type required by powerpc, support for allocation of KASAN shadow required by s390 and x86 and support for late initialization of execmem required by arm64. The core implementation of execmem_alloc() takes care of suppressing warnings when the initial allocation fails but there is a fallback range defined. Signed-off-by: Mike Rapoport (IBM) Acked-by: Will Deacon Acked-by: Song Liu Tested-by: Liviu Dudau Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'kernel/module/main.c') diff --git a/kernel/module/main.c b/kernel/module/main.c index d56b7df0cbb6..91e185607d4b 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -1188,24 +1188,20 @@ void __weak module_arch_freeing_init(struct module *mod) { } -static bool mod_mem_use_vmalloc(enum mod_mem_type type) -{ - return IS_ENABLED(CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC) && - mod_mem_type_is_core_data(type); -} - static int module_memory_alloc(struct module *mod, enum mod_mem_type type) { unsigned int size = PAGE_ALIGN(mod->mem[type].size); + enum execmem_type execmem_type; void *ptr; mod->mem[type].size = size; - if (mod_mem_use_vmalloc(type)) - ptr = vmalloc(size); + if (mod_mem_type_is_data(type)) + execmem_type = EXECMEM_MODULE_DATA; else - ptr = execmem_alloc(EXECMEM_MODULE_TEXT, size); + execmem_type = EXECMEM_MODULE_TEXT; + ptr = execmem_alloc(execmem_type, size); if (!ptr) return -ENOMEM; @@ -1232,10 +1228,7 @@ static void module_memory_free(struct module *mod, enum mod_mem_type type) { void *ptr = mod->mem[type].base; - if (mod_mem_use_vmalloc(type)) - vfree(ptr); - else - execmem_free(ptr); + execmem_free(ptr); } static void free_mod_mem(struct module *mod) @@ -1630,13 +1623,6 @@ static void free_modinfo(struct module *mod) } } -void * __weak module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - bool __weak module_init_section(const char *name) { return strstarts(name, ".init"); -- cgit v1.2.3