From cb0b50b813f6198b7d44ae8e169803440333577a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 9 May 2023 15:49:02 +0200 Subject: module: Remove preempt_disable() from module reference counting. The preempt_disable() section in module_put() was added in commit e1783a240f491 ("module: Use this_cpu_xx to dynamically allocate counters") while the per-CPU counter were switched to another API. The API requires that during the RMW operation the CPU remained the same. This counting API was later replaced with atomic_t in commit 2f35c41f58a97 ("module: Replace module_ref with atomic_t refcnt") Since this atomic_t replacement there is no need to keep preemption disabled while the reference counter is modified. Remove preempt_disable() from module_put(), __module_get() and try_module_get(). Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Luis Chamberlain --- kernel/module/main.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 044aa2c9e3cb..ea7d0c7f3e60 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -820,10 +820,8 @@ static struct module_attribute modinfo_refcnt = void __module_get(struct module *module) { if (module) { - preempt_disable(); atomic_inc(&module->refcnt); trace_module_get(module, _RET_IP_); - preempt_enable(); } } EXPORT_SYMBOL(__module_get); @@ -833,15 +831,12 @@ bool try_module_get(struct module *module) bool ret = true; if (module) { - preempt_disable(); /* Note: here, we can fail to get a reference */ if (likely(module_is_live(module) && atomic_inc_not_zero(&module->refcnt) != 0)) trace_module_get(module, _RET_IP_); else ret = false; - - preempt_enable(); } return ret; } @@ -852,11 +847,9 @@ void module_put(struct module *module) int ret; if (module) { - preempt_disable(); ret = atomic_dec_if_positive(&module->refcnt); WARN_ON(ret < 0); /* Failed to put refcount */ trace_module_put(module, _RET_IP_); - preempt_enable(); } } EXPORT_SYMBOL(module_put); -- cgit v1.2.3 From 4f521bab5bfc854ec0dab7ef560dfa75247e615d Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Fri, 26 May 2023 12:51:23 +0530 Subject: kallsyms: remove unsed API lookup_symbol_attrs with commit '7878c231dae0 ("slab: remove /proc/slab_allocators")' lookup_symbol_attrs usage is removed. Thus removing redundant API. Signed-off-by: Maninder Singh Reviewed-by: Kees Cook Signed-off-by: Luis Chamberlain --- kernel/module/kallsyms.c | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/kallsyms.c b/kernel/module/kallsyms.c index c550d7d45f2f..ef73ae7c8909 100644 --- a/kernel/module/kallsyms.c +++ b/kernel/module/kallsyms.c @@ -381,34 +381,6 @@ out: return -ERANGE; } -int lookup_module_symbol_attrs(unsigned long addr, unsigned long *size, - unsigned long *offset, char *modname, char *name) -{ - struct module *mod; - - preempt_disable(); - list_for_each_entry_rcu(mod, &modules, list) { - if (mod->state == MODULE_STATE_UNFORMED) - continue; - if (within_module(addr, mod)) { - const char *sym; - - sym = find_kallsyms_symbol(mod, addr, size, offset); - if (!sym) - goto out; - if (modname) - strscpy(modname, mod->name, MODULE_NAME_LEN); - if (name) - strscpy(name, sym, KSYM_NAME_LEN); - preempt_enable(); - return 0; - } - } -out: - preempt_enable(); - return -ERANGE; -} - int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *name, char *module_name, int *exported) { -- cgit v1.2.3 From ddb5cdbafaaad6b99d7007ae1740403124502d03 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 12 Jun 2023 00:50:52 +0900 Subject: kbuild: generate KSYMTAB entries by modpost Commit 7b4537199a4a ("kbuild: link symbol CRCs at final link, removing CONFIG_MODULE_REL_CRCS") made modpost output CRCs in the same way whether the EXPORT_SYMBOL() is placed in *.c or *.S. For further cleanups, this commit applies a similar approach to the entire data structure of EXPORT_SYMBOL(). The EXPORT_SYMBOL() compilation is split into two stages. When a source file is compiled, EXPORT_SYMBOL() will be converted into a dummy symbol in the .export_symbol section. For example, EXPORT_SYMBOL(foo); EXPORT_SYMBOL_NS_GPL(bar, BAR_NAMESPACE); will be encoded into the following assembly code: .section ".export_symbol","a" __export_symbol_foo: .asciz "" /* license */ .asciz "" /* name space */ .balign 8 .quad foo /* symbol reference */ .previous .section ".export_symbol","a" __export_symbol_bar: .asciz "GPL" /* license */ .asciz "BAR_NAMESPACE" /* name space */ .balign 8 .quad bar /* symbol reference */ .previous They are mere markers to tell modpost the name, license, and namespace of the symbols. They will be dropped from the final vmlinux and modules because the *(.export_symbol) will go into /DISCARD/ in the linker script. Then, modpost extracts all the information about EXPORT_SYMBOL() from the .export_symbol section, and generates the final C code: KSYMTAB_FUNC(foo, "", ""); KSYMTAB_FUNC(bar, "_gpl", "BAR_NAMESPACE"); KSYMTAB_FUNC() (or KSYMTAB_DATA() if it is data) is expanded to struct kernel_symbol that will be linked to the vmlinux or a module. With this change, EXPORT_SYMBOL() works in the same way for *.c and *.S files, providing the following benefits. [1] Deprecate EXPORT_DATA_SYMBOL() In the old days, EXPORT_SYMBOL() was only available in C files. To export a symbol in *.S, EXPORT_SYMBOL() was placed in a separate *.c file. arch/arm/kernel/armksyms.c is one example written in the classic manner. Commit 22823ab419d8 ("EXPORT_SYMBOL() for asm") removed this limitation. Since then, EXPORT_SYMBOL() can be placed close to the symbol definition in *.S files. It was a nice improvement. However, as that commit mentioned, you need to use EXPORT_DATA_SYMBOL() for data objects on some architectures. In the new approach, modpost checks symbol's type (STT_FUNC or not), and outputs KSYMTAB_FUNC() or KSYMTAB_DATA() accordingly. There are only two users of EXPORT_DATA_SYMBOL: EXPORT_DATA_SYMBOL_GPL(empty_zero_page) (arch/ia64/kernel/head.S) EXPORT_DATA_SYMBOL(ia64_ivt) (arch/ia64/kernel/ivt.S) They are transformed as follows and output into .vmlinux.export.c KSYMTAB_DATA(empty_zero_page, "_gpl", ""); KSYMTAB_DATA(ia64_ivt, "", ""); The other EXPORT_SYMBOL users in ia64 assembly are output as KSYMTAB_FUNC(). EXPORT_DATA_SYMBOL() is now deprecated. [2] merge and There are two similar header implementations: include/linux/export.h for .c files include/asm-generic/export.h for .S files Ideally, the functionality should be consistent between them, but they tend to diverge. Commit 8651ec01daed ("module: add support for symbol namespaces.") did not support the namespace for *.S files. This commit shifts the essential implementation part to C, which supports EXPORT_SYMBOL_NS() for *.S files. and will remain as a wrapper of for a while. They will be removed after #include directives are all replaced with #include . [3] Implement CONFIG_TRIM_UNUSED_KSYMS in one-pass algorithm (by a later commit) When CONFIG_TRIM_UNUSED_KSYMS is enabled, Kbuild recursively traverses the directory tree to determine which EXPORT_SYMBOL to trim. If an EXPORT_SYMBOL turns out to be unused by anyone, Kbuild begins the second traverse, where some source files are recompiled with their EXPORT_SYMBOL() tuned into a no-op. We can do this better now; modpost can selectively emit KSYMTAB entries that are really used by modules. Signed-off-by: Masahiro Yamada Reviewed-by: Nick Desaulniers --- kernel/module/internal.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/module') diff --git a/kernel/module/internal.h b/kernel/module/internal.h index dc7b0160c480..c8b7b4dcf782 100644 --- a/kernel/module/internal.h +++ b/kernel/module/internal.h @@ -32,6 +32,18 @@ /* Maximum number of characters written by module_flags() */ #define MODULE_FLAGS_BUF_SIZE (TAINT_FLAGS_COUNT + 4) +struct kernel_symbol { +#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS + int value_offset; + int name_offset; + int namespace_offset; +#else + unsigned long value; + const char *name; + const char *namespace; +#endif +}; + extern struct mutex module_mutex; extern struct list_head modules; -- cgit v1.2.3 From 054a73009c22a5fb8bbeee5394980809276bc9fe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 May 2023 20:55:13 -0400 Subject: module: split up 'finit_module()' into init_module_from_file() helper This will simplify the next step, where we can then key off the inode to do one idempotent module load. Let's do the obvious re-organization in one step, and then the new code in another. Signed-off-by: Linus Torvalds --- kernel/module/main.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 4e2cf784cf8c..23abfe3e2674 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3057,26 +3057,16 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, return load_module(&info, uargs, 0); } -SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) +static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { struct load_info info = { }; void *buf = NULL; int len; - int err; - - err = may_init_module(); - if (err) - return err; - pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); + if (!f || !(f->f_mode & FMODE_READ)) + return -EBADF; - if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS - |MODULE_INIT_IGNORE_VERMAGIC - |MODULE_INIT_COMPRESSED_FILE)) - return -EINVAL; - - len = kernel_read_file_from_fd(fd, 0, &buf, INT_MAX, NULL, - READING_MODULE); + len = kernel_read_file(f, 0, &buf, INT_MAX, NULL, READING_MODULE); if (len < 0) { mod_stat_inc(&failed_kreads); mod_stat_add_long(len, &invalid_kread_bytes); @@ -3084,7 +3074,7 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) } if (flags & MODULE_INIT_COMPRESSED_FILE) { - err = module_decompress(&info, buf, len); + int err = module_decompress(&info, buf, len); vfree(buf); /* compressed data is no longer needed */ if (err) { mod_stat_inc(&failed_decompress); @@ -3099,6 +3089,28 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) return load_module(&info, uargs, flags); } +SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) +{ + int err; + struct fd f; + + err = may_init_module(); + if (err) + return err; + + pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); + + if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS + |MODULE_INIT_IGNORE_VERMAGIC + |MODULE_INIT_COMPRESSED_FILE)) + return -EINVAL; + + f = fdget(fd); + err = init_module_from_file(f.file, uargs, flags); + fdput(f); + return err; +} + /* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */ char *module_flags(struct module *mod, char *buf, bool show_state) { -- cgit v1.2.3 From 9b9879fc03275ffe0da328cf5b864d9e694167c8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 29 May 2023 21:39:51 -0400 Subject: modules: catch concurrent module loads, treat them as idempotent This is the new-and-improved attempt at avoiding huge memory load spikes when the user space boot sequence tries to load hundreds (or even thousands) of redundant duplicate modules in parallel. See commit 9828ed3f695a ("module: error out early on concurrent load of the same module file") for background and an earlier failed attempt that was reverted. That earlier attempt just said "concurrently loading the same module is silly, just open the module file exclusively and return -ETXTBSY if somebody else is already loading it". While it is true that concurrent module loads of the same module is silly, the reason that earlier attempt then failed was that the concurrently loaded module would often be a prerequisite for another module. Thus failing to load the prerequisite would then cause cascading failures of the other modules, rather than just short-circuiting that one unnecessary module load. At the same time, we still really don't want to load the contents of the same module file hundreds of times, only to then wait for an eventually successful load, and have everybody else return -EEXIST. As a result, this takes another approach, and treats concurrent module loads from the same file as "idempotent" in the inode. So if one module load is ongoing, we don't start a new one, but instead just wait for the first one to complete and return the same return value as it did. So unlike the first attempt, this does not return early: the intent is not to speed up the boot, but to avoid a thundering herd problem in allocating memory (both physical and virtual) for a module more than once. Also note that this does change behavior: it used to be that when you had concurrent loads, you'd have one "winner" that would return success, and everybody else would return -EEXIST. In contrast, this idempotent logic goes all Oprah on the problem, and says "You are a winner! And you are a winner! We are ALL winners". But since there's no possible actual real semantic difference between "you loaded the module" and "somebody else already loaded the module", this is more of a feel-good change than an actual honest-to-goodness semantic change. Of course, any true Johnny-come-latelies that don't get caught in the concurrency filter will still return -EEXIST. It's no different from not even getting a seat at an Oprah taping. That's life. See the long thread on the kernel mailing list about this all, which includes some numbers for memory use before and after the patch. Link: https://lore.kernel.org/lkml/20230524213620.3509138-1-mcgrof@kernel.org/ Reviewed-by: Johan Hovold Tested-by: Johan Hovold Tested-by: Luis Chamberlain Tested-by: Dan Williams Tested-by: Rudi Heitbaum Tested-by: David Hildenbrand Signed-off-by: Linus Torvalds --- kernel/module/main.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 2 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 23abfe3e2674..d6de66a6a1ac 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3057,15 +3057,82 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, return load_module(&info, uargs, 0); } +struct idempotent { + const void *cookie; + struct hlist_node entry; + struct completion complete; + int ret; +}; + +#define IDEM_HASH_BITS 8 +static struct hlist_head idem_hash[1 << IDEM_HASH_BITS]; +static DEFINE_SPINLOCK(idem_lock); + +static bool idempotent(struct idempotent *u, const void *cookie) +{ + int hash = hash_ptr(cookie, IDEM_HASH_BITS); + struct hlist_head *head = idem_hash + hash; + struct idempotent *existing; + bool first; + + u->ret = 0; + u->cookie = cookie; + init_completion(&u->complete); + + spin_lock(&idem_lock); + first = true; + hlist_for_each_entry(existing, head, entry) { + if (existing->cookie != cookie) + continue; + first = false; + break; + } + hlist_add_head(&u->entry, idem_hash + hash); + spin_unlock(&idem_lock); + + return !first; +} + +/* + * We were the first one with 'cookie' on the list, and we ended + * up completing the operation. We now need to walk the list, + * remove everybody - which includes ourselves - fill in the return + * value, and then complete the operation. + */ +static void idempotent_complete(struct idempotent *u, int ret) +{ + const void *cookie = u->cookie; + int hash = hash_ptr(cookie, IDEM_HASH_BITS); + struct hlist_head *head = idem_hash + hash; + struct hlist_node *next; + struct idempotent *pos; + + spin_lock(&idem_lock); + hlist_for_each_entry_safe(pos, next, head, entry) { + if (pos->cookie != cookie) + continue; + hlist_del(&pos->entry); + pos->ret = ret; + complete(&pos->complete); + } + spin_unlock(&idem_lock); +} + static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { + struct idempotent idem; struct load_info info = { }; void *buf = NULL; - int len; + int len, ret; if (!f || !(f->f_mode & FMODE_READ)) return -EBADF; + if (idempotent(&idem, file_inode(f))) { + wait_for_completion(&idem.complete); + return idem.ret; + } + len = kernel_read_file(f, 0, &buf, INT_MAX, NULL, READING_MODULE); if (len < 0) { mod_stat_inc(&failed_kreads); @@ -3086,7 +3153,9 @@ static int init_module_from_file(struct file *f, const char __user * uargs, int info.len = len; } - return load_module(&info, uargs, flags); + ret = load_module(&info, uargs, flags); + idempotent_complete(&idem, ret); + return ret; } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) -- cgit v1.2.3 From f1962207150c8b602e980616f04b37ea4e64bb9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 4 Jul 2023 06:37:32 -0700 Subject: module: fix init_module_from_file() error handling Vegard Nossum pointed out two different problems with the error handling in init_module_from_file(): (a) the idempotent loading code didn't clean up properly in some error cases, leaving the on-stack 'struct idempotent' element still in the hash table (b) failure to read the module file would nonsensically update the 'invalid_kread_bytes' stat counter with the error value The first error is quite nasty, in that it can then cause subsequent idempotent loads of that same file to access stale stack contents of the previous failure. The case may not happen in any normal situation (explaining all the "Tested-by's on the original change), and requires admin privileges, but syzkaller triggers random bad behavior as a result: BUG: soft lockup in sys_finit_module BUG: unable to handle kernel paging request in init_module_from_file general protection fault in init_module_from_file INFO: task hung in init_module_from_file KASAN: out-of-bounds Read in init_module_from_file KASAN: slab-out-of-bounds Read in init_module_from_file ... The second error is fairly benign and just leads to nonsensical stats (and has been around since the debug stats were added). Vegard also provided a patch for the idempotent loading issue, but I'd rather re-organize the code and make it more legible using another level of helper functions than add the usual "goto out" error handling. Link: https://lore.kernel.org/lkml/20230704100852.23452-1-vegard.nossum@oracle.com/ Fixes: 9b9879fc0327 ("modules: catch concurrent module loads, treat them as idempotent") Reported-by: Vegard Nossum Reported-by: Harshit Mogalapalli Reported-by: syzbot+9c2bdc9d24e4a7abe741@syzkaller.appspotmail.com Signed-off-by: Linus Torvalds --- kernel/module/main.c | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) (limited to 'kernel/module') diff --git a/kernel/module/main.c b/kernel/module/main.c index 834de86ebe35..59b1d067e528 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3092,7 +3092,7 @@ static bool idempotent(struct idempotent *u, const void *cookie) * remove everybody - which includes ourselves - fill in the return * value, and then complete the operation. */ -static void idempotent_complete(struct idempotent *u, int ret) +static int idempotent_complete(struct idempotent *u, int ret) { const void *cookie = u->cookie; int hash = hash_ptr(cookie, IDEM_HASH_BITS); @@ -3109,27 +3109,18 @@ static void idempotent_complete(struct idempotent *u, int ret) complete(&pos->complete); } spin_unlock(&idem_lock); + return ret; } static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { - struct idempotent idem; struct load_info info = { }; void *buf = NULL; - int len, ret; - - if (!f || !(f->f_mode & FMODE_READ)) - return -EBADF; - - if (idempotent(&idem, file_inode(f))) { - wait_for_completion(&idem.complete); - return idem.ret; - } + int len; len = kernel_read_file(f, 0, &buf, INT_MAX, NULL, READING_MODULE); if (len < 0) { mod_stat_inc(&failed_kreads); - mod_stat_add_long(len, &invalid_kread_bytes); return len; } @@ -3146,9 +3137,25 @@ static int init_module_from_file(struct file *f, const char __user * uargs, int info.len = len; } - ret = load_module(&info, uargs, flags); - idempotent_complete(&idem, ret); - return ret; + return load_module(&info, uargs, flags); +} + +static int idempotent_init_module(struct file *f, const char __user * uargs, int flags) +{ + struct idempotent idem; + + if (!f || !(f->f_mode & FMODE_READ)) + return -EBADF; + + /* See if somebody else is doing the operation? */ + if (idempotent(&idem, file_inode(f))) { + wait_for_completion(&idem.complete); + return idem.ret; + } + + /* Otherwise, we'll do it and complete others */ + return idempotent_complete(&idem, + init_module_from_file(f, uargs, flags)); } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) @@ -3168,7 +3175,7 @@ SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) return -EINVAL; f = fdget(fd); - err = init_module_from_file(f.file, uargs, flags); + err = idempotent_init_module(f.file, uargs, flags); fdput(f); return err; } -- cgit v1.2.3