From cd4eaccc00d79ab97d9a96f7922558558b13f220 Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 10 Oct 2025 10:21:38 +0200 Subject: treewide: drop outdated compiler version remarks in Kconfig help texts As of writing, Documentation/Changes states the minimal versions of GNU C being 8.1, Clang being 15.0.0 and binutils being 2.30. A few Kconfig help texts are pointing out that specific GCC and Clang versions are needed, but by now, those pointers to versions, such later than 4.0, later than 4.4, or clang later than 5.0, are obsolete and unlikely to be found by users configuring their kernel builds anyway. Drop these outdated remarks in Kconfig help texts referring to older compiler and binutils versions. No functional change. Link: https://lkml.kernel.org/r/20251010082138.185752-1-lukas.bulwahn@redhat.com Signed-off-by: Lukas Bulwahn Cc: Bill Wendling Cc: Justin Stitt Cc: Nathan Chancellor Cc: Russel King Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 3034e294d50d..e89c024dcbdf 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -332,8 +332,7 @@ config DEBUG_INFO_COMPRESSED_ZLIB depends on $(cc-option,-gz=zlib) depends on $(ld-option,--compress-debug-sections=zlib) help - Compress the debug information using zlib. Requires GCC 5.0+ or Clang - 5.0+, binutils 2.26+, and zlib. + Compress the debug information using zlib. Users of dpkg-deb via debian/rules may find an increase in size of their debug .deb packages with this config set, due to the -- cgit v1.2.3 From 9544f9e6947f6508d29f0d0cc2dacaa749fc1613 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Wed, 15 Oct 2025 14:36:15 +0800 Subject: hung_task: panic when there are more than N hung tasks at the same time The hung_task_panic sysctl is currently a blunt instrument: it's all or nothing. Panicking on a single hung task can be an overreaction to a transient glitch. A more reliable indicator of a systemic problem is when multiple tasks hang simultaneously. Extend hung_task_panic to accept an integer threshold, allowing the kernel to panic only when N hung tasks are detected in a single scan. This provides finer control to distinguish between isolated incidents and system-wide failures. The accepted values are: - 0: Don't panic (unchanged) - 1: Panic on the first hung task (unchanged) - N > 1: Panic after N hung tasks are detected in a single scan The original behavior is preserved for values 0 and 1, maintaining full backward compatibility. [lance.yang@linux.dev: new changelog] Link: https://lkml.kernel.org/r/20251015063615.2632-1-lirongqing@baidu.com Signed-off-by: Li RongQing Reviewed-by: Masami Hiramatsu (Google) Reviewed-by: Lance Yang Tested-by: Lance Yang Acked-by: Andrew Jeffery [aspeed_g5_defconfig] Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: David Hildenbrand Cc: Florian Wesphal Cc: Jakub Kacinski Cc: Jason A. Donenfeld Cc: Joel Granados Cc: Joel Stanley Cc: Jonathan Corbet Cc: Kees Cook Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: "Paul E . 
McKenney" Cc: Pawan Gupta Cc: Petr Mladek Cc: Phil Auld Cc: Randy Dunlap Cc: Russell King Cc: Shuah Khan Cc: Simon Horman Cc: Stanislav Fomichev Cc: Steven Rostedt Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e89c024dcbdf..19592a57e1ed 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1257,12 +1257,13 @@ config DEFAULT_HUNG_TASK_TIMEOUT Keeping the default should be fine in most cases. config BOOTPARAM_HUNG_TASK_PANIC - bool "Panic (Reboot) On Hung Tasks" + int "Number of hung tasks to trigger kernel panic" depends on DETECT_HUNG_TASK + default 0 help - Say Y here to enable the kernel to panic on "hung tasks", - which are bugs that cause the kernel to leave a task stuck - in uninterruptible "D" state. + When set to a non-zero value, a kernel panic will be triggered + if the number of hung tasks found during a single scan reaches + this value. The panic can be used in combination with panic_timeout, to cause the system to reboot automatically after a -- cgit v1.2.3 From 57f3d89691149f11bfb6c4fef9fca4890def8fb1 Mon Sep 17 00:00:00 2001 From: Ankan Biswas Date: Tue, 14 Oct 2025 10:54:36 +0530 Subject: lib/xz: remove dead IA-64 (Itanium) support code Support for the IA-64 (Itanium) architecture was removed in commit cf8e8658100d ("arch: Remove Itanium (IA-64) architecture"). This patch drops the IA-64 specific decompression code from lib/xz, which was conditionally compiled with the now-obsolete CONFIG_XZ_DEC_IA64 option. Link: https://lkml.kernel.org/r/20251014052738.31185-1-spyjetfayed@gmail.com Signed-off-by: Ankan Biswas Reviewed-by: Kuan-Wei Chiu Reviewed-by: Khalid Aziz Acked-by: Lasse Collin Cc: David Hunter Cc: Shuah Khan Signed-off-by: Andrew Morton --- lib/xz/xz_dec_bcj.c | 95 ----------------------------------------------------- lib/xz/xz_private.h | 4 --- 2 files changed, 99 deletions(-) (limited to 'lib') diff --git a/lib/xz/xz_dec_bcj.c b/lib/xz/xz_dec_bcj.c index 8237db17eee3..610d58d947ab 100644 --- a/lib/xz/xz_dec_bcj.c +++ b/lib/xz/xz_dec_bcj.c @@ -20,7 +20,6 @@ struct xz_dec_bcj { enum { BCJ_X86 = 4, /* x86 or x86-64 */ BCJ_POWERPC = 5, /* Big endian only */ - BCJ_IA64 = 6, /* Big or little endian */ BCJ_ARM = 7, /* Little endian only */ BCJ_ARMTHUMB = 8, /* Little endian only */ BCJ_SPARC = 9, /* Big or little endian */ @@ -180,92 +179,6 @@ static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) } #endif -#ifdef XZ_DEC_IA64 -static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) -{ - static const uint8_t branch_table[32] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 6, 6, 0, 0, 7, 7, - 4, 4, 0, 0, 4, 4, 0, 0 - }; - - /* - * The local variables take a little bit stack space, but it's less - * than what LZMA2 decoder takes, so it doesn't make sense to reduce - * stack usage here without doing that for the LZMA2 decoder too. 
- */ - - /* Loop counters */ - size_t i; - size_t j; - - /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ - uint32_t slot; - - /* Bitwise offset of the instruction indicated by slot */ - uint32_t bit_pos; - - /* bit_pos split into byte and bit parts */ - uint32_t byte_pos; - uint32_t bit_res; - - /* Address part of an instruction */ - uint32_t addr; - - /* Mask used to detect which instructions to convert */ - uint32_t mask; - - /* 41-bit instruction stored somewhere in the lowest 48 bits */ - uint64_t instr; - - /* Instruction normalized with bit_res for easier manipulation */ - uint64_t norm; - - size &= ~(size_t)15; - - for (i = 0; i < size; i += 16) { - mask = branch_table[buf[i] & 0x1F]; - for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { - if (((mask >> slot) & 1) == 0) - continue; - - byte_pos = bit_pos >> 3; - bit_res = bit_pos & 7; - instr = 0; - for (j = 0; j < 6; ++j) - instr |= (uint64_t)(buf[i + j + byte_pos]) - << (8 * j); - - norm = instr >> bit_res; - - if (((norm >> 37) & 0x0F) == 0x05 - && ((norm >> 9) & 0x07) == 0) { - addr = (norm >> 13) & 0x0FFFFF; - addr |= ((uint32_t)(norm >> 36) & 1) << 20; - addr <<= 4; - addr -= s->pos + (uint32_t)i; - addr >>= 4; - - norm &= ~((uint64_t)0x8FFFFF << 13); - norm |= (uint64_t)(addr & 0x0FFFFF) << 13; - norm |= (uint64_t)(addr & 0x100000) - << (36 - 20); - - instr &= (1 << bit_res) - 1; - instr |= norm << bit_res; - - for (j = 0; j < 6; j++) - buf[i + j + byte_pos] - = (uint8_t)(instr >> (8 * j)); - } - } - } - - return i; -} -#endif - #ifdef XZ_DEC_ARM static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { @@ -509,11 +422,6 @@ static void bcj_apply(struct xz_dec_bcj *s, filtered = bcj_powerpc(s, buf, size); break; #endif -#ifdef XZ_DEC_IA64 - case BCJ_IA64: - filtered = bcj_ia64(s, buf, size); - break; -#endif #ifdef XZ_DEC_ARM case BCJ_ARM: filtered = bcj_arm(s, buf, size); @@ -699,9 +607,6 @@ enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) #ifdef XZ_DEC_POWERPC case BCJ_POWERPC: #endif -#ifdef XZ_DEC_IA64 - case BCJ_IA64: -#endif #ifdef XZ_DEC_ARM case BCJ_ARM: #endif diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index 8409784b1639..6775078f3cce 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -24,9 +24,6 @@ # ifdef CONFIG_XZ_DEC_POWERPC # define XZ_DEC_POWERPC # endif -# ifdef CONFIG_XZ_DEC_IA64 -# define XZ_DEC_IA64 -# endif # ifdef CONFIG_XZ_DEC_ARM # define XZ_DEC_ARM # endif @@ -103,7 +100,6 @@ */ #ifndef XZ_DEC_BCJ # if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ - || defined(XZ_DEC_IA64) \ || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ || defined(XZ_DEC_SPARC) || defined(XZ_DEC_ARM64) \ || defined(XZ_DEC_RISCV) -- cgit v1.2.3 From d99dc586ca7c7729450af2ed39ca1483c0eb7b5c Mon Sep 17 00:00:00 2001 From: "Yury Norov (NVIDIA)" Date: Thu, 23 Oct 2025 13:16:06 -0400 Subject: uaccess: decouple INLINE_COPY_FROM_USER and CONFIG_RUST MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 1f9a8286bc0c ("uaccess: always export _copy_[from|to]_user with CONFIG_RUST") exports _copy_{from,to}_user() unconditionally, if RUST is enabled. This pollutes exported symbols namespace, and spreads RUST ifdefery in core files. It's better to declare a corresponding helper under the rust/helpers, similarly to how non-underscored copy_{from,to}_user() is handled. 
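For reference, this is the kind of wrapper the paragraph above refers to: rust/helpers/uaccess.c already wraps the non-underscored copy_{from,to}_user(), and an analogous helper for the underscored variant might look roughly like the sketch below. The helper name and placement are illustrative only; the actual rust/ change is outside the lib/ diff shown below (the view is limited to 'lib').

	#include <linux/uaccess.h>

	/* hypothetical sketch of a rust/helpers wrapper for _copy_from_user() */
	unsigned long rust_helper__copy_from_user(void *to, const void __user *from,
						  unsigned long n)
	{
		/* works whether _copy_from_user() ends up inline or out-of-line */
		return _copy_from_user(to, from, n);
	}

Because such a helper is compiled as part of the kernel, _copy_{from,to}_user() no longer has to be built out-of-line and exported just because CONFIG_RUST is enabled, which is what the lib/usercopy.c hunk below undoes.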
[yury.norov@gmail.com: drop rust part of comment for _copy_from_user(), per Alice] Link: https://lkml.kernel.org/r/20251024154754.99768-1-yury.norov@gmail.com Link: https://lkml.kernel.org/r/20251023171607.1171534-1-yury.norov@gmail.com Signed-off-by: Yury Norov (NVIDIA) Acked-by: Arnd Bergmann Acked-by: Miguel Ojeda Reviewed-by: Alice Ryhl Tested-by: Alice Ryhl Cc: Alex Gaynor Cc: Andreas Hindborg Cc: Björn Roy Baron Cc: Boqun Feng Cc: Danilo Krummrich Cc: Gary Guo Cc: John Hubbard Cc: Trevor Gross Signed-off-by: Andrew Morton --- lib/usercopy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/usercopy.c b/lib/usercopy.c index 7b17b83c8042..b00a3a957de6 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -12,7 +12,7 @@ /* out-of-line parts */ -#if !defined(INLINE_COPY_FROM_USER) || defined(CONFIG_RUST) +#if !defined(INLINE_COPY_FROM_USER) unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { return _inline_copy_from_user(to, from, n); @@ -20,7 +20,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n EXPORT_SYMBOL(_copy_from_user); #endif -#if !defined(INLINE_COPY_TO_USER) || defined(CONFIG_RUST) +#if !defined(INLINE_COPY_TO_USER) unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { return _inline_copy_to_user(to, from, n); -- cgit v1.2.3 From 6c2e6e2c1af1809d1d9cdbd50ac80f54f5995bdb Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Sat, 25 Oct 2025 16:00:03 +0800 Subject: dynamic_debug: add support for print stack In practical problem diagnosis, especially during the boot phase, it is often desirable to know the call sequence. However, currently, apart from adding print statements and recompiling the kernel, there seems to be no good alternative. If dynamic_debug supported printing the call stack, it would be very helpful for diagnosing issues. This patch add support '+d' for dump stack. Link: https://lkml.kernel.org/r/20251025080003.312536-1-yebin@huaweicloud.com Signed-off-by: Ye Bin Cc: Jason Baron Cc: Jim Cromie Signed-off-by: Andrew Morton --- lib/dynamic_debug.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 5a007952f7f2..7d7892e57a01 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -95,6 +95,7 @@ static const struct { unsigned flag:8; char opt_char; } opt_array[] = { { _DPRINTK_FLAGS_INCL_SOURCENAME, 's' }, { _DPRINTK_FLAGS_INCL_LINENO, 'l' }, { _DPRINTK_FLAGS_INCL_TID, 't' }, + { _DPRINTK_FLAGS_INCL_STACK, 'd' }, { _DPRINTK_FLAGS_NONE, '_' }, }; -- cgit v1.2.3 From a0b8c6af29a4be3ca2ff9a95cf71e54db5d73e65 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 24 Oct 2025 21:51:20 +0100 Subject: lib/xxhash: remove more unused xxh functions xxh32_reset() and xxh32_copy_state() are unused, and with those gone, the xxh32_state struct is also unused. xxh64_copy_state() is also unused. Remove them all. (Also fixes a comment above the xxh64_state that referred to it as xxh32_state). Link: https://lkml.kernel.org/r/20251024205120.454508-1-linux@treblig.org Signed-off-by: Dr. 
David Alan Gilbert Suggested-by: Christoph Hellwig Reviewed-by: Kuan-Wei Chiu Reviewed-by: Christoph Hellwig Signed-off-by: Andrew Morton --- lib/xxhash.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'lib') diff --git a/lib/xxhash.c b/lib/xxhash.c index cf629766f376..4125b3e3cf7f 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -73,21 +73,6 @@ static const uint64_t PRIME64_3 = 1609587929392839161ULL; static const uint64_t PRIME64_4 = 9650029242287828579ULL; static const uint64_t PRIME64_5 = 2870177450012600261ULL; -/*-************************** - * Utils - ***************************/ -void xxh32_copy_state(struct xxh32_state *dst, const struct xxh32_state *src) -{ - memcpy(dst, src, sizeof(*dst)); -} -EXPORT_SYMBOL(xxh32_copy_state); - -void xxh64_copy_state(struct xxh64_state *dst, const struct xxh64_state *src) -{ - memcpy(dst, src, sizeof(*dst)); -} -EXPORT_SYMBOL(xxh64_copy_state); - /*-*************************** * Simple Hash Functions ****************************/ @@ -239,20 +224,6 @@ EXPORT_SYMBOL(xxh64); /*-************************************************** * Advanced Hash Functions ***************************************************/ -void xxh32_reset(struct xxh32_state *statePtr, const uint32_t seed) -{ - /* use a local state for memcpy() to avoid strict-aliasing warnings */ - struct xxh32_state state; - - memset(&state, 0, sizeof(state)); - state.v1 = seed + PRIME32_1 + PRIME32_2; - state.v2 = seed + PRIME32_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); -} -EXPORT_SYMBOL(xxh32_reset); - void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) { /* use a local state for memcpy() to avoid strict-aliasing warnings */ -- cgit v1.2.3 From d79a3aeb747c17095d679cc4402d87f0e7c3405e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 30 Oct 2025 12:44:17 +0100 Subject: panic: sys_info: capture si_bits_global before iterating over it Patch series "panic: sys_info: Refactor and fix a potential issue", v3. While targeting the compilation issue due to dangling variable, I have noticed more opportunities for refactoring that helps to avoid above mentioned compilation issue in a cleaner way and also fixes a potential problem with global variable access. This patch (of 6): The for-loop might re-read the content of the memory the si_bits_global points to on each iteration. Instead, just capture it for the sake of consistency and use that instead. Link: https://lkml.kernel.org/r/20251030132007.3742368-1-andriy.shevchenko@linux.intel.com Link: https://lkml.kernel.org/r/20251030132007.3742368-2-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Feng Tang Reviewed-by: Petr Mladek Signed-off-by: Andrew Morton --- lib/sys_info.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index 496f9151c9b6..d542a024406a 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -58,11 +58,11 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, char names[sizeof(sys_info_avail)]; struct ctl_table table; unsigned long *si_bits_global; + unsigned long si_bits; si_bits_global = ro_table->data; if (write) { - unsigned long si_bits; int ret; table = *ro_table; @@ -81,9 +81,12 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, char *delim = ""; int i, len = 0; + /* The access to the global value is not synchronized. 
*/ + si_bits = READ_ONCE(*si_bits_global); + names[0] = '\0'; for (i = 0; i < ARRAY_SIZE(si_names); i++) { - if (*si_bits_global & si_names[i].bit) { + if (si_bits & si_names[i].bit) { len += scnprintf(names + len, sizeof(names) - len, "%s%s", delim, si_names[i].name); delim = ","; -- cgit v1.2.3 From 760fc597c33d5a727507c8bb19d6ab87a8c5885b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 30 Oct 2025 12:44:18 +0100 Subject: panic: sys_info: align constant definition names with parameters Align constant definition names with parameters to make it easier to map. It's also better to maintain and extend the names while keeping their uniqueness. Link: https://lkml.kernel.org/r/20251030132007.3742368-3-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Feng Tang Reviewed-by: Petr Mladek Signed-off-by: Andrew Morton --- lib/sys_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index d542a024406a..6b0188b30227 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -23,7 +23,7 @@ static const struct sys_info_name si_names[] = { { SYS_INFO_TIMERS, "timers" }, { SYS_INFO_LOCKS, "locks" }, { SYS_INFO_FTRACE, "ftrace" }, - { SYS_INFO_ALL_CPU_BT, "all_bt" }, + { SYS_INFO_ALL_BT, "all_bt" }, { SYS_INFO_BLOCKED_TASKS, "blocked_tasks" }, }; @@ -118,7 +118,7 @@ void sys_info(unsigned long si_mask) if (si_mask & SYS_INFO_FTRACE) ftrace_dump(DUMP_ALL); - if (si_mask & SYS_INFO_ALL_CPU_BT) + if (si_mask & SYS_INFO_ALL_BT) trigger_all_cpu_backtrace(); if (si_mask & SYS_INFO_BLOCKED_TASKS) -- cgit v1.2.3 From d13adc6147f5848d6ad9900fdb1dbf9a280a2f64 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 30 Oct 2025 12:44:19 +0100 Subject: panic: sys_info:replace struct sys_info_name with plain array of strings There is no need to keep a custom structure just for the need of a plain array of strings. Replace struct sys_info_name with plain array of strings. With that done, simplify the code, in particular, naturally use for_each_set_bit() when iterating over si_bits_global bitmap. Link: https://lkml.kernel.org/r/20251030132007.3742368-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Petr Mladek Cc: Feng Tang Signed-off-by: Andrew Morton --- lib/sys_info.c | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index 6b0188b30227..29a63238b31d 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -1,30 +1,29 @@ // SPDX-License-Identifier: GPL-2.0-only -#include +#include #include +#include #include #include -#include #include +#include +#include +#include #include -struct sys_info_name { - unsigned long bit; - const char *name; -}; - /* * When 'si_names' gets updated, please make sure the 'sys_info_avail' * below is updated accordingly. 
*/ -static const struct sys_info_name si_names[] = { - { SYS_INFO_TASKS, "tasks" }, - { SYS_INFO_MEM, "mem" }, - { SYS_INFO_TIMERS, "timers" }, - { SYS_INFO_LOCKS, "locks" }, - { SYS_INFO_FTRACE, "ftrace" }, - { SYS_INFO_ALL_BT, "all_bt" }, - { SYS_INFO_BLOCKED_TASKS, "blocked_tasks" }, +static const char * const si_names[] = { + [ilog2(SYS_INFO_TASKS)] = "tasks", + [ilog2(SYS_INFO_MEM)] = "mem", + [ilog2(SYS_INFO_TIMERS)] = "timers", + [ilog2(SYS_INFO_LOCKS)] = "locks", + [ilog2(SYS_INFO_FTRACE)] = "ftrace", + [ilog2(SYS_INFO_PANIC_CONSOLE_REPLAY)] = "", + [ilog2(SYS_INFO_ALL_BT)] = "all_bt", + [ilog2(SYS_INFO_BLOCKED_TASKS)] = "blocked_tasks", }; /* Expecting string like "xxx_sys_info=tasks,mem,timers,locks,ftrace,..." */ @@ -36,12 +35,9 @@ unsigned long sys_info_parse_param(char *str) s = str; while ((name = strsep(&s, ",")) && *name) { - for (i = 0; i < ARRAY_SIZE(si_names); i++) { - if (!strcmp(name, si_names[i].name)) { - si_bits |= si_names[i].bit; - break; - } - } + i = match_string(si_names, ARRAY_SIZE(si_names), name); + if (i >= 0) + __set_bit(i, &si_bits); } return si_bits; @@ -85,10 +81,10 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, si_bits = READ_ONCE(*si_bits_global); names[0] = '\0'; - for (i = 0; i < ARRAY_SIZE(si_names); i++) { - if (si_bits & si_names[i].bit) { + for_each_set_bit(i, &si_bits, ARRAY_SIZE(si_names)) { + if (*si_names[i]) { len += scnprintf(names + len, sizeof(names) - len, - "%s%s", delim, si_names[i].name); + "%s%s", delim, si_names[i]); delim = ","; } } -- cgit v1.2.3 From eb72c4667f4567a7363f6e00d082d2ab32b6a03a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 30 Oct 2025 12:44:20 +0100 Subject: panic: sys_info: rewrite a fix for a compilation error (`make W=1`) Compiler was not happy about dead variable in use: lib/sys_info.c:52:19: error: variable 'sys_info_avail' is not needed and will not be emitted [-Werror,-Wunneeded-internal-declaration] 52 | static const char sys_info_avail[] = "tasks,mem,timers,locks,ftrace,all_bt,blocked_tasks"; | ^~~~~~~~~~~~~~ This was fixed by adding __maybe_unused attribute that just hides the issue and didn't actually fix the root cause. Rewrite the fix by moving the local variable from stack to a heap. As a side effect this drops unneeded "synchronisation" of duplicative info and also makes code ready for the further refactoring. Link: https://lkml.kernel.org/r/20251030132007.3742368-5-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Petr Mladek Cc: Feng Tang Signed-off-by: Andrew Morton --- lib/sys_info.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index 29a63238b31d..eb5c1226bfc8 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include +#include #include #include #include @@ -11,10 +13,6 @@ #include -/* - * When 'si_names' gets updated, please make sure the 'sys_info_avail' - * below is updated accordingly. 
- */ static const char * const si_names[] = { [ilog2(SYS_INFO_TASKS)] = "tasks", [ilog2(SYS_INFO_MEM)] = "mem", @@ -45,25 +43,32 @@ unsigned long sys_info_parse_param(char *str) #ifdef CONFIG_SYSCTL -static const char sys_info_avail[] __maybe_unused = "tasks,mem,timers,locks,ftrace,all_bt,blocked_tasks"; - int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - char names[sizeof(sys_info_avail)]; struct ctl_table table; unsigned long *si_bits_global; unsigned long si_bits; + unsigned int i; + size_t maxlen; si_bits_global = ro_table->data; + maxlen = 0; + for (i = 0; i < ARRAY_SIZE(si_names); i++) + maxlen += strlen(si_names[i]) + 1; + + char *names __free(kfree) = kzalloc(maxlen, GFP_KERNEL); + if (!names) + return -ENOMEM; + if (write) { int ret; table = *ro_table; table.data = names; - table.maxlen = sizeof(names); + table.maxlen = maxlen; ret = proc_dostring(&table, write, buffer, lenp, ppos); if (ret) return ret; @@ -74,16 +79,15 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, return 0; } else { /* for 'read' operation */ + unsigned int len = 0; char *delim = ""; - int i, len = 0; /* The access to the global value is not synchronized. */ si_bits = READ_ONCE(*si_bits_global); - names[0] = '\0'; for_each_set_bit(i, &si_bits, ARRAY_SIZE(si_names)) { if (*si_names[i]) { - len += scnprintf(names + len, sizeof(names) - len, + len += scnprintf(names + len, maxlen - len, "%s%s", delim, si_names[i]); delim = ","; } @@ -91,7 +95,7 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, table = *ro_table; table.data = names; - table.maxlen = sizeof(names); + table.maxlen = maxlen; return proc_dostring(&table, write, buffer, lenp, ppos); } } -- cgit v1.2.3 From f791dcc842cb1cb3777ae4122be4cd37624ad53d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 30 Oct 2025 12:44:21 +0100 Subject: panic: sys_info: deduplicate local variable 'table; assignments The both handlers use the local 'table' variable and assign the same data to it, deduplicate that. Link: https://lkml.kernel.org/r/20251030132007.3742368-6-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Feng Tang Reviewed-by: Petr Mladek Signed-off-by: Andrew Morton --- lib/sys_info.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index eb5c1226bfc8..94526de8482b 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -63,12 +63,13 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, if (!names) return -ENOMEM; + table = *ro_table; + table.data = names; + table.maxlen = maxlen; + if (write) { int ret; - table = *ro_table; - table.data = names; - table.maxlen = maxlen; ret = proc_dostring(&table, write, buffer, lenp, ppos); if (ret) return ret; @@ -93,9 +94,6 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, } } - table = *ro_table; - table.data = names; - table.maxlen = maxlen; return proc_dostring(&table, write, buffer, lenp, ppos); } } -- cgit v1.2.3 From 9125163273f8033af5d38907b483c1d9f99d781b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 30 Oct 2025 12:44:22 +0100 Subject: panic: sys_info: factor out read and write handlers For the sake of the code readability and easier maintenance factor out read and write sys_info handlers. 
[akpm@linux-foundation.org: coding-style cleanups] Link: https://lkml.kernel.org/r/20251030132007.3742368-7-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Reviewed-by: Petr Mladek Cc: Feng Tang Signed-off-by: Andrew Morton --- lib/sys_info.c | 79 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index 94526de8482b..323624093e54 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -43,18 +43,56 @@ unsigned long sys_info_parse_param(char *str) #ifdef CONFIG_SYSCTL +static int sys_info_write_handler(const struct ctl_table *table, + void *buffer, size_t *lenp, loff_t *ppos, + unsigned long *si_bits_global) +{ + unsigned long si_bits; + int ret; + + ret = proc_dostring(table, 1, buffer, lenp, ppos); + if (ret) + return ret; + + si_bits = sys_info_parse_param(table->data); + + /* The access to the global value is not synchronized. */ + WRITE_ONCE(*si_bits_global, si_bits); + + return 0; +} + +static int sys_info_read_handler(const struct ctl_table *table, + void *buffer, size_t *lenp, loff_t *ppos, + unsigned long *si_bits_global) +{ + unsigned long si_bits; + unsigned int len = 0; + char *delim = ""; + unsigned int i; + + /* The access to the global value is not synchronized. */ + si_bits = READ_ONCE(*si_bits_global); + + for_each_set_bit(i, &si_bits, ARRAY_SIZE(si_names)) { + if (*si_names[i]) { + len += scnprintf(table->data + len, table->maxlen - len, + "%s%s", delim, si_names[i]); + delim = ","; + } + } + + return proc_dostring(table, 0, buffer, lenp, ppos); +} + int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table table; - unsigned long *si_bits_global; - unsigned long si_bits; unsigned int i; size_t maxlen; - si_bits_global = ro_table->data; - maxlen = 0; for (i = 0; i < ARRAY_SIZE(si_names); i++) maxlen += strlen(si_names[i]) + 1; @@ -67,35 +105,10 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, table.data = names; table.maxlen = maxlen; - if (write) { - int ret; - - ret = proc_dostring(&table, write, buffer, lenp, ppos); - if (ret) - return ret; - - si_bits = sys_info_parse_param(names); - /* The access to the global value is not synchronized. */ - WRITE_ONCE(*si_bits_global, si_bits); - return 0; - } else { - /* for 'read' operation */ - unsigned int len = 0; - char *delim = ""; - - /* The access to the global value is not synchronized. */ - si_bits = READ_ONCE(*si_bits_global); - - for_each_set_bit(i, &si_bits, ARRAY_SIZE(si_names)) { - if (*si_names[i]) { - len += scnprintf(names + len, maxlen - len, - "%s%s", delim, si_names[i]); - delim = ","; - } - } - - return proc_dostring(&table, write, buffer, lenp, ppos); - } + if (write) + return sys_info_write_handler(&table, buffer, lenp, ppos, ro_table->data); + else + return sys_info_read_handler(&table, buffer, lenp, ppos, ro_table->data); } #endif -- cgit v1.2.3 From 7f37d88f5cb32fff454f12cd99444686482ca23b Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 29 Oct 2025 13:27:43 +0100 Subject: lib/Kconfig.debug: cleanup CONFIG_DEBUG_SECTION_MISMATCH help text Simplify formulations, correct flow, split it into proper paragraphs and update structure. No functional changes. 
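To make the reworded help text concrete (hypothetical example, not part of this patch), a section mismatch is a reference from code that stays resident to code or data placed in a discarded init section:

	#include <linux/init.h>

	static int __init probe_hw(void)	/* emitted into .init.text, freed after boot */
	{
		return 0;
	}

	int runtime_path(void)			/* stays in .text */
	{
		return probe_hw();		/* mismatch: calls freed init code at runtime */
	}

With -fno-inline-functions-called-once, gcc keeps probe_hw() as a separate function, so the cross-section reference stays visible to the mismatch analysis instead of being hidden by inlining.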
Link: https://lkml.kernel.org/r/20251029122743.1110-1-bp@kernel.org Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 19592a57e1ed..9a087826498a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -483,23 +483,23 @@ config DEBUG_SECTION_MISMATCH bool "Enable full Section mismatch analysis" depends on CC_IS_GCC help - The section mismatch analysis checks if there are illegal - references from one section to another section. - During linktime or runtime, some sections are dropped; - any use of code/data previously in these sections would - most likely result in an oops. - In the code, functions and variables are annotated with - __init,, etc. (see the full list in include/linux/init.h), - which results in the code/data being placed in specific sections. + The section mismatch analysis checks if there are illegal references + from one section to another. During linktime or runtime, some + sections are dropped; any use of code/data previously in these + sections would most likely result in an oops. + + In the code, functions and variables are annotated with __init, + __initdata, and so on (see the full list in include/linux/init.h). + This directs the toolchain to place code/data in specific sections. + The section mismatch analysis is always performed after a full - kernel build, and enabling this option causes the following - additional step to occur: - - Add the option -fno-inline-functions-called-once to gcc commands. - When inlining a function annotated with __init in a non-init - function, we would lose the section information and thus - the analysis would not catch the illegal reference. - This option tells gcc to inline less (but it does result in - a larger kernel). + kernel build, and enabling this option causes the option + -fno-inline-functions-called-once to be added to gcc commands. + + However, when inlining a function annotated with __init in + a non-init function, we would lose the section information and thus + the analysis would not catch the illegal reference. This option + tells gcc to inline less (but it does result in a larger kernel). config SECTION_MISMATCH_WARN_ONLY bool "Make section mismatch errors non-fatal" -- cgit v1.2.3 From 5944f875ac27cae8b831206aef011a444efa637d Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:27 +0000 Subject: lib: mul_u64_u64_div_u64(): rename parameter 'c' to 'd' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch series "Implement mul_u64_u64_div_u64_roundup()", v5. The pwm-stm32.c code wants a 'rounding up' version of mul_u64_u64_div_u64(). This can be done simply by adding 'divisor - 1' to the 128bit product. Implement mul_u64_add_u64_div_u64(a, b, c, d) = (a * b + c)/d based on the existing code. Define mul_u64_u64_div_u64(a, b, d) as mul_u64_add_u64_div_u64(a, b, 0, d) and mul_u64_u64_div_u64_roundup(a, b, d) as mul_u64_add_u64_div_u64(a, b, d-1, d). Only x86-64 has an optimsed (asm) version of the function. That is optimised to avoid the 'add c' when c is known to be zero. In all other cases the extra code will be noise compared to the software divide code. The test module has been updated to test mul_u64_u64_div_u64_roundup() and also enhanced it to verify the C division code on x86-64 and the 32bit division code on 64bit. 
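The relationship between the three helpers described above can be sketched as follows (the real definitions sit next to mul_u64_u64_div_u64() in include/linux/math64.h and may differ in detail):

	/* (a * b) / d, rounded down: the existing semantics */
	static inline u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d)
	{
		return mul_u64_add_u64_div_u64(a, b, 0, d);
	}

	/* (a * b) / d, rounded up: add d - 1 to the 128-bit product before dividing */
	static inline u64 mul_u64_u64_div_u64_roundup(u64 a, u64 b, u64 d)
	{
		return mul_u64_add_u64_div_u64(a, b, d - 1, d);
	}

A caller like pwm-stm32.c can then get the rounded-up result directly, without open-coding the 'add divisor - 1' step against a 128-bit intermediate.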
This patch (of 9): Change to prototype from mul_u64_u64_div_u64(u64 a, u64 b, u64 c) to mul_u64_u64_div_u64(u64 a, u64 b, u64 d). Using 'd' for 'divisor' makes more sense. An upcoming change adds a 'c' parameter to calculate (a * b + c)/d. Link: https://lkml.kernel.org/r/20251105201035.64043-1-david.laight.linux@gmail.com Link: https://lkml.kernel.org/r/20251105201035.64043-2-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/div64.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/math/div64.c b/lib/math/div64.c index bf77b9843175..0ebff850fd4d 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -184,10 +184,10 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) EXPORT_SYMBOL(iter_div_u64_rem); #ifndef mul_u64_u64_div_u64 -u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) +u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) { if (ilog2(a) + ilog2(b) <= 62) - return div64_u64(a * b, c); + return div64_u64(a * b, d); #if defined(__SIZEOF_INT128__) @@ -212,37 +212,37 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) #endif - /* make sure c is not zero, trigger runtime exception otherwise */ - if (unlikely(c == 0)) { + /* make sure d is not zero, trigger runtime exception otherwise */ + if (unlikely(d == 0)) { unsigned long zero = 0; OPTIMIZER_HIDE_VAR(zero); return ~0UL/zero; } - int shift = __builtin_ctzll(c); + int shift = __builtin_ctzll(d); /* try reducing the fraction in case the dividend becomes <= 64 bits */ if ((n_hi >> shift) == 0) { u64 n = shift ? (n_lo >> shift) | (n_hi << (64 - shift)) : n_lo; - return div64_u64(n, c >> shift); + return div64_u64(n, d >> shift); /* * The remainder value if needed would be: - * res = div64_u64_rem(n, c >> shift, &rem); + * res = div64_u64_rem(n, d >> shift, &rem); * rem = (rem << shift) + (n_lo - (n << shift)); */ } - if (n_hi >= c) { + if (n_hi >= d) { /* overflow: result is unrepresentable in a u64 */ return -1; } /* Do the full 128 by 64 bits division */ - shift = __builtin_clzll(c); - c <<= shift; + shift = __builtin_clzll(d); + d <<= shift; int p = 64 + shift; u64 res = 0; @@ -257,8 +257,8 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) n_hi <<= shift; n_hi |= n_lo >> (64 - shift); n_lo <<= shift; - if (carry || (n_hi >= c)) { - n_hi -= c; + if (carry || (n_hi >= d)) { + n_hi -= d; res |= 1ULL << p; } } while (n_hi); -- cgit v1.2.3 From 08092babd362170e059330a6a2d44c2891d9dbac Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:28 +0000 Subject: lib: mul_u64_u64_div_u64(): combine overflow and divide by zero checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the overflow check always triggers when the divisor is zero move the check for divide by zero inside the overflow check. This means there is only one test in the normal path. Link: https://lkml.kernel.org/r/20251105201035.64043-3-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/div64.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/math/div64.c b/lib/math/div64.c index 0ebff850fd4d..1092f41e878e 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -212,12 +212,16 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) #endif - /* make sure d is not zero, trigger runtime exception otherwise */ - if (unlikely(d == 0)) { - unsigned long zero = 0; + if (unlikely(n_hi >= d)) { + /* trigger runtime exception if divisor is zero */ + if (d == 0) { + unsigned long zero = 0; - OPTIMIZER_HIDE_VAR(zero); - return ~0UL/zero; + OPTIMIZER_HIDE_VAR(zero); + return ~0UL/zero; + } + /* overflow: result is unrepresentable in a u64 */ + return ~0ULL; } int shift = __builtin_ctzll(d); @@ -234,11 +238,6 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) */ } - if (n_hi >= d) { - /* overflow: result is unrepresentable in a u64 */ - return -1; - } - /* Do the full 128 by 64 bits division */ shift = __builtin_clzll(d); -- cgit v1.2.3 From d91f891d588557874a45a5f584b6da0b433acee7 Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:29 +0000 Subject: lib: mul_u64_u64_div_u64(): simplify check for a 64bit product MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the product is only 64bits div64_u64() can be used for the divide. Replace the pre-multiply check (ilog2(a) + ilog2(b) <= 62) with a simple post-multiply check that the high 64bits are zero. This has the advantage of being simpler, more accurate and less code. It will always be faster when the product is larger than 64bits. Most 64bit cpu have a native 64x64=128 bit multiply, this is needed (for the low 64bits) even when div64_u64() is called - so the early check gains nothing and is just extra code. 32bit cpu will need a compare (etc) to generate the 64bit ilog2() from two 32bit bit scans - so that is non-trivial. (Never mind the mess of x86's 'bsr' and any oddball cpu without fast bit-scan instructions.) Whereas the additional instructions for the 128bit multiply result are pretty much one multiply and two adds (typically the 'adc $0,%reg' can be run in parallel with the instruction that follows). The only outliers are 64bit systems without 128bit mutiply and simple in order 32bit ones with fast bit scan but needing extra instructions to get the high bits of the multiply result. I doubt it makes much difference to either, the latter is definitely not mainstream. If anyone is worried about the analysis they can look at the generated code for x86 (especially when cmov isn't used). Link: https://lkml.kernel.org/r/20251105201035.64043-4-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/div64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/math/div64.c b/lib/math/div64.c index 1092f41e878e..4a4b1aa9e6e1 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -186,9 +186,6 @@ EXPORT_SYMBOL(iter_div_u64_rem); #ifndef mul_u64_u64_div_u64 u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) { - if (ilog2(a) + ilog2(b) <= 62) - return div64_u64(a * b, d); - #if defined(__SIZEOF_INT128__) /* native 64x64=128 bits multiplication */ @@ -212,6 +209,9 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) #endif + if (!n_hi) + return div64_u64(n_lo, d); + if (unlikely(n_hi >= d)) { /* trigger runtime exception if divisor is zero */ if (d == 0) { -- cgit v1.2.3 From 6480241f31f543333ed0c7a209962412461f6e41 Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:30 +0000 Subject: lib: add mul_u64_add_u64_div_u64() and mul_u64_u64_div_u64_roundup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The existing mul_u64_u64_div_u64() rounds down, a 'rounding up' variant needs 'divisor - 1' adding in between the multiply and divide so cannot easily be done by a caller. Add mul_u64_add_u64_div_u64(a, b, c, d) that calculates (a * b + c)/d and implement the 'round down' and 'round up' using it. Update the x86-64 asm to optimise for 'c' being a constant zero. Add kerndoc definitions for all three functions. Link: https://lkml.kernel.org/r/20251105201035.64043-5-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/div64.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/math/div64.c b/lib/math/div64.c index 4a4b1aa9e6e1..a88391b8ada0 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -183,13 +183,13 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) } EXPORT_SYMBOL(iter_div_u64_rem); -#ifndef mul_u64_u64_div_u64 -u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) +#ifndef mul_u64_add_u64_div_u64 +u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) { #if defined(__SIZEOF_INT128__) /* native 64x64=128 bits multiplication */ - u128 prod = (u128)a * b; + u128 prod = (u128)a * b + c; u64 n_lo = prod, n_hi = prod >> 64; #else @@ -198,8 +198,10 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32; u64 x, y, z; - x = (u64)a_lo * b_lo; - y = (u64)a_lo * b_hi + (u32)(x >> 32); + /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */ + x = (u64)a_lo * b_lo + (u32)c; + y = (u64)a_lo * b_hi + (u32)(c >> 32); + y += (u32)(x >> 32); z = (u64)a_hi * b_hi + (u32)(y >> 32); y = (u64)a_hi * b_lo + (u32)y; z += (u32)(y >> 32); @@ -265,5 +267,5 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 d) return res; } -EXPORT_SYMBOL(mul_u64_u64_div_u64); +EXPORT_SYMBOL(mul_u64_add_u64_div_u64); #endif -- cgit v1.2.3 From 500db21917e8aaafd65360bfed35845d549aa3dd Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:31 +0000 Subject: lib: add tests for mul_u64_u64_div_u64_roundup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Replicate the existing mul_u64_u64_div_u64() test cases with round up. Update the shell script that verifies the table, remove the comment markers so that it can be directly pasted into a shell. Rename the divisor from 'c' to 'd' to match mul_u64_add_u64_div_u64(). It any tests fail then fail the module load with -EINVAL. Link: https://lkml.kernel.org/r/20251105201035.64043-6-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/test_mul_u64_u64_div_u64.c | 122 +++++++++++++++++++++--------------- 1 file changed, 73 insertions(+), 49 deletions(-) (limited to 'lib') diff --git a/lib/math/test_mul_u64_u64_div_u64.c b/lib/math/test_mul_u64_u64_div_u64.c index 58d058de4e73..4d5e4e5dac67 100644 --- a/lib/math/test_mul_u64_u64_div_u64.c +++ b/lib/math/test_mul_u64_u64_div_u64.c @@ -10,61 +10,73 @@ #include #include -typedef struct { u64 a; u64 b; u64 c; u64 result; } test_params; +typedef struct { u64 a; u64 b; u64 d; u64 result; uint round_up;} test_params; static test_params test_values[] = { /* this contains many edge values followed by a couple random values */ -{ 0xb, 0x7, 0x3, 0x19 }, -{ 0xffff0000, 0xffff0000, 0xf, 0x1110eeef00000000 }, -{ 0xffffffff, 0xffffffff, 0x1, 0xfffffffe00000001 }, -{ 0xffffffff, 0xffffffff, 0x2, 0x7fffffff00000000 }, -{ 0x1ffffffff, 0xffffffff, 0x2, 0xfffffffe80000000 }, -{ 0x1ffffffff, 0xffffffff, 0x3, 0xaaaaaaa9aaaaaaab }, -{ 0x1ffffffff, 0x1ffffffff, 0x4, 0xffffffff00000000 }, -{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff }, -{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851 }, -{ 0x7fffffffffffffff, 0x2, 0x3, 0x5555555555555554 }, -{ 0xffffffffffffffff, 0x2, 0x8000000000000000, 0x3 }, -{ 0xffffffffffffffff, 0x2, 0xc000000000000000, 0x2 }, -{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007 }, -{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001 }, -{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001 }, -{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000 }, -{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001 }, -{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002 }, -{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8 }, -{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 0xccccccccccccccca }, -{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b }, -{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9 }, -{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe }, -{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18 }, -{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35 }, -{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f }, -{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961 }, -{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216 }, +{ 0xb, 0x7, 0x3, 0x19, 1 }, +{ 0xffff0000, 0xffff0000, 0xf, 0x1110eeef00000000, 0 }, +{ 0xffffffff, 0xffffffff, 0x1, 0xfffffffe00000001, 0 }, +{ 
0xffffffff, 0xffffffff, 0x2, 0x7fffffff00000000, 1 }, +{ 0x1ffffffff, 0xffffffff, 0x2, 0xfffffffe80000000, 1 }, +{ 0x1ffffffff, 0xffffffff, 0x3, 0xaaaaaaa9aaaaaaab, 0 }, +{ 0x1ffffffff, 0x1ffffffff, 0x4, 0xffffffff00000000, 1 }, +{ 0xffff000000000000, 0xffff000000000000, 0xffff000000000001, 0xfffeffffffffffff, 1 }, +{ 0x3333333333333333, 0x3333333333333333, 0x5555555555555555, 0x1eb851eb851eb851, 1 }, +{ 0x7fffffffffffffff, 0x2, 0x3, 0x5555555555555554, 1 }, +{ 0xffffffffffffffff, 0x2, 0x8000000000000000, 0x3, 1 }, +{ 0xffffffffffffffff, 0x2, 0xc000000000000000, 0x2, 1 }, +{ 0xffffffffffffffff, 0x4000000000000004, 0x8000000000000000, 0x8000000000000007, 1 }, +{ 0xffffffffffffffff, 0x4000000000000001, 0x8000000000000000, 0x8000000000000001, 1 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000001, 0 }, +{ 0xfffffffffffffffe, 0x8000000000000001, 0xffffffffffffffff, 0x8000000000000000, 1 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffe, 0x8000000000000001, 1 }, +{ 0xffffffffffffffff, 0x8000000000000001, 0xfffffffffffffffd, 0x8000000000000002, 1 }, +{ 0x7fffffffffffffff, 0xffffffffffffffff, 0xc000000000000000, 0xaaaaaaaaaaaaaaa8, 1 }, +{ 0xffffffffffffffff, 0x7fffffffffffffff, 0xa000000000000000, 0xccccccccccccccca, 1 }, +{ 0xffffffffffffffff, 0x7fffffffffffffff, 0x9000000000000000, 0xe38e38e38e38e38b, 1 }, +{ 0x7fffffffffffffff, 0x7fffffffffffffff, 0x5000000000000000, 0xccccccccccccccc9, 1 }, +{ 0xffffffffffffffff, 0xfffffffffffffffe, 0xffffffffffffffff, 0xfffffffffffffffe, 0 }, +{ 0xe6102d256d7ea3ae, 0x70a77d0be4c31201, 0xd63ec35ab3220357, 0x78f8bf8cc86c6e18, 1 }, +{ 0xf53bae05cb86c6e1, 0x3847b32d2f8d32e0, 0xcfd4f55a647f403c, 0x42687f79d8998d35, 1 }, +{ 0x9951c5498f941092, 0x1f8c8bfdf287a251, 0xa3c8dc5f81ea3fe2, 0x1d887cb25900091f, 1 }, +{ 0x374fee9daa1bb2bb, 0x0d0bfbff7b8ae3ef, 0xc169337bd42d5179, 0x03bb2dbaffcbb961, 1 }, +{ 0xeac0d03ac10eeaf0, 0x89be05dfa162ed9b, 0x92bb1679a41f0e4b, 0xdc5f5cc9e270d216, 1 }, }; /* * The above table can be verified with the following shell script: - * - * #!/bin/sh - * sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4/p' \ - * lib/math/test_mul_u64_u64_div_u64.c | - * while read a b c r; do - * expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $c | bc ) - * given=$( printf "%X\n" $r ) - * if [ "$expected" = "$given" ]; then - * echo "$a * $b / $c = $r OK" - * else - * echo "$a * $b / $c = $r is wrong" >&2 - * echo "should be equivalent to 0x$expected" >&2 - * exit 1 - * fi - * done + +#!/bin/sh +sed -ne 's/^{ \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\), \+\(.*\) },$/\1 \2 \3 \4 \5/p' \ + lib/math/test_mul_u64_u64_div_u64.c | +while read a b d r e; do + expected=$( printf "obase=16; ibase=16; %X * %X / %X\n" $a $b $d | bc ) + given=$( printf "%X\n" $r ) + if [ "$expected" = "$given" ]; then + echo "$a * $b / $d = $r OK" + else + echo "$a * $b / $d = $r is wrong" >&2 + echo "should be equivalent to 0x$expected" >&2 + exit 1 + fi + expected=$( printf "obase=16; ibase=16; (%X * %X + %X) / %X\n" $a $b $((d-1)) $d | bc ) + given=$( printf "%X\n" $((r + e)) ) + if [ "$expected" = "$given" ]; then + echo "$a * $b +/ $d = $(printf '%#x' $((r + e))) OK" + else + echo "$a * $b +/ $d = $(printf '%#x' $((r + e))) is wrong" >&2 + echo "should be equivalent to 0x$expected" >&2 + exit 1 + fi +done + */ static int __init test_init(void) { + int errors = 0; + int tests = 0; int i; pr_info("Starting mul_u64_u64_div_u64() test\n"); @@ -72,19 +84,31 @@ static int __init test_init(void) for (i = 0; i < 
ARRAY_SIZE(test_values); i++) { u64 a = test_values[i].a; u64 b = test_values[i].b; - u64 c = test_values[i].c; + u64 d = test_values[i].d; u64 expected_result = test_values[i].result; - u64 result = mul_u64_u64_div_u64(a, b, c); + u64 result = mul_u64_u64_div_u64(a, b, d); + u64 result_up = mul_u64_u64_div_u64_roundup(a, b, d); + + tests += 2; if (result != expected_result) { - pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, c); + pr_err("ERROR: 0x%016llx * 0x%016llx / 0x%016llx\n", a, b, d); pr_err("ERROR: expected result: %016llx\n", expected_result); pr_err("ERROR: obtained result: %016llx\n", result); + errors++; + } + expected_result += test_values[i].round_up; + if (result_up != expected_result) { + pr_err("ERROR: 0x%016llx * 0x%016llx +/ 0x%016llx\n", a, b, d); + pr_err("ERROR: expected result: %016llx\n", expected_result); + pr_err("ERROR: obtained result: %016llx\n", result_up); + errors++; } } - pr_info("Completed mul_u64_u64_div_u64() test\n"); - return 0; + pr_info("Completed mul_u64_u64_div_u64() test, %d tests, %d errors\n", + tests, errors); + return errors ? -EINVAL : 0; } static void __exit test_exit(void) -- cgit v1.2.3 From f0bff2eb04686f13386b2af97bc7aaa09f020f35 Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:32 +0000 Subject: lib: test_mul_u64_u64_div_u64(): test both generic and arch versions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the #if in div64.c so that test_mul_u64_u64_div_u64.c can compile and test the generic version (including the 'long multiply') on architectures (eg amd64) that define their own copy. Test the kernel version and the locally compiled version on all arch. Output the time taken (in ns) on the 'test completed' trace. For reference, on my zen 5, the optimised version takes ~220ns and the generic version ~3350ns. Using the native multiply saves ~200ns and adding back the ilog2() 'optimisation' test adds ~50ms. Link: https://lkml.kernel.org/r/20251105201035.64043-7-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/div64.c | 8 ++++-- lib/math/test_mul_u64_u64_div_u64.c | 52 ++++++++++++++++++++++++++++++++----- 2 files changed, 51 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/math/div64.c b/lib/math/div64.c index a88391b8ada0..18a9ba26c418 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -177,16 +177,18 @@ EXPORT_SYMBOL(div64_s64); * Iterative div/mod for use when dividend is not expected to be much * bigger than divisor. 
*/ +#ifndef iter_div_u64_rem u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) { return __iter_div_u64_rem(dividend, divisor, remainder); } EXPORT_SYMBOL(iter_div_u64_rem); +#endif -#ifndef mul_u64_add_u64_div_u64 +#if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64) u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) { -#if defined(__SIZEOF_INT128__) +#if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64) /* native 64x64=128 bits multiplication */ u128 prod = (u128)a * b + c; @@ -267,5 +269,7 @@ u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) return res; } +#if !defined(test_mul_u64_add_u64_div_u64) EXPORT_SYMBOL(mul_u64_add_u64_div_u64); #endif +#endif diff --git a/lib/math/test_mul_u64_u64_div_u64.c b/lib/math/test_mul_u64_u64_div_u64.c index 4d5e4e5dac67..d8d2c18c4614 100644 --- a/lib/math/test_mul_u64_u64_div_u64.c +++ b/lib/math/test_mul_u64_u64_div_u64.c @@ -73,21 +73,34 @@ done */ -static int __init test_init(void) +static u64 test_mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d); + +static int __init test_run(unsigned int fn_no, const char *fn_name) { + u64 start_time; int errors = 0; int tests = 0; int i; - pr_info("Starting mul_u64_u64_div_u64() test\n"); + start_time = ktime_get_ns(); for (i = 0; i < ARRAY_SIZE(test_values); i++) { u64 a = test_values[i].a; u64 b = test_values[i].b; u64 d = test_values[i].d; u64 expected_result = test_values[i].result; - u64 result = mul_u64_u64_div_u64(a, b, d); - u64 result_up = mul_u64_u64_div_u64_roundup(a, b, d); + u64 result, result_up; + + switch (fn_no) { + default: + result = mul_u64_u64_div_u64(a, b, d); + result_up = mul_u64_u64_div_u64_roundup(a, b, d); + break; + case 1: + result = test_mul_u64_add_u64_div_u64(a, b, 0, d); + result_up = test_mul_u64_add_u64_div_u64(a, b, d - 1, d); + break; + } tests += 2; @@ -106,15 +119,40 @@ static int __init test_init(void) } } - pr_info("Completed mul_u64_u64_div_u64() test, %d tests, %d errors\n", - tests, errors); - return errors ? -EINVAL : 0; + pr_info("Completed %s() test, %d tests, %d errors, %llu ns\n", + fn_name, tests, errors, ktime_get_ns() - start_time); + return errors; +} + +static int __init test_init(void) +{ + pr_info("Starting mul_u64_u64_div_u64() test\n"); + if (test_run(0, "mul_u64_u64_div_u64")) + return -EINVAL; + if (test_run(1, "test_mul_u64_u64_div_u64")) + return -EINVAL; + return 0; } static void __exit test_exit(void) { } +/* Compile the generic mul_u64_add_u64_div_u64() code */ +#undef __div64_32 +#define __div64_32 __div64_32 +#define div_s64_rem div_s64_rem +#define div64_u64_rem div64_u64_rem +#define div64_u64 div64_u64 +#define div64_s64 div64_s64 +#define iter_div_u64_rem iter_div_u64_rem + +#undef mul_u64_add_u64_div_u64 +#define mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64 +#define test_mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64 + +#include "div64.c" + module_init(test_init); module_exit(test_exit); -- cgit v1.2.3 From 630f96a687def5616d6fa7f069adcea158320909 Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:33 +0000 Subject: lib: mul_u64_u64_div_u64(): optimise multiply on 32bit x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc generates horrid code for both ((u64)u32_a * u32_b) and (u64_a + u32_b). As well as the extra instructions it can generate a lot of spills to stack (including spills of constant zeros and even multiplies by constant zero). 
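On 32-bit x86 the usual fix is a tiny asm helper that makes the carry handling explicit. A hypothetical sketch of an add_u64_u32() in the spirit of the existing mul_u32_u32() is shown below; the helper the patch actually adds is not part of the lib/ diff that follows (the view is limited to 'lib'), so treat this as illustrative only:

	static inline u64 add_u64_u32(u64 a, u32 b)
	{
		u32 lo = a, hi = a >> 32;

		asm ("addl %[b], %[lo]\n\t"
		     "adcl $0, %[hi]"
		     : [lo] "+r" (lo), [hi] "+r" (hi)
		     : [b] "rm" (b)
		     : "cc");

		return lo | (u64)hi << 32;
	}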
mul_u32_u32() already exists to optimise the multiply. Add a similar add_u64_32() for the addition. Disable both for clang - it generates better code without them. Move the 64x64 => 128 multiply into a static inline helper function for code clarity. No need for the a/b_hi/lo variables, the implicit casts on the function calls do the work for us. Should have minimal effect on the generated code. Use mul_u32_u32() and add_u64_u32() in the 64x64 => 128 multiply in mul_u64_add_u64_div_u64(). Link: https://lkml.kernel.org/r/20251105201035.64043-8-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/div64.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/math/div64.c b/lib/math/div64.c index 18a9ba26c418..bb57a48ce36a 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -186,33 +186,45 @@ EXPORT_SYMBOL(iter_div_u64_rem); #endif #if !defined(mul_u64_add_u64_div_u64) || defined(test_mul_u64_add_u64_div_u64) -u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) -{ + +#define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c) + #if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64) +static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) +{ /* native 64x64=128 bits multiplication */ u128 prod = (u128)a * b + c; - u64 n_lo = prod, n_hi = prod >> 64; + + *p_lo = prod; + return prod >> 64; +} #else - /* perform a 64x64=128 bits multiplication manually */ - u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32; +static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) +{ + /* perform a 64x64=128 bits multiplication in 32bit chunks */ u64 x, y, z; /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */ - x = (u64)a_lo * b_lo + (u32)c; - y = (u64)a_lo * b_hi + (u32)(c >> 32); - y += (u32)(x >> 32); - z = (u64)a_hi * b_hi + (u32)(y >> 32); - y = (u64)a_hi * b_lo + (u32)y; - z += (u32)(y >> 32); - x = (y << 32) + (u32)x; - - u64 n_lo = x, n_hi = z; + x = mul_add(a, b, c); + y = mul_add(a, b >> 32, c >> 32); + y = add_u64_u32(y, x >> 32); + z = mul_add(a >> 32, b >> 32, y >> 32); + y = mul_add(a >> 32, b, y); + *p_lo = (y << 32) + (u32)x; + return add_u64_u32(z, y >> 32); +} #endif +u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) +{ + u64 n_lo, n_hi; + + n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c); + if (!n_hi) return div64_u64(n_lo, d); -- cgit v1.2.3 From d10bb374c41e4c4dced04ae7d2fe2d782a5858a0 Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:34 +0000 Subject: lib: mul_u64_u64_div_u64(): optimise the divide code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the bit by bit algorithm with one that generates 16 bits per iteration on 32bit architectures and 32 bits on 64bit ones. On my zen 5 this reduces the time for the tests (using the generic code) from ~3350ns to ~1000ns. Running the 32bit algorithm on 64bit x86 takes ~1500ns. It'll be slightly slower on a real 32bit system, mostly due to register pressure. The savings for 32bit x86 are much higher (tested in userspace). The worst case (lots of bits in the quotient) drops from ~900 clocks to ~130 (pretty much independant of the arguments). 
Other 32bit architectures may see better savings. It is possibly to optimise for divisors that span less than __LONG_WIDTH__/2 bits. However I suspect they don't happen that often and it doesn't remove any slow cpu divide instructions which dominate the result. Typical improvements for 64bit random divides: old new sandy bridge: 470 150 haswell: 400 144 piledriver: 960 467 I think rdpmc is very slow. zen5: 244 80 (Timing is 'rdpmc; mul_div(); rdpmc' with the multiply depending on the first rdpmc and the second rdpmc depending on the quotient.) Object code (64bit x86 test program): old 0x173 new 0x141. Link: https://lkml.kernel.org/r/20251105201035.64043-9-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/div64.c | 124 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 85 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/math/div64.c b/lib/math/div64.c index bb57a48ce36a..d1e92ea24fce 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -190,7 +190,6 @@ EXPORT_SYMBOL(iter_div_u64_rem); #define mul_add(a, b, c) add_u64_u32(mul_u32_u32(a, b), c) #if defined(__SIZEOF_INT128__) && !defined(test_mul_u64_add_u64_div_u64) - static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) { /* native 64x64=128 bits multiplication */ @@ -199,9 +198,7 @@ static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) *p_lo = prod; return prod >> 64; } - #else - static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) { /* perform a 64x64=128 bits multiplication in 32bit chunks */ @@ -216,12 +213,37 @@ static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c) *p_lo = (y << 32) + (u32)x; return add_u64_u32(z, y >> 32); } +#endif + +#ifndef BITS_PER_ITER +#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16) +#endif + +#if BITS_PER_ITER == 32 +#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c) +#define add_u64_long(a, b) ((a) + (b)) +#else +#undef BITS_PER_ITER +#define BITS_PER_ITER 16 +static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c) +{ + u64 n_lo = mul_add(a, b, c); + u64 n_med = mul_add(a >> 32, b, c >> 32); + + n_med = add_u64_u32(n_med, n_lo >> 32); + *p_lo = n_med << 32 | (u32)n_lo; + return n_med >> 32; +} +#define add_u64_long(a, b) add_u64_u32(a, b) #endif u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) { - u64 n_lo, n_hi; + unsigned long d_msig, q_digit; + unsigned int reps, d_z_hi; + u64 quotient, n_lo, n_hi; + u32 overflow; n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c); @@ -240,46 +262,70 @@ u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d) return ~0ULL; } - int shift = __builtin_ctzll(d); - - /* try reducing the fraction in case the dividend becomes <= 64 bits */ - if ((n_hi >> shift) == 0) { - u64 n = shift ? 
(n_lo >> shift) | (n_hi << (64 - shift)) : n_lo; - - return div64_u64(n, d >> shift); - /* - * The remainder value if needed would be: - * res = div64_u64_rem(n, d >> shift, &rem); - * rem = (rem << shift) + (n_lo - (n << shift)); - */ + /* Left align the divisor, shifting the dividend to match */ + d_z_hi = __builtin_clzll(d); + if (d_z_hi) { + d <<= d_z_hi; + n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi); + n_lo <<= d_z_hi; } - /* Do the full 128 by 64 bits division */ - - shift = __builtin_clzll(d); - d <<= shift; - - int p = 64 + shift; - u64 res = 0; - bool carry; + reps = 64 / BITS_PER_ITER; + /* Optimise loop count for small dividends */ + if (!(u32)(n_hi >> 32)) { + reps -= 32 / BITS_PER_ITER; + n_hi = n_hi << 32 | n_lo >> 32; + n_lo <<= 32; + } +#if BITS_PER_ITER == 16 + if (!(u32)(n_hi >> 48)) { + reps--; + n_hi = add_u64_u32(n_hi << 16, n_lo >> 48); + n_lo <<= 16; + } +#endif - do { - carry = n_hi >> 63; - shift = carry ? 1 : __builtin_clzll(n_hi); - if (p < shift) - break; - p -= shift; - n_hi <<= shift; - n_hi |= n_lo >> (64 - shift); - n_lo <<= shift; - if (carry || (n_hi >= d)) { - n_hi -= d; - res |= 1ULL << p; + /* Invert the dividend so we can use add instead of subtract. */ + n_lo = ~n_lo; + n_hi = ~n_hi; + + /* + * Get the most significant BITS_PER_ITER bits of the divisor. + * This is used to get a low 'guestimate' of the quotient digit. + */ + d_msig = (d >> (64 - BITS_PER_ITER)) + 1; + + /* + * Now do a 'long division' with BITS_PER_ITER bit 'digits'. + * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits. + * The worst case is dividing ~0 by 0x8000 which requires two subtracts. + */ + quotient = 0; + while (reps--) { + q_digit = (unsigned long)(~n_hi >> (64 - 2 * BITS_PER_ITER)) / d_msig; + /* Shift 'n' left to align with the product q_digit * d */ + overflow = n_hi >> (64 - BITS_PER_ITER); + n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER)); + n_lo <<= BITS_PER_ITER; + /* Add product to negated divisor */ + overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi); + /* Adjust for the q_digit 'guestimate' being low */ + while (overflow < 0xffffffff >> (32 - BITS_PER_ITER)) { + q_digit++; + n_hi += d; + overflow += n_hi < d; } - } while (n_hi); - /* The remainder value if needed would be n_hi << p */ + quotient = add_u64_long(quotient << BITS_PER_ITER, q_digit); + } - return res; + /* + * The above only ensures the remainder doesn't overflow, + * it can still be possible to add (aka subtract) another copy + * of the divisor. + */ + if ((n_hi + d) > n_hi) + quotient++; + return quotient; } #if !defined(test_mul_u64_add_u64_div_u64) EXPORT_SYMBOL(mul_u64_add_u64_div_u64); -- cgit v1.2.3 From 1d1ef8c1fb5e488c0f68499239d8dc61b1399db9 Mon Sep 17 00:00:00 2001 From: David Laight Date: Wed, 5 Nov 2025 20:10:35 +0000 Subject: lib: test_mul_u64_u64_div_u64(): test the 32bit code on 64bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are slight differences in the mul_u64_add_u64_div_u64() code between 32bit and 64bit systems. Compile and test the 32bit version on 64bit hosts for better test coverage. Link: https://lkml.kernel.org/r/20251105201035.64043-10-david.laight.linux@gmail.com Signed-off-by: David Laight Reviewed-by: Nicolas Pitre Cc: Biju Das Cc: Borislav Betkov Cc: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Jens Axboe Cc: Li RongQing Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleinxer Cc: Uwe Kleine-König Signed-off-by: Andrew Morton --- lib/math/test_mul_u64_u64_div_u64.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'lib') diff --git a/lib/math/test_mul_u64_u64_div_u64.c b/lib/math/test_mul_u64_u64_div_u64.c index d8d2c18c4614..338d014f0c73 100644 --- a/lib/math/test_mul_u64_u64_div_u64.c +++ b/lib/math/test_mul_u64_u64_div_u64.c @@ -74,6 +74,10 @@ done */ static u64 test_mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d); +#if __LONG_WIDTH__ >= 64 +#define TEST_32BIT_DIV +static u64 test_mul_u64_add_u64_div_u64_32bit(u64 a, u64 b, u64 c, u64 d); +#endif static int __init test_run(unsigned int fn_no, const char *fn_name) { @@ -100,6 +104,12 @@ static int __init test_run(unsigned int fn_no, const char *fn_name) result = test_mul_u64_add_u64_div_u64(a, b, 0, d); result_up = test_mul_u64_add_u64_div_u64(a, b, d - 1, d); break; +#ifdef TEST_32BIT_DIV + case 2: + result = test_mul_u64_add_u64_div_u64_32bit(a, b, 0, d); + result_up = test_mul_u64_add_u64_div_u64_32bit(a, b, d - 1, d); + break; +#endif } tests += 2; @@ -131,6 +141,10 @@ static int __init test_init(void) return -EINVAL; if (test_run(1, "test_mul_u64_u64_div_u64")) return -EINVAL; +#ifdef TEST_32BIT_DIV + if (test_run(2, "test_mul_u64_u64_div_u64_32bit")) + return -EINVAL; +#endif return 0; } @@ -153,6 +167,21 @@ static void __exit test_exit(void) #include "div64.c" +#ifdef TEST_32BIT_DIV +/* Recompile the generic code for 32bit long */ +#undef test_mul_u64_add_u64_div_u64 +#define test_mul_u64_add_u64_div_u64 test_mul_u64_add_u64_div_u64_32bit +#undef BITS_PER_ITER +#define BITS_PER_ITER 16 + +#define mul_u64_u64_add_u64 mul_u64_u64_add_u64_32bit +#undef mul_u64_long_add_u64 +#undef add_u64_long +#undef mul_add + +#include "div64.c" +#endif + module_init(test_init); module_exit(test_exit); -- cgit v1.2.3 From 9ab38c5216634d8adb22156ddbd32f2d195b27a7 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Thu, 13 Nov 2025 19:34:13 +0000 Subject: Revert "lib/plist.c: enforce memory ordering in plist_check_list" This reverts commit 7abcb84f953df037d40fad66f2109db318dd155b. The introduction of WRITE_ONCE() calls for the 'prev' and 'next' variables inside plist_check_list() was a misapplication. WRITE_ONCE() is fundamentally a compiler barrier designed to prevent compiler optimizations (like caching or reordering) on shared memory locations. However, the variables 'prev' and 'next' are local, stack-allocated pointers accessed only by the current thread's invocation of the function. Since these pointers are thread-local and are never accessed concurrently, applying WRITE_ONCE() to them is semantically incorrect and unnecessary. Furthermore, the use of WRITE_ONCE() on local variables prevents the compiler from performing standard optimizations, such as keeping these variables cached solely in CPU registers throughout the loop, potentially introducing performance overhead. Restore the conventional C assignment for local loop variables, allowing the compiler to generate optimal code. 
Link: https://lkml.kernel.org/r/20251113193413.499309-1-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: I Hsin Cheng Cc: Ingo Molnar Signed-off-by: Andrew Morton --- lib/plist.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/plist.c b/lib/plist.c index 330febb4bd7d..ba677c31e8f3 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -47,8 +47,8 @@ static void plist_check_list(struct list_head *top) plist_check_prev_next(top, prev, next); while (next != top) { - WRITE_ONCE(prev, next); - WRITE_ONCE(next, prev->next); + prev = next; + next = prev->next; plist_check_prev_next(top, prev, next); } } -- cgit v1.2.3 From 03ef32d665e8a23d7ce5965b8b035666cfb47866 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 13 Nov 2025 19:10:39 +0800 Subject: sys_info: add a default kernel sys_info mask Which serves as a global default sys_info mask. When users want the same system information for many error cases (panic, hung, lockup ...), they can chose to set this global knob only once, while not setting up each individual sys_info knobs. This just adds a 'lazy' option, and doesn't change existing kernel behavior as the mask is 0 by default. Link: https://lkml.kernel.org/r/20251113111039.22701-5-feng.tang@linux.alibaba.com Suggested-by: Andrew Morton Signed-off-by: Feng Tang Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Petr Mladek Cc: Steven Rostedt Signed-off-by: Andrew Morton --- lib/sys_info.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index 323624093e54..f32a06ec9ed4 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -24,6 +24,13 @@ static const char * const si_names[] = { [ilog2(SYS_INFO_BLOCKED_TASKS)] = "blocked_tasks", }; +/* + * Default kernel sys_info mask. + * If a kernel module calls sys_info() with "parameter == 0", then + * this mask will be used. + */ +static unsigned long kernel_si_mask; + /* Expecting string like "xxx_sys_info=tasks,mem,timers,locks,ftrace,..." */ unsigned long sys_info_parse_param(char *str) { @@ -110,9 +117,26 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, else return sys_info_read_handler(&table, buffer, lenp, ppos, ro_table->data); } + +static const struct ctl_table sys_info_sysctls[] = { + { + .procname = "kernel_sys_info", + .data = &kernel_si_mask, + .maxlen = sizeof(kernel_si_mask), + .mode = 0644, + .proc_handler = sysctl_sys_info_handler, + }, +}; + +static int __init sys_info_sysctl_init(void) +{ + register_sysctl_init("kernel", sys_info_sysctls); + return 0; +} +subsys_initcall(sys_info_sysctl_init); #endif -void sys_info(unsigned long si_mask) +static void __sys_info(unsigned long si_mask) { if (si_mask & SYS_INFO_TASKS) show_state(); @@ -135,3 +159,8 @@ void sys_info(unsigned long si_mask) if (si_mask & SYS_INFO_BLOCKED_TASKS) show_state_filter(TASK_UNINTERRUPTIBLE); } + +void sys_info(unsigned long si_mask) +{ + __sys_info(si_mask ? : kernel_si_mask); +} -- cgit v1.2.3 From f1e2ca801c54dfc09d6a5540207cec25e8d43f6f Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 14 Nov 2025 14:00:45 +0800 Subject: lib/base64: add support for multiple variants Patch series " lib/base64: add generic encoder/decoder, migrate users", v5. This series introduces a generic Base64 encoder/decoder to the kernel library, eliminating duplicated implementations and delivering significant performance improvements. 
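A minimal usage sketch of the extended interface summarised in the next paragraph; the wrapper is illustrative, while the encode/decode signatures and the BASE64_URLSAFE variant are the ones this series introduces:

  #include <linux/base64.h>
  #include <linux/errno.h>

  /* dst must hold at least DIV_ROUND_UP(len * 4, 3) characters */
  static int b64_roundtrip(const u8 *src, int len, char *dst, u8 *back)
  {
          int n, m;

          /* URL-safe alphabet, no '=' padding */
          n = base64_encode(src, len, dst, false, BASE64_URLSAFE);

          /* returns the decoded length, or -1 for malformed input */
          m = base64_decode(dst, n, back, false, BASE64_URLSAFE);
          return m < 0 ? -EINVAL : m;
  }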
The Base64 API has been extended to support multiple variants (Standard, URL-safe, and IMAP) as defined in RFC 4648 and RFC 3501. The API now takes a variant parameter and an option to control padding. As part of this series, users are migrated to the new interface while preserving their specific formats: fscrypt now uses BASE64_URLSAFE, Ceph uses BASE64_IMAP, and NVMe is updated to BASE64_STD. On the encoder side, the implementation processes input in 3-byte blocks, mapping 24 bits directly to 4 output symbols. This avoids bit-by-bit streaming and reduces loop overhead, achieving about a 2.7x speedup compared to previous implementations. On the decoder side, replace strchr() lookups with per-variant reverse tables and process input in 4-character groups. Each group is mapped to numeric values and combined into 3 bytes. Padded and unpadded forms are validated explicitly, rejecting invalid '=' usage and enforcing tail rules. This improves throughput by ~43-52x. This patch (of 6): Extend the base64 API to support multiple variants (standard, URL-safe, and IMAP) as defined in RFC 4648 and RFC 3501. The API now takes a variant parameter and an option to control padding. Update NVMe auth code to use the new interface with BASE64_STD. Link: https://lkml.kernel.org/r/20251114055829.87814-1-409411716@gms.tku.edu.tw Link: https://lkml.kernel.org/r/20251114060045.88792-1-409411716@gms.tku.edu.tw Signed-off-by: Kuan-Wei Chiu Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw> Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw> Reviewed-by: David Laight Cc: Christoph Hellwig Cc: Eric Biggers Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Keith Busch Cc: Sagi Grimberg Cc: "Theodore Y. Ts'o" Cc: Viacheslav Dubeyko Cc: Xiubo Li Cc: Yu-Sheng Huang Signed-off-by: Andrew Morton --- lib/base64.c | 62 +++++++++++++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/base64.c b/lib/base64.c index b736a7a431c5..a7c20a8e8e98 100644 --- a/lib/base64.c +++ b/lib/base64.c @@ -1,12 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 /* - * base64.c - RFC4648-compliant base64 encoding + * base64.c - Base64 with support for multiple variants * * Copyright (c) 2020 Hannes Reinecke, SUSE * * Based on the base64url routines from fs/crypto/fname.c - * (which are using the URL-safe base64 encoding), - * modified to use the standard coding table from RFC4648 section 4. + * (which are using the URL-safe Base64 encoding), + * modified to support multiple Base64 variants. */ #include @@ -15,26 +15,31 @@ #include #include -static const char base64_table[65] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char base64_tables[][65] = { + [BASE64_STD] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", + [BASE64_URLSAFE] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", + [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,", +}; /** - * base64_encode() - base64-encode some binary data + * base64_encode() - Base64-encode some binary data * @src: the binary data to encode * @srclen: the length of @src in bytes - * @dst: (output) the base64-encoded string. Not NUL-terminated. + * @dst: (output) the Base64-encoded string. Not NUL-terminated. + * @padding: whether to append '=' padding characters + * @variant: which base64 variant to use * - * Encodes data using base64 encoding, i.e. 
the "Base 64 Encoding" specified - * by RFC 4648, including the '='-padding. + * Encodes data using the selected Base64 variant. * - * Return: the length of the resulting base64-encoded string in bytes. + * Return: the length of the resulting Base64-encoded string in bytes. */ -int base64_encode(const u8 *src, int srclen, char *dst) +int base64_encode(const u8 *src, int srclen, char *dst, bool padding, enum base64_variant variant) { u32 ac = 0; int bits = 0; int i; char *cp = dst; + const char *base64_table = base64_tables[variant]; for (i = 0; i < srclen; i++) { ac = (ac << 8) | src[i]; @@ -48,44 +53,49 @@ int base64_encode(const u8 *src, int srclen, char *dst) *cp++ = base64_table[(ac << (6 - bits)) & 0x3f]; bits -= 6; } - while (bits < 0) { - *cp++ = '='; - bits += 2; + if (padding) { + while (bits < 0) { + *cp++ = '='; + bits += 2; + } } return cp - dst; } EXPORT_SYMBOL_GPL(base64_encode); /** - * base64_decode() - base64-decode a string + * base64_decode() - Base64-decode a string * @src: the string to decode. Doesn't need to be NUL-terminated. * @srclen: the length of @src in bytes * @dst: (output) the decoded binary data + * @padding: whether to append '=' padding characters + * @variant: which base64 variant to use * - * Decodes a string using base64 encoding, i.e. the "Base 64 Encoding" - * specified by RFC 4648, including the '='-padding. + * Decodes a string using the selected Base64 variant. * * This implementation hasn't been optimized for performance. * * Return: the length of the resulting decoded binary data in bytes, - * or -1 if the string isn't a valid base64 string. + * or -1 if the string isn't a valid Base64 string. */ -int base64_decode(const char *src, int srclen, u8 *dst) +int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant) { u32 ac = 0; int bits = 0; int i; u8 *bp = dst; + const char *base64_table = base64_tables[variant]; for (i = 0; i < srclen; i++) { const char *p = strchr(base64_table, src[i]); - - if (src[i] == '=') { - ac = (ac << 6); - bits += 6; - if (bits >= 8) - bits -= 8; - continue; + if (padding) { + if (src[i] == '=') { + ac = (ac << 6); + bits += 6; + if (bits >= 8) + bits -= 8; + continue; + } } if (p == NULL || src[i] == 0) return -1; -- cgit v1.2.3 From c4eb7ad32eab13ba64cc452c6f43d518b63f5e03 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 14 Nov 2025 14:01:07 +0800 Subject: lib/base64: optimize base64_decode() with reverse lookup tables Replace the use of strchr() in base64_decode() with precomputed reverse lookup tables for each variant. This avoids repeated string scans and improves performance. Use -1 in the tables to mark invalid characters. Decode: 64B ~1530ns -> ~80ns (~19.1x) 1KB ~27726ns -> ~1239ns (~22.4x) [akpm@linux-foundation.org: fix kernedoc] Link: https://lkml.kernel.org/r/20251114060107.89026-1-409411716@gms.tku.edu.tw Signed-off-by: Kuan-Wei Chiu Co-developed-by: Guan-Chun Wu <409411716@gms.tku.edu.tw> Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw> Reviewed-by: David Laight Cc: Christoph Hellwig Cc: Eric Biggers Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Keith Busch Cc: Sagi Grimberg Cc: "Theodore Y. 
Ts'o" Cc: Viacheslav Dubeyko Cc: Xiubo Li Cc: Yu-Sheng Huang Signed-off-by: Andrew Morton --- lib/base64.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 48 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/base64.c b/lib/base64.c index a7c20a8e8e98..2d0b775def64 100644 --- a/lib/base64.c +++ b/lib/base64.c @@ -21,6 +21,50 @@ static const char base64_tables[][65] = { [BASE64_IMAP] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,", }; +/* + * Initialize the base64 reverse mapping for a single character + * This macro maps a character to its corresponding base64 value, + * returning -1 if the character is invalid. + * char 'A'-'Z' maps to 0-25, 'a'-'z' maps to 26-51, '0'-'9' maps to 52-61, + * ch_62 maps to 62, ch_63 maps to 63, and other characters return -1 + */ +#define INIT_1(v, ch_62, ch_63) \ + [v] = (v) >= 'A' && (v) <= 'Z' ? (v) - 'A' \ + : (v) >= 'a' && (v) <= 'z' ? (v) - 'a' + 26 \ + : (v) >= '0' && (v) <= '9' ? (v) - '0' + 52 \ + : (v) == (ch_62) ? 62 : (v) == (ch_63) ? 63 : -1 + +/* + * Recursive macros to generate multiple Base64 reverse mapping table entries. + * Each macro generates a sequence of entries in the lookup table: + * INIT_2 generates 2 entries, INIT_4 generates 4, INIT_8 generates 8, and so on up to INIT_32. + */ +#define INIT_2(v, ...) INIT_1(v, __VA_ARGS__), INIT_1((v) + 1, __VA_ARGS__) +#define INIT_4(v, ...) INIT_2(v, __VA_ARGS__), INIT_2((v) + 2, __VA_ARGS__) +#define INIT_8(v, ...) INIT_4(v, __VA_ARGS__), INIT_4((v) + 4, __VA_ARGS__) +#define INIT_16(v, ...) INIT_8(v, __VA_ARGS__), INIT_8((v) + 8, __VA_ARGS__) +#define INIT_32(v, ...) INIT_16(v, __VA_ARGS__), INIT_16((v) + 16, __VA_ARGS__) + +#define BASE64_REV_INIT(ch_62, ch_63) { \ + [0 ... 0x1f] = -1, \ + INIT_32(0x20, ch_62, ch_63), \ + INIT_32(0x40, ch_62, ch_63), \ + INIT_32(0x60, ch_62, ch_63), \ + [0x80 ... 0xff] = -1 } + +static const s8 base64_rev_maps[][256] = { + [BASE64_STD] = BASE64_REV_INIT('+', '/'), + [BASE64_URLSAFE] = BASE64_REV_INIT('-', '_'), + [BASE64_IMAP] = BASE64_REV_INIT('+', ',') +}; + +#undef BASE64_REV_INIT +#undef INIT_32 +#undef INIT_16 +#undef INIT_8 +#undef INIT_4 +#undef INIT_2 +#undef INIT_1 /** * base64_encode() - Base64-encode some binary data * @src: the binary data to encode @@ -84,10 +128,9 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6 int bits = 0; int i; u8 *bp = dst; - const char *base64_table = base64_tables[variant]; + s8 ch; for (i = 0; i < srclen; i++) { - const char *p = strchr(base64_table, src[i]); if (padding) { if (src[i] == '=') { ac = (ac << 6); @@ -97,9 +140,10 @@ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base6 continue; } } - if (p == NULL || src[i] == 0) + ch = base64_rev_maps[variant][(u8)src[i]]; + if (ch == -1) return -1; - ac = (ac << 6) | (p - base64_table); + ac = (ac << 6) | ch; bits += 6; if (bits >= 8) { bits -= 8; -- cgit v1.2.3 From 9c7d3cf94d33927b6e4e576e7090a929e7162d05 Mon Sep 17 00:00:00 2001 From: Guan-Chun Wu <409411716@gms.tku.edu.tw> Date: Fri, 14 Nov 2025 14:01:32 +0800 Subject: lib/base64: rework encode/decode for speed and stricter validation The old base64 implementation relied on a bit-accumulator loop, which was slow for larger inputs and too permissive in validation. It would accept extra '=', missing '=', or even '=' appearing in the middle of the input, allowing malformed strings to pass. This patch reworks the internals to improve performance and enforce stricter validation. 
Changes: - Encoder: * Process input in 3-byte blocks, mapping 24 bits into four 6-bit symbols, avoiding bit-by-bit shifting and reducing loop iterations. * Handle the final 1-2 leftover bytes explicitly and emit '=' only when requested. - Decoder: * Based on the reverse lookup tables from the previous patch, decode input in 4-character groups. * Each group is looked up directly, converted into numeric values, and combined into 3 output bytes. * Explicitly handle padded and unpadded forms: - With padding: input length must be a multiple of 4, and '=' is allowed only in the last two positions. Reject stray or early '='. - Without padding: validate tail lengths (2 or 3 chars) and require unused low bits to be zero. * Removed the bit-accumulator style loop to reduce loop iterations. Performance (x86_64, Intel Core i7-10700 @ 2.90GHz, avg over 1000 runs, KUnit): Encode: 64B ~90ns -> ~32ns (~2.8x) 1KB ~1332ns -> ~510ns (~2.6x) Decode: 64B ~1530ns -> ~35ns (~43.7x) 1KB ~27726ns -> ~530ns (~52.3x) [akpm@linux-foundation.org: remove u32 casts, per David and Guan-Chun] Link: https://lkml.kernel.org/r/20251114060132.89279-1-409411716@gms.tku.edu.tw Co-developed-by: Kuan-Wei Chiu Signed-off-by: Kuan-Wei Chiu Co-developed-by: Yu-Sheng Huang Signed-off-by: Yu-Sheng Huang Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw> Reviewed-by: David Laight Cc: Christoph Hellwig Cc: Eric Biggers Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Keith Busch Cc: Sagi Grimberg Cc: "Theodore Y. Ts'o" Cc: Viacheslav Dubeyko Cc: Xiubo Li Signed-off-by: Andrew Morton --- lib/base64.c | 109 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 68 insertions(+), 41 deletions(-) (limited to 'lib') diff --git a/lib/base64.c b/lib/base64.c index 2d0b775def64..41961a444028 100644 --- a/lib/base64.c +++ b/lib/base64.c @@ -80,28 +80,38 @@ static const s8 base64_rev_maps[][256] = { int base64_encode(const u8 *src, int srclen, char *dst, bool padding, enum base64_variant variant) { u32 ac = 0; - int bits = 0; - int i; char *cp = dst; const char *base64_table = base64_tables[variant]; - for (i = 0; i < srclen; i++) { - ac = (ac << 8) | src[i]; - bits += 8; - do { - bits -= 6; - *cp++ = base64_table[(ac >> bits) & 0x3f]; - } while (bits >= 6); - } - if (bits) { - *cp++ = base64_table[(ac << (6 - bits)) & 0x3f]; - bits -= 6; + while (srclen >= 3) { + ac = src[0] << 16 | src[1] << 8 | src[2]; + *cp++ = base64_table[ac >> 18]; + *cp++ = base64_table[(ac >> 12) & 0x3f]; + *cp++ = base64_table[(ac >> 6) & 0x3f]; + *cp++ = base64_table[ac & 0x3f]; + + src += 3; + srclen -= 3; } - if (padding) { - while (bits < 0) { + + switch (srclen) { + case 2: + ac = src[0] << 16 | src[1] << 8; + *cp++ = base64_table[ac >> 18]; + *cp++ = base64_table[(ac >> 12) & 0x3f]; + *cp++ = base64_table[(ac >> 6) & 0x3f]; + if (padding) + *cp++ = '='; + break; + case 1: + ac = src[0] << 16; + *cp++ = base64_table[ac >> 18]; + *cp++ = base64_table[(ac >> 12) & 0x3f]; + if (padding) { + *cp++ = '='; *cp++ = '='; - bits += 2; } + break; } return cp - dst; } @@ -117,41 +127,58 @@ EXPORT_SYMBOL_GPL(base64_encode); * * Decodes a string using the selected Base64 variant. * - * This implementation hasn't been optimized for performance. - * * Return: the length of the resulting decoded binary data in bytes, * or -1 if the string isn't a valid Base64 string. 
*/ int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum base64_variant variant) { - u32 ac = 0; - int bits = 0; - int i; u8 *bp = dst; - s8 ch; + s8 input[4]; + s32 val; + const u8 *s = (const u8 *)src; + const s8 *base64_rev_tables = base64_rev_maps[variant]; - for (i = 0; i < srclen; i++) { - if (padding) { - if (src[i] == '=') { - ac = (ac << 6); - bits += 6; - if (bits >= 8) - bits -= 8; - continue; - } - } - ch = base64_rev_maps[variant][(u8)src[i]]; - if (ch == -1) - return -1; - ac = (ac << 6) | ch; - bits += 6; - if (bits >= 8) { - bits -= 8; - *bp++ = (u8)(ac >> bits); + while (srclen >= 4) { + input[0] = base64_rev_tables[s[0]]; + input[1] = base64_rev_tables[s[1]]; + input[2] = base64_rev_tables[s[2]]; + input[3] = base64_rev_tables[s[3]]; + + val = input[0] << 18 | input[1] << 12 | input[2] << 6 | input[3]; + + if (unlikely(val < 0)) { + if (!padding || srclen != 4 || s[3] != '=') + return -1; + padding = 0; + srclen = s[2] == '=' ? 2 : 3; + break; } + + *bp++ = val >> 16; + *bp++ = val >> 8; + *bp++ = val; + + s += 4; + srclen -= 4; } - if (ac & ((1 << bits) - 1)) + + if (likely(!srclen)) + return bp - dst; + if (padding || srclen == 1) return -1; + + val = (base64_rev_tables[s[0]] << 12) | (base64_rev_tables[s[1]] << 6); + *bp++ = val >> 10; + + if (srclen == 2) { + if (val & 0x800003ff) + return -1; + } else { + val |= base64_rev_tables[s[2]]; + if (val & 0x80000003) + return -1; + *bp++ = val >> 2; + } return bp - dst; } EXPORT_SYMBOL_GPL(base64_decode); -- cgit v1.2.3 From 8b365c4f5be9e979bb991a52a0cb4b1e4680c8bd Mon Sep 17 00:00:00 2001 From: Guan-Chun Wu <409411716@gms.tku.edu.tw> Date: Fri, 14 Nov 2025 14:01:57 +0800 Subject: lib: add KUnit tests for base64 encoding/decoding Add a KUnit test suite to validate the base64 helpers. The tests cover both encoding and decoding, including padded and unpadded forms as defined by RFC 4648 (standard base64), and add negative cases for malformed inputs and padding errors. The test suite also validates other variants (URLSAFE, IMAP) to ensure their correctness. In addition to functional checks, the suite includes simple microbenchmarks which report average encode/decode latency for small (64B) and larger (1KB) inputs. These numbers are informational only and do not gate the tests. Kconfig (BASE64_KUNIT) and lib/tests/Makefile are updated accordingly. Sample KUnit output: KTAP version 1 # Subtest: base64 # module: base64_kunit 1..4 # base64_performance_tests: [64B] encode run : 32ns # base64_performance_tests: [64B] decode run : 35ns # base64_performance_tests: [1KB] encode run : 510ns # base64_performance_tests: [1KB] decode run : 530ns ok 1 base64_performance_tests ok 2 base64_std_encode_tests ok 3 base64_std_decode_tests ok 4 base64_variant_tests # base64: pass:4 fail:0 skip:0 total:4 # Totals: pass:4 fail:0 skip:0 total:4 Link: https://lkml.kernel.org/r/20251114060157.89507-1-409411716@gms.tku.edu.tw Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw> Reviewed-by: Kuan-Wei Chiu Cc: Christoph Hellwig Cc: David Laight Cc: Eric Biggers Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Jens Axboe Cc: Keith Busch Cc: Sagi Grimberg Cc: "Theodore Y. 
Ts'o" Cc: Viacheslav Dubeyko Cc: Xiubo Li Cc: Yu-Sheng Huang Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 19 ++- lib/tests/Makefile | 1 + lib/tests/base64_kunit.c | 294 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 lib/tests/base64_kunit.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9a087826498a..bd3bb7a0c801 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2815,8 +2815,25 @@ config CMDLINE_KUNIT_TEST If unsure, say N. +config BASE64_KUNIT + tristate "KUnit test for base64 decoding and encoding" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds the base64 unit tests. + + The tests cover the encoding and decoding logic of Base64 functions + in the kernel. + In addition to correctness checks, simple performance benchmarks + for both encoding and decoding are also included. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config BITS_TEST - tristate "KUnit test for bits.h" if !KUNIT_ALL_TESTS + tristate "KUnit test for bit functions and macros" if !KUNIT_ALL_TESTS depends on KUNIT default KUNIT_ALL_TESTS help diff --git a/lib/tests/Makefile b/lib/tests/Makefile index f7460831cfdd..601dba4b7d96 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -4,6 +4,7 @@ # KUnit tests CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) +obj-$(CONFIG_BASE64_KUNIT) += base64_kunit.o obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_BITS_TEST) += test_bits.o obj-$(CONFIG_BLACKHOLE_DEV_KUNIT_TEST) += blackhole_dev_kunit.o diff --git a/lib/tests/base64_kunit.c b/lib/tests/base64_kunit.c new file mode 100644 index 000000000000..f7252070c359 --- /dev/null +++ b/lib/tests/base64_kunit.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * base64_kunit_test.c - KUnit tests for base64 encoding and decoding functions + * + * Copyright (c) 2025, Guan-Chun Wu <409411716@gms.tku.edu.tw> + */ + +#include +#include + +/* ---------- Benchmark helpers ---------- */ +static u64 bench_encode_ns(const u8 *data, int len, char *dst, int reps, + enum base64_variant variant) +{ + u64 t0, t1; + + t0 = ktime_get_ns(); + for (int i = 0; i < reps; i++) + base64_encode(data, len, dst, true, variant); + t1 = ktime_get_ns(); + + return div64_u64(t1 - t0, (u64)reps); +} + +static u64 bench_decode_ns(const char *data, int len, u8 *dst, int reps, + enum base64_variant variant) +{ + u64 t0, t1; + + t0 = ktime_get_ns(); + for (int i = 0; i < reps; i++) + base64_decode(data, len, dst, true, variant); + t1 = ktime_get_ns(); + + return div64_u64(t1 - t0, (u64)reps); +} + +static void run_perf_and_check(struct kunit *test, const char *label, int size, + enum base64_variant variant) +{ + const int reps = 1000; + size_t outlen = DIV_ROUND_UP(size, 3) * 4; + u8 *in = kmalloc(size, GFP_KERNEL); + char *enc = kmalloc(outlen, GFP_KERNEL); + u8 *decoded = kmalloc(size, GFP_KERNEL); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, in); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, enc); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, decoded); + + get_random_bytes(in, size); + int enc_len = base64_encode(in, size, enc, true, variant); + int dec_len = base64_decode(enc, enc_len, decoded, true, variant); + + /* correctness sanity check */ + KUNIT_EXPECT_EQ(test, dec_len, size); + KUNIT_EXPECT_MEMEQ(test, decoded, in, size); + + /* benchmark encode */ + + u64 t1 = bench_encode_ns(in, size, 
enc, reps, variant); + + kunit_info(test, "[%s] encode run : %lluns", label, t1); + + u64 t2 = bench_decode_ns(enc, enc_len, decoded, reps, variant); + + kunit_info(test, "[%s] decode run : %lluns", label, t2); + + kfree(in); + kfree(enc); + kfree(decoded); +} + +static void base64_performance_tests(struct kunit *test) +{ + /* run on STD variant only */ + run_perf_and_check(test, "64B", 64, BASE64_STD); + run_perf_and_check(test, "1KB", 1024, BASE64_STD); +} + +/* ---------- Helpers for encode ---------- */ +static void expect_encode_ok(struct kunit *test, const u8 *src, int srclen, + const char *expected, bool padding, + enum base64_variant variant) +{ + char buf[128]; + int encoded_len = base64_encode(src, srclen, buf, padding, variant); + + buf[encoded_len] = '\0'; + + KUNIT_EXPECT_EQ(test, encoded_len, strlen(expected)); + KUNIT_EXPECT_STREQ(test, buf, expected); +} + +/* ---------- Helpers for decode ---------- */ +static void expect_decode_ok(struct kunit *test, const char *src, + const u8 *expected, int expected_len, bool padding, + enum base64_variant variant) +{ + u8 buf[128]; + int decoded_len = base64_decode(src, strlen(src), buf, padding, variant); + + KUNIT_EXPECT_EQ(test, decoded_len, expected_len); + KUNIT_EXPECT_MEMEQ(test, buf, expected, expected_len); +} + +static void expect_decode_err(struct kunit *test, const char *src, + int srclen, bool padding, + enum base64_variant variant) +{ + u8 buf[64]; + int decoded_len = base64_decode(src, srclen, buf, padding, variant); + + KUNIT_EXPECT_EQ(test, decoded_len, -1); +} + +/* ---------- Encode Tests ---------- */ +static void base64_std_encode_tests(struct kunit *test) +{ + /* With padding */ + expect_encode_ok(test, (const u8 *)"", 0, "", true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"f", 1, "Zg==", true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"fo", 2, "Zm8=", true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"foo", 3, "Zm9v", true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"foob", 4, "Zm9vYg==", true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"fooba", 5, "Zm9vYmE=", true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"foobar", 6, "Zm9vYmFy", true, BASE64_STD); + + /* Extra cases with padding */ + expect_encode_ok(test, (const u8 *)"Hello, world!", 13, "SGVsbG8sIHdvcmxkIQ==", + true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26, + "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo=", true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"abcdefghijklmnopqrstuvwxyz", 26, + "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=", true, BASE64_STD); + expect_encode_ok(test, (const u8 *)"0123456789+/", 12, "MDEyMzQ1Njc4OSsv", + true, BASE64_STD); + + /* Without padding */ + expect_encode_ok(test, (const u8 *)"", 0, "", false, BASE64_STD); + expect_encode_ok(test, (const u8 *)"f", 1, "Zg", false, BASE64_STD); + expect_encode_ok(test, (const u8 *)"fo", 2, "Zm8", false, BASE64_STD); + expect_encode_ok(test, (const u8 *)"foo", 3, "Zm9v", false, BASE64_STD); + expect_encode_ok(test, (const u8 *)"foob", 4, "Zm9vYg", false, BASE64_STD); + expect_encode_ok(test, (const u8 *)"fooba", 5, "Zm9vYmE", false, BASE64_STD); + expect_encode_ok(test, (const u8 *)"foobar", 6, "Zm9vYmFy", false, BASE64_STD); + + /* Extra cases without padding */ + expect_encode_ok(test, (const u8 *)"Hello, world!", 13, "SGVsbG8sIHdvcmxkIQ", + false, BASE64_STD); + expect_encode_ok(test, (const u8 *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26, + "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo", false, BASE64_STD); + 
expect_encode_ok(test, (const u8 *)"abcdefghijklmnopqrstuvwxyz", 26, + "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo", false, BASE64_STD); + expect_encode_ok(test, (const u8 *)"0123456789+/", 12, "MDEyMzQ1Njc4OSsv", + false, BASE64_STD); +} + +/* ---------- Decode Tests ---------- */ +static void base64_std_decode_tests(struct kunit *test) +{ + /* -------- With padding --------*/ + expect_decode_ok(test, "", (const u8 *)"", 0, true, BASE64_STD); + expect_decode_ok(test, "Zg==", (const u8 *)"f", 1, true, BASE64_STD); + expect_decode_ok(test, "Zm8=", (const u8 *)"fo", 2, true, BASE64_STD); + expect_decode_ok(test, "Zm9v", (const u8 *)"foo", 3, true, BASE64_STD); + expect_decode_ok(test, "Zm9vYg==", (const u8 *)"foob", 4, true, BASE64_STD); + expect_decode_ok(test, "Zm9vYmE=", (const u8 *)"fooba", 5, true, BASE64_STD); + expect_decode_ok(test, "Zm9vYmFy", (const u8 *)"foobar", 6, true, BASE64_STD); + expect_decode_ok(test, "SGVsbG8sIHdvcmxkIQ==", (const u8 *)"Hello, world!", 13, + true, BASE64_STD); + expect_decode_ok(test, "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo=", + (const u8 *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26, true, BASE64_STD); + expect_decode_ok(test, "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo=", + (const u8 *)"abcdefghijklmnopqrstuvwxyz", 26, true, BASE64_STD); + + /* Error cases */ + expect_decode_err(test, "Zg=!", 4, true, BASE64_STD); + expect_decode_err(test, "Zm$=", 4, true, BASE64_STD); + expect_decode_err(test, "Z===", 4, true, BASE64_STD); + expect_decode_err(test, "Zg", 2, true, BASE64_STD); + expect_decode_err(test, "Zm9v====", 8, true, BASE64_STD); + expect_decode_err(test, "Zm==A", 5, true, BASE64_STD); + + { + char with_nul[4] = { 'Z', 'g', '\0', '=' }; + + expect_decode_err(test, with_nul, 4, true, BASE64_STD); + } + + /* -------- Without padding --------*/ + expect_decode_ok(test, "", (const u8 *)"", 0, false, BASE64_STD); + expect_decode_ok(test, "Zg", (const u8 *)"f", 1, false, BASE64_STD); + expect_decode_ok(test, "Zm8", (const u8 *)"fo", 2, false, BASE64_STD); + expect_decode_ok(test, "Zm9v", (const u8 *)"foo", 3, false, BASE64_STD); + expect_decode_ok(test, "Zm9vYg", (const u8 *)"foob", 4, false, BASE64_STD); + expect_decode_ok(test, "Zm9vYmE", (const u8 *)"fooba", 5, false, BASE64_STD); + expect_decode_ok(test, "Zm9vYmFy", (const u8 *)"foobar", 6, false, BASE64_STD); + expect_decode_ok(test, "TWFu", (const u8 *)"Man", 3, false, BASE64_STD); + expect_decode_ok(test, "SGVsbG8sIHdvcmxkIQ", (const u8 *)"Hello, world!", 13, + false, BASE64_STD); + expect_decode_ok(test, "QUJDREVGR0hJSktMTU5PUFFSU1RVVldYWVo", + (const u8 *)"ABCDEFGHIJKLMNOPQRSTUVWXYZ", 26, false, BASE64_STD); + expect_decode_ok(test, "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo", + (const u8 *)"abcdefghijklmnopqrstuvwxyz", 26, false, BASE64_STD); + expect_decode_ok(test, "MDEyMzQ1Njc4OSsv", (const u8 *)"0123456789+/", 12, + false, BASE64_STD); + + /* Error cases */ + expect_decode_err(test, "Zg=!", 4, false, BASE64_STD); + expect_decode_err(test, "Zm$=", 4, false, BASE64_STD); + expect_decode_err(test, "Z===", 4, false, BASE64_STD); + expect_decode_err(test, "Zg=", 3, false, BASE64_STD); + expect_decode_err(test, "Zm9v====", 8, false, BASE64_STD); + expect_decode_err(test, "Zm==v", 4, false, BASE64_STD); + + { + char with_nul[4] = { 'Z', 'g', '\0', '=' }; + + expect_decode_err(test, with_nul, 4, false, BASE64_STD); + } +} + +/* ---------- Variant tests (URLSAFE / IMAP) ---------- */ +static void base64_variant_tests(struct kunit *test) +{ + const u8 sample1[] = { 0x00, 0xfb, 0xff, 0x7f, 0x80 }; + char std_buf[128], url_buf[128], 
imap_buf[128]; + u8 back[128]; + int n_std, n_url, n_imap, m; + int i; + + n_std = base64_encode(sample1, sizeof(sample1), std_buf, false, BASE64_STD); + n_url = base64_encode(sample1, sizeof(sample1), url_buf, false, BASE64_URLSAFE); + std_buf[n_std] = '\0'; + url_buf[n_url] = '\0'; + + for (i = 0; i < n_std; i++) { + if (std_buf[i] == '+') + std_buf[i] = '-'; + else if (std_buf[i] == '/') + std_buf[i] = '_'; + } + KUNIT_EXPECT_STREQ(test, std_buf, url_buf); + + m = base64_decode(url_buf, n_url, back, false, BASE64_URLSAFE); + KUNIT_EXPECT_EQ(test, m, (int)sizeof(sample1)); + KUNIT_EXPECT_MEMEQ(test, back, sample1, sizeof(sample1)); + + n_std = base64_encode(sample1, sizeof(sample1), std_buf, false, BASE64_STD); + n_imap = base64_encode(sample1, sizeof(sample1), imap_buf, false, BASE64_IMAP); + std_buf[n_std] = '\0'; + imap_buf[n_imap] = '\0'; + + for (i = 0; i < n_std; i++) + if (std_buf[i] == '/') + std_buf[i] = ','; + KUNIT_EXPECT_STREQ(test, std_buf, imap_buf); + + m = base64_decode(imap_buf, n_imap, back, false, BASE64_IMAP); + KUNIT_EXPECT_EQ(test, m, (int)sizeof(sample1)); + KUNIT_EXPECT_MEMEQ(test, back, sample1, sizeof(sample1)); + + { + const char *bad = "Zg=="; + u8 tmp[8]; + + m = base64_decode(bad, strlen(bad), tmp, false, BASE64_URLSAFE); + KUNIT_EXPECT_EQ(test, m, -1); + + m = base64_decode(bad, strlen(bad), tmp, false, BASE64_IMAP); + KUNIT_EXPECT_EQ(test, m, -1); + } +} + +/* ---------- Test registration ---------- */ +static struct kunit_case base64_test_cases[] = { + KUNIT_CASE(base64_performance_tests), + KUNIT_CASE(base64_std_encode_tests), + KUNIT_CASE(base64_std_decode_tests), + KUNIT_CASE(base64_variant_tests), + {} +}; + +static struct kunit_suite base64_test_suite = { + .name = "base64", + .test_cases = base64_test_cases, +}; + +kunit_test_suite(base64_test_suite); + +MODULE_AUTHOR("Guan-Chun Wu <409411716@gms.tku.edu.tw>"); +MODULE_DESCRIPTION("KUnit tests for Base64 encoding/decoding, including performance checks"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From ff713698bad2e7d052960cf182fa1ab465564dfd Mon Sep 17 00:00:00 2001 From: Chia-Liang Wang Date: Wed, 19 Nov 2025 18:11:44 +0800 Subject: lib: ratelimit: fix spelling mistake 'seperately' Corrects a spelling mistake in a comment in ratelimit.c where 'seperately' was used instead of 'separately'. Link: https://lkml.kernel.org/r/20251119101144.3175-1-a0979625527@icloud.com Signed-off-by: Chia-Liang Wang Signed-off-by: Andrew Morton --- lib/ratelimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/ratelimit.c b/lib/ratelimit.c index 859c251b23ce..e2d65d3b1c35 100644 --- a/lib/ratelimit.c +++ b/lib/ratelimit.c @@ -27,7 +27,7 @@ int ___ratelimit(struct ratelimit_state *rs, const char *func) { /* Paired with WRITE_ONCE() in .proc_handler(). - * Changing two values seperately could be inconsistent + * Changing two values separately could be inconsistent * and some message could be lost. (See: net_ratelimit_state). */ int interval = READ_ONCE(rs->interval); -- cgit v1.2.3 From c2d2dad24503d7e2eb7cba354fcc73f95fa78d7a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Nov 2025 14:06:45 +0000 Subject: rbtree: inline rb_first() Patch series "rbree: inline rb_first() and rb_last()". Inline these two small helpers, heavily used in TCP and FQ packet scheduler, and in many other places. This reduces kernel text size, and brings an 1.5 % improvement on network TCP stress test. 
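For reference, the body removed from lib/rbtree.c below is what becomes the header inline; a sketch of the resulting static inline (its exact placement in include/linux/rbtree.h is assumed):

  /* first node (in sort order) of the tree: the leftmost node */
  static inline struct rb_node *rb_first(const struct rb_root *root)
  {
          struct rb_node *n = root->rb_node;

          if (!n)
                  return NULL;
          while (n->rb_left)
                  n = n->rb_left;
          return n;
  }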
This patch (of 2): This is a very small function, inlining it saves cpu cycles by reducing register pressure and removing call/ret overhead. It also reduces vmlinux text size by 744 bytes on a typical x86_64 build. Before: size vmlinux text data bss dec hex filename 34812525 22177365 5685248 62675138 3bc58c2 vmlinux After: size vmlinux text data bss dec hex filename 34811781 22177365 5685248 62674394 3bc55da vmlinux [ojeda@kernel.org: fix rust build] Link: https://lkml.kernel.org/r/20251120085518.1463498-1-ojeda@kernel.org Link: https://lkml.kernel.org/r/20251114140646.3817319-1-edumazet@google.com Link: https://lkml.kernel.org/r/20251114140646.3817319-2-edumazet@google.com Signed-off-by: Eric Dumazet Signed-off-by: Miguel Ojeda Reviewed-by: Kuan-Wei Chiu Cc: Jakub Kacinski Cc: Neal Cardwell Cc: Paolo Abeni Cc: Alice Ryhl Cc: Stehen Rothwell Signed-off-by: Andrew Morton --- lib/rbtree.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'lib') diff --git a/lib/rbtree.c b/lib/rbtree.c index 5114eda6309c..b946eb4b759d 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -460,22 +460,6 @@ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, } EXPORT_SYMBOL(__rb_insert_augmented); -/* - * This function returns the first node (in sort order) of the tree. - */ -struct rb_node *rb_first(const struct rb_root *root) -{ - struct rb_node *n; - - n = root->rb_node; - if (!n) - return NULL; - while (n->rb_left) - n = n->rb_left; - return n; -} -EXPORT_SYMBOL(rb_first); - struct rb_node *rb_last(const struct rb_root *root) { struct rb_node *n; -- cgit v1.2.3 From 94984bfed58ca129f7e259ce09973ed0b3f540a8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 14 Nov 2025 14:06:46 +0000 Subject: rbtree: inline rb_last() This is a very small function, inlining it saves cpu cycles in TCP by reducing register pressure and removing call/ret overhead. It also reduces vmlinux text size by 122 bytes on a typical x86_64 build. Before: size vmlinux text data bss dec hex filename 34811781 22177365 5685248 62674394 3bc55da vmlinux After: size vmlinux text data bss dec hex filename 34811659 22177365 5685248 62674272 3bc5560 vmlinux [ojeda@kernel.org: fix rust build] Link: https://lkml.kernel.org/r/20251120085518.1463498-1-ojeda@kernel.org Link: https://lkml.kernel.org/r/20251114140646.3817319-3-edumazet@google.com Signed-off-by: Eric Dumazet Signed-off-by: Miguel Ojeda Reviewed-by: Kuan-Wei Chiu Cc: Jakub Kacinski Cc: Neal Cardwell Cc: Paolo Abeni Cc: Alice Ryhl Cc: Stehen Rothwell Signed-off-by: Andrew Morton --- lib/rbtree.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'lib') diff --git a/lib/rbtree.c b/lib/rbtree.c index b946eb4b759d..18d42bcf4ec9 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -460,19 +460,6 @@ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, } EXPORT_SYMBOL(__rb_insert_augmented); -struct rb_node *rb_last(const struct rb_root *root) -{ - struct rb_node *n; - - n = root->rb_node; - if (!n) - return NULL; - while (n->rb_right) - n = n->rb_right; - return n; -} -EXPORT_SYMBOL(rb_last); - struct rb_node *rb_next(const struct rb_node *node) { struct rb_node *parent; -- cgit v1.2.3 From 70f9133096c833922c3b63461480248cefa7bb0f Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sat, 1 Nov 2025 10:23:18 -0400 Subject: kho: drop notifiers The KHO framework uses a notifier chain as the mechanism for clients to participate in the finalization process. 
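The shape of the direct API, as the test_kho conversion below uses it; a condensed sketch with a hypothetical client name:

  /* called from the client's own initcall, not from a shutdown-time notifier */
  static int my_client_kho_register(struct folio *fdt_folio)
  {
          int err;

          err = kho_preserve_folio(fdt_folio);
          if (err)
                  return err;

          return kho_add_subtree("my-client", folio_address(fdt_folio));
  }

  static void my_client_kho_unregister(struct folio *fdt_folio)
  {
          kho_remove_subtree(folio_address(fdt_folio));
  }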
While this works for a single, central state machine, it is too restrictive for kernel-internal components like pstore/reserve_mem or IMA. These components need a simpler, direct way to register their state for preservation (e.g., during their initcall) without being part of a complex, shutdown-time notifier sequence. The notifier model forces all participants into a single finalization flow and makes direct preservation from an arbitrary context difficult. This patch refactors the client participation model by removing the notifier chain and introducing a direct API for managing FDT subtrees. The core kho_finalize() and kho_abort() state machine remains, but clients now register their data with KHO beforehand. Link: https://lkml.kernel.org/r/20251101142325.1326536-3-pasha.tatashin@soleen.com Signed-off-by: Mike Rapoport (Microsoft) Co-developed-by: Pasha Tatashin Signed-off-by: Pasha Tatashin Cc: Alexander Graf Cc: Changyuan Lyu Cc: Christian Brauner Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Miguel Ojeda Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Simon Horman Cc: Tejun Heo Cc: Zhu Yanjun Signed-off-by: Andrew Morton --- lib/test_kho.c | 35 +++-------------------------------- 1 file changed, 3 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/test_kho.c b/lib/test_kho.c index fff018e5548d..27618c5b4796 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -39,33 +39,6 @@ struct kho_test_state { static struct kho_test_state kho_test_state; -static int kho_test_notifier(struct notifier_block *self, unsigned long cmd, - void *v) -{ - struct kho_test_state *state = &kho_test_state; - struct kho_serialization *ser = v; - int err = 0; - - switch (cmd) { - case KEXEC_KHO_ABORT: - return NOTIFY_DONE; - case KEXEC_KHO_FINALIZE: - /* Handled below */ - break; - default: - return NOTIFY_BAD; - } - - err |= kho_preserve_folio(state->fdt); - err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt)); - - return err ? NOTIFY_BAD : NOTIFY_DONE; -} - -static struct notifier_block kho_test_nb = { - .notifier_call = kho_test_notifier, -}; - static int kho_test_save_data(struct kho_test_state *state, void *fdt) { phys_addr_t *folios_info __free(kvfree) = NULL; @@ -120,6 +93,7 @@ static int kho_test_prepare_fdt(struct kho_test_state *state) fdt = folio_address(state->fdt); + err |= kho_preserve_folio(state->fdt); err |= fdt_create(fdt, fdt_size); err |= fdt_finish_reservemap(fdt); @@ -131,6 +105,7 @@ static int kho_test_prepare_fdt(struct kho_test_state *state) err |= fdt_finish(fdt); + err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt)); if (err) folio_put(state->fdt); @@ -203,10 +178,6 @@ static int kho_test_save(void) if (err) goto err_free_folios; - err = register_kho_notifier(&kho_test_nb); - if (err) - goto err_free_fdt; - return 0; err_free_fdt: @@ -329,7 +300,7 @@ static void kho_test_cleanup(void) static void __exit kho_test_exit(void) { - unregister_kho_notifier(&kho_test_nb); + kho_remove_subtree(folio_address(kho_test_state.fdt)); kho_test_cleanup(); } module_exit(kho_test_exit); -- cgit v1.2.3 From ce405ed5102018946f20b69c1e7ae49697dcf616 Mon Sep 17 00:00:00 2001 From: Pasha Tatashin Date: Sat, 1 Nov 2025 10:23:21 -0400 Subject: test_kho: unpreserve memory in case of error If there is an error half way through KHO memory preservation, we should rollback and unpreserve everything that is partially preserved. 
Link: https://lkml.kernel.org/r/20251101142325.1326536-6-pasha.tatashin@soleen.com Co-developed-by: Mike Rapoport (Microsoft) Signed-off-by: Mike Rapoport (Microsoft) Signed-off-by: Pasha Tatashin Cc: Alexander Graf Cc: Changyuan Lyu Cc: Christian Brauner Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Jonathan Corbet Cc: Masahiro Yamada Cc: Miguel Ojeda Cc: Pratyush Yadav Cc: Randy Dunlap Cc: Simon Horman Cc: Tejun Heo Cc: Zhu Yanjun Signed-off-by: Andrew Morton --- lib/test_kho.c | 103 ++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/test_kho.c b/lib/test_kho.c index 27618c5b4796..85b60d87a50a 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -33,17 +33,28 @@ struct kho_test_state { unsigned int nr_folios; struct folio **folios; phys_addr_t *folios_info; + struct kho_vmalloc folios_info_phys; + int nr_folios_preserved; struct folio *fdt; __wsum csum; }; static struct kho_test_state kho_test_state; -static int kho_test_save_data(struct kho_test_state *state, void *fdt) +static void kho_test_unpreserve_data(struct kho_test_state *state) +{ + for (int i = 0; i < state->nr_folios_preserved; i++) + kho_unpreserve_folio(state->folios[i]); + + kho_unpreserve_vmalloc(&state->folios_info_phys); + vfree(state->folios_info); +} + +static int kho_test_preserve_data(struct kho_test_state *state) { - phys_addr_t *folios_info __free(kvfree) = NULL; struct kho_vmalloc folios_info_phys; - int err = 0; + phys_addr_t *folios_info; + int err; folios_info = vmalloc_array(state->nr_folios, sizeof(*folios_info)); if (!folios_info) @@ -51,64 +62,98 @@ static int kho_test_save_data(struct kho_test_state *state, void *fdt) err = kho_preserve_vmalloc(folios_info, &folios_info_phys); if (err) - return err; + goto err_free_info; + + state->folios_info_phys = folios_info_phys; + state->folios_info = folios_info; for (int i = 0; i < state->nr_folios; i++) { struct folio *folio = state->folios[i]; unsigned int order = folio_order(folio); folios_info[i] = virt_to_phys(folio_address(folio)) | order; - err = kho_preserve_folio(folio); if (err) - break; + goto err_unpreserve; + state->nr_folios_preserved++; } + return 0; + +err_unpreserve: + /* + * kho_test_unpreserve_data frees folio_info, bail out immediately to + * avoid double free + */ + kho_test_unpreserve_data(state); + return err; + +err_free_info: + vfree(folios_info); + return err; +} + +static int kho_test_prepare_fdt(struct kho_test_state *state, ssize_t fdt_size) +{ + const char compatible[] = KHO_TEST_COMPAT; + unsigned int magic = KHO_TEST_MAGIC; + void *fdt = folio_address(state->fdt); + int err; + + err = fdt_create(fdt, fdt_size); + err |= fdt_finish_reservemap(fdt); + err |= fdt_begin_node(fdt, ""); + err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible)); + err |= fdt_property(fdt, "magic", &magic, sizeof(magic)); + err |= fdt_begin_node(fdt, "data"); err |= fdt_property(fdt, "nr_folios", &state->nr_folios, sizeof(state->nr_folios)); - err |= fdt_property(fdt, "folios_info", &folios_info_phys, - sizeof(folios_info_phys)); + err |= fdt_property(fdt, "folios_info", &state->folios_info_phys, + sizeof(state->folios_info_phys)); err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); err |= fdt_end_node(fdt); - if (!err) - state->folios_info = no_free_ptr(folios_info); + err |= fdt_end_node(fdt); + err |= fdt_finish(fdt); return err; } -static int kho_test_prepare_fdt(struct kho_test_state *state) +static int kho_test_preserve(struct 
kho_test_state *state) { - const char compatible[] = KHO_TEST_COMPAT; - unsigned int magic = KHO_TEST_MAGIC; ssize_t fdt_size; - int err = 0; - void *fdt; + int err; fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE; state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size)); if (!state->fdt) return -ENOMEM; - fdt = folio_address(state->fdt); - - err |= kho_preserve_folio(state->fdt); - err |= fdt_create(fdt, fdt_size); - err |= fdt_finish_reservemap(fdt); + err = kho_preserve_folio(state->fdt); + if (err) + goto err_free_fdt; - err |= fdt_begin_node(fdt, ""); - err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible)); - err |= fdt_property(fdt, "magic", &magic, sizeof(magic)); - err |= kho_test_save_data(state, fdt); - err |= fdt_end_node(fdt); + err = kho_test_preserve_data(state); + if (err) + goto err_unpreserve_fdt; - err |= fdt_finish(fdt); + err = kho_test_prepare_fdt(state, fdt_size); + if (err) + goto err_unpreserve_data; err = kho_add_subtree(KHO_TEST_FDT, folio_address(state->fdt)); if (err) - folio_put(state->fdt); + goto err_unpreserve_data; + return 0; + +err_unpreserve_data: + kho_test_unpreserve_data(state); +err_unpreserve_fdt: + kho_unpreserve_folio(state->fdt); +err_free_fdt: + folio_put(state->fdt); return err; } @@ -174,14 +219,12 @@ static int kho_test_save(void) if (err) goto err_free_folios; - err = kho_test_prepare_fdt(state); + err = kho_test_preserve(state); if (err) goto err_free_folios; return 0; -err_free_fdt: - folio_put(state->fdt); err_free_folios: kvfree(folios); return err; -- cgit v1.2.3 From 11047466eff28cb3d3422622166931204ed7d502 Mon Sep 17 00:00:00 2001 From: Pratyush Yadav Date: Tue, 18 Nov 2025 19:10:45 +0100 Subject: test_kho: always print restore status Currently the KHO test only prints a message on success, and remains silent on failure. This makes it difficult to notice a failing test. A failing test is usually more interesting than a successful one. Always print the test status after attempting restore. Link: https://lkml.kernel.org/r/20251118181046.23321-1-pratyush@kernel.org Signed-off-by: Pratyush Yadav Reviewed-by: Pasha Tatashin Reviewed-by: Mike Rapoport (Microsoft) Acked-by: SeongJae Park Cc: Alexander Graf Cc: Pratyush Yadav Signed-off-by: Andrew Morton --- lib/test_kho.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/test_kho.c b/lib/test_kho.c index 85b60d87a50a..47de56280795 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -306,7 +306,6 @@ static int kho_test_restore(phys_addr_t fdt_phys) if (err) return err; - pr_info("KHO restore succeeded\n"); return 0; } @@ -319,8 +318,15 @@ static int __init kho_test_init(void) return 0; err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); - if (!err) - return kho_test_restore(fdt_phys); + if (!err) { + err = kho_test_restore(fdt_phys); + if (err) + pr_err("KHO restore failed\n"); + else + pr_info("KHO restore succeeded\n"); + + return err; + } if (err != -ENOENT) { pr_warn("failed to retrieve %s FDT: %d\n", KHO_TEST_FDT, err); -- cgit v1.2.3