From b5960a06b90eeba147c50c2d14de57b923371651 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 4 Apr 2025 15:10:03 -0700 Subject: vsprintf: Use __diag macros to disable '-Wsuggest-attribute=format' The GCC specific warning '-Wsuggest-attribute=format' is disabled around va_format() using raw #pragma statements, which includes an '#ifndef __clang__' to avoid a warning about an unknown warning option from clang (which recognizes '#pragma GCC' for compatibility reasons): lib/vsprintf.c:1703:32: error: unknown warning group '-Wsuggest-attribute=format', ignored [-Werror,-Wunknown-warning-option] 1703 | #pragma GCC diagnostic ignored "-Wsuggest-attribute=format" | ^ While the current solution works, it is not visually appealing. The kernel already has some infrastructure that wraps these #pragma statements to give more specific control over diagnostics without needing #ifdef blocks for different compilers. Convert the existing statements over to the __diag macros. Closes: https://lore.kernel.org/r/CAHk-=wgfX9nBGE0Ap9GjhOy7Mn=RSy=rx0MvqfYFFDx31KJXqQ@mail.gmail.com Signed-off-by: Nathan Chancellor Tested-by: Andy Shevchenko Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250404-vsprintf-convert-pragmas-to-__diag-v1-2-5d6c5c55b2bd@kernel.org Signed-off-by: Petr Mladek --- lib/vsprintf.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a2195bc81723..8a6cdee0d4ad 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1699,10 +1699,9 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } -#pragma GCC diagnostic push -#ifndef __clang__ -#pragma GCC diagnostic ignored "-Wsuggest-attribute=format" -#endif +__diag_push(); +__diag_ignore(GCC, all, "-Wsuggest-attribute=format", + "Not a valid __printf() conversion candidate."); static char *va_format(char *buf, char *end, struct va_format *va_fmt, struct printf_spec spec) { @@ -1717,7 +1716,7 @@ static char *va_format(char *buf, char *end, struct va_format *va_fmt, return buf; } -#pragma GCC diagnostic pop +__diag_pop(); static noinline_for_stack char *uuid_string(char *buf, char *end, const u8 *addr, -- cgit v1.2.3 From de1c831a7898f164c1c2703c6b2b9e4fb4bebefc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 15 Apr 2025 10:02:33 -0700 Subject: slab: Decouple slab_debug and no_hash_pointers Some system owners use slab_debug=FPZ (or similar) as a hardening option, but do not want to be forced into having kernel addresses exposed due to the implicit "no_hash_pointers" boot param setting.[1] Introduce the "hash_pointers" boot param, which defaults to "auto" (the current behavior), but also includes "always" (forcing on hashing even when "slab_debug=..." is defined), and "never". The existing "no_hash_pointers" boot param becomes an alias for "hash_pointers=never". This makes it possible to boot with "slab_debug=FPZ hash_pointers=always". Link: https://github.com/KSPP/linux/issues/368 [1] Fixes: 792702911f58 ("slub: force on no_hash_pointers when slub_debug is enabled") Co-developed-by: Sergio Perez Gonzalez Signed-off-by: Sergio Perez Gonzalez Acked-by: Vlastimil Babka Acked-by: David Rientjes Reviewed-by: Bagas Sanjaya Signed-off-by: Kees Cook Reviewed-by: Harry Yoo Acked-by: Rafael Aquini Tested-by: Petr Mladek Reviewed-by: Petr Mladek Link: https://patch.msgid.link/20250415170232.it.467-kees@kernel.org [kees@kernel.org: Add note about hash_pointers into slab_debug kernel parameter documentation.] Signed-off-by: Petr Mladek --- lib/vsprintf.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 01699852f30c..22cbd75266ef 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -60,6 +60,20 @@ bool no_hash_pointers __ro_after_init; EXPORT_SYMBOL_GPL(no_hash_pointers); +/* + * Hashed pointers policy selected by "hash_pointers=..." boot param + * + * `auto` - Hashed pointers enabled unless disabled by slub_debug_enabled=true + * `always` - Hashed pointers enabled unconditionally + * `never` - Hashed pointers disabled unconditionally + */ +enum hash_pointers_policy { + HASH_PTR_AUTO = 0, + HASH_PTR_ALWAYS, + HASH_PTR_NEVER +}; +static enum hash_pointers_policy hash_pointers_mode __initdata; + noinline static unsigned long long simple_strntoull(const char *startp, char **endp, unsigned int base, size_t max_chars) { @@ -2271,12 +2285,23 @@ char *resource_or_range(const char *fmt, char *buf, char *end, void *ptr, return resource_string(buf, end, ptr, spec, fmt); } -int __init no_hash_pointers_enable(char *str) +void __init hash_pointers_finalize(bool slub_debug) { - if (no_hash_pointers) - return 0; + switch (hash_pointers_mode) { + case HASH_PTR_ALWAYS: + no_hash_pointers = false; + break; + case HASH_PTR_NEVER: + no_hash_pointers = true; + break; + case HASH_PTR_AUTO: + default: + no_hash_pointers = slub_debug; + break; + } - no_hash_pointers = true; + if (!no_hash_pointers) + return; pr_warn("**********************************************************\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); @@ -2289,11 +2314,39 @@ int __init no_hash_pointers_enable(char *str) pr_warn("** the kernel, report this immediately to your system **\n"); pr_warn("** administrator! **\n"); pr_warn("** **\n"); + pr_warn("** Use hash_pointers=always to force this mode off **\n"); + pr_warn("** **\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("**********************************************************\n"); +} + +static int __init hash_pointers_mode_parse(char *str) +{ + if (!str) { + pr_warn("Hash pointers mode empty; falling back to auto.\n"); + hash_pointers_mode = HASH_PTR_AUTO; + } else if (strncmp(str, "auto", 4) == 0) { + pr_info("Hash pointers mode set to auto.\n"); + hash_pointers_mode = HASH_PTR_AUTO; + } else if (strncmp(str, "never", 5) == 0) { + pr_info("Hash pointers mode set to never.\n"); + hash_pointers_mode = HASH_PTR_NEVER; + } else if (strncmp(str, "always", 6) == 0) { + pr_info("Hash pointers mode set to always.\n"); + hash_pointers_mode = HASH_PTR_ALWAYS; + } else { + pr_warn("Unknown hash_pointers mode '%s' specified; assuming auto.\n", str); + hash_pointers_mode = HASH_PTR_AUTO; + } return 0; } +early_param("hash_pointers", hash_pointers_mode_parse); + +static int __init no_hash_pointers_enable(char *str) +{ + return hash_pointers_mode_parse("never"); +} early_param("no_hash_pointers", no_hash_pointers_enable); /* -- cgit v1.2.3 From e795000e755c309d1f9bd2a0590eca38b4625f3a Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 16 Jun 2025 15:22:44 -0400 Subject: mul_u64_u64_div_u64: fix the division-by-zero behavior MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current implementation forces a compile-time 1/0 division, which generates an undefined instruction (ud2 on x86) rather than a proper runtime division-by-zero exception. Change to trigger an actual div-by-0 exception at runtime, consistent with other division operations. Use a non-1 dividend to prevent the compiler from optimizing the division into a comparison. Link: https://lkml.kernel.org/r/q246p466-1453-qon9-29so-37105116009q@onlyvoer.pbz Signed-off-by: Nicolas Pitre Cc: Biju Das Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Weißschuh Cc: Uwe Kleine-König Cc: David Laight Signed-off-by: Andrew Morton --- lib/math/div64.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/math/div64.c b/lib/math/div64.c index 5faa29208bdb..bf77b9843175 100644 --- a/lib/math/div64.c +++ b/lib/math/div64.c @@ -212,12 +212,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c) #endif - /* make sure c is not zero, trigger exception otherwise */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdiv-by-zero" - if (unlikely(c == 0)) - return 1/0; -#pragma GCC diagnostic pop + /* make sure c is not zero, trigger runtime exception otherwise */ + if (unlikely(c == 0)) { + unsigned long zero = 0; + + OPTIMIZER_HIDE_VAR(zero); + return ~0UL/zero; + } int shift = __builtin_ctzll(c); -- cgit v1.2.3 From 1857fcc847443b0238cb64584b43d8c3a9049a0a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 17 Mar 2025 17:02:28 +0800 Subject: lib/raid6: replace custom zero page with ZERO_PAGE Use the system-wide zero page instead of a custom zero page. [herbert@gondor.apana.org.au: update lib/raid6/recov_rvv.c, per Klara] Link: https://lkml.kernel.org/r/aFkUnXWtxcgOTVkw@gondor.apana.org.au Link: https://lkml.kernel.org/r/Z9flJNkWQICx0PXk@gondor.apana.org.au Signed-off-by: Herbert Xu Cc: Song Liu Cc: Yu Kuai Cc: Klara Modin Signed-off-by: Andrew Morton --- lib/raid6/algos.c | 3 --- lib/raid6/recov.c | 6 +++--- lib/raid6/recov_avx2.c | 6 +++--- lib/raid6/recov_avx512.c | 6 +++--- lib/raid6/recov_loongarch_simd.c | 12 ++++++------ lib/raid6/recov_neon.c | 6 +++--- lib/raid6/recov_s390xc.c | 6 +++--- lib/raid6/recov_ssse3.c | 6 +++--- 8 files changed, 24 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 75ce3e134b7c..799e0e5eac26 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -18,9 +18,6 @@ #else #include #include -/* In .bss so it's zeroed */ -const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); -EXPORT_SYMBOL(raid6_empty_zero_page); #endif struct raid6_calls raid6_call; diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index a7c1b2bbe40d..b5e47c008b41 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -31,10 +31,10 @@ static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -72,7 +72,7 @@ static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c index 4e8095403ee2..97d598d2535c 100644 --- a/lib/raid6/recov_avx2.c +++ b/lib/raid6/recov_avx2.c @@ -28,10 +28,10 @@ static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -196,7 +196,7 @@ static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_avx512.c b/lib/raid6/recov_avx512.c index 310c715db313..7986120ca444 100644 --- a/lib/raid6/recov_avx512.c +++ b/lib/raid6/recov_avx512.c @@ -37,10 +37,10 @@ static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila, */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -238,7 +238,7 @@ static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila, */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c index 94aeac85e6f7..93dc515997a1 100644 --- a/lib/raid6/recov_loongarch_simd.c +++ b/lib/raid6/recov_loongarch_simd.c @@ -42,10 +42,10 @@ static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -197,7 +197,7 @@ static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -316,10 +316,10 @@ static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -436,7 +436,7 @@ static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c index 1bfc14174d4d..70e1404c1512 100644 --- a/lib/raid6/recov_neon.c +++ b/lib/raid6/recov_neon.c @@ -36,10 +36,10 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -74,7 +74,7 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c index 179eec900cea..1d32c01261be 100644 --- a/lib/raid6/recov_s390xc.c +++ b/lib/raid6/recov_s390xc.c @@ -35,10 +35,10 @@ static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -82,7 +82,7 @@ static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index 4bfa3c6b60de..2e849185c32b 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -30,10 +30,10 @@ static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, Use the dead data pages as temporary storage for delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -203,7 +203,7 @@ static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, /* Compute syndrome with zero for the missing data page Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks-1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); -- cgit v1.2.3 From caf728dfa7789f7096e8cd4341b4bf8f671f5c1d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 20 Jun 2025 13:19:04 +0200 Subject: lib: test_objagg: split test_hints_case() into two functions With sanitizers enabled, this function uses a lot of stack, causing a harmless warning: lib/test_objagg.c: In function 'test_hints_case.constprop': lib/test_objagg.c:994:1: error: the frame size of 1440 bytes is larger than 1408 bytes [-Werror=frame-larger-than=] Most of this is from the two 'struct world' structures. Since most of the work in this function is duplicated for the two, split it up into separate functions that each use one of them. The combined stack usage is still the same here, but there is no warning any more, and the code is still safe because of the known call chain. Link: https://lkml.kernel.org/r/20250620111907.3395296-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Cc: Jiri Pirko Signed-off-by: Andrew Morton --- lib/test_objagg.c | 77 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 45 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/test_objagg.c b/lib/test_objagg.c index 222b39fc2629..ce5c4c36a084 100644 --- a/lib/test_objagg.c +++ b/lib/test_objagg.c @@ -908,50 +908,22 @@ static int check_expect_hints_stats(struct objagg_hints *objagg_hints, return err; } -static int test_hints_case(const struct hints_case *hints_case) +static int test_hints_case2(const struct hints_case *hints_case, + struct objagg_hints *hints, struct objagg *objagg) { struct objagg_obj *objagg_obj; - struct objagg_hints *hints; struct world world2 = {}; - struct world world = {}; struct objagg *objagg2; - struct objagg *objagg; const char *errmsg; int i; int err; - objagg = objagg_create(&delta_ops, NULL, &world); - if (IS_ERR(objagg)) - return PTR_ERR(objagg); - - for (i = 0; i < hints_case->key_ids_count; i++) { - objagg_obj = world_obj_get(&world, objagg, - hints_case->key_ids[i]); - if (IS_ERR(objagg_obj)) { - err = PTR_ERR(objagg_obj); - goto err_world_obj_get; - } - } - - pr_debug_stats(objagg); - err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); - if (err) { - pr_err("Stats: %s\n", errmsg); - goto err_check_expect_stats; - } - - hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); - if (IS_ERR(hints)) { - err = PTR_ERR(hints); - goto err_hints_get; - } - pr_debug_hints_stats(hints); err = check_expect_hints_stats(hints, &hints_case->expect_stats_hints, &errmsg); if (err) { pr_err("Hints stats: %s\n", errmsg); - goto err_check_expect_hints_stats; + return err; } objagg2 = objagg_create(&delta_ops, hints, &world2); @@ -983,7 +955,48 @@ err_world2_obj_get: world_obj_put(&world2, objagg, hints_case->key_ids[i]); i = hints_case->key_ids_count; objagg_destroy(objagg2); -err_check_expect_hints_stats: + + return err; +} + +static int test_hints_case(const struct hints_case *hints_case) +{ + struct objagg_obj *objagg_obj; + struct objagg_hints *hints; + struct world world = {}; + struct objagg *objagg; + const char *errmsg; + int i; + int err; + + objagg = objagg_create(&delta_ops, NULL, &world); + if (IS_ERR(objagg)) + return PTR_ERR(objagg); + + for (i = 0; i < hints_case->key_ids_count; i++) { + objagg_obj = world_obj_get(&world, objagg, + hints_case->key_ids[i]); + if (IS_ERR(objagg_obj)) { + err = PTR_ERR(objagg_obj); + goto err_world_obj_get; + } + } + + pr_debug_stats(objagg); + err = check_expect_stats(objagg, &hints_case->expect_stats, &errmsg); + if (err) { + pr_err("Stats: %s\n", errmsg); + goto err_check_expect_stats; + } + + hints = objagg_hints_get(objagg, OBJAGG_OPT_ALGO_SIMPLE_GREEDY); + if (IS_ERR(hints)) { + err = PTR_ERR(hints); + goto err_hints_get; + } + + err = test_hints_case2(hints_case, hints, objagg); + objagg_hints_put(hints); err_hints_get: err_check_expect_stats: -- cgit v1.2.3 From b76e89e50fc3693b7b8a443ed906320d8ccb93fd Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:01 +0800 Subject: panic: generalize panic_print's function to show sys info 'panic_print' was introduced to help debugging kernel panic by dumping different kinds of system information like tasks' call stack, memory, ftrace buffer, etc. Actually this function could also be used to help debugging other cases like task-hung, soft/hard lockup, etc. where user may need the snapshot of system info at that time. Extract system info dump function related code from panic.c to separate file sys_info.[ch], for wider usage by other kernel parts for debugging. Also modify the macro names about singulars/plurals. Link: https://lkml.kernel.org/r/20250703021004.42328-3-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Signed-off-by: Andrew Morton --- lib/Makefile | 2 +- lib/sys_info.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 lib/sys_info.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index c38582f187dd..88d6228089a8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,7 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o win_minmax.o memcat_p.o \ - buildid.o objpool.o iomem_copy.o + buildid.o objpool.o iomem_copy.o sys_info.o lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o diff --git a/lib/sys_info.c b/lib/sys_info.c new file mode 100644 index 000000000000..53031e5cb98e --- /dev/null +++ b/lib/sys_info.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include + +#include + +void sys_info(unsigned long si_mask) +{ + if (si_mask & SYS_INFO_TASKS) + show_state(); + + if (si_mask & SYS_INFO_MEM) + show_mem(); + + if (si_mask & SYS_INFO_TIMERS) + sysrq_timer_list_show(); + + if (si_mask & SYS_INFO_LOCKS) + debug_show_all_locks(); + + if (si_mask & SYS_INFO_FTRACE) + ftrace_dump(DUMP_ALL); + + if (si_mask & SYS_INFO_ALL_CPU_BT) + trigger_all_cpu_backtrace(); + + if (si_mask & SYS_INFO_BLOCKED_TASKS) + show_state_filter(TASK_UNINTERRUPTIBLE); +} -- cgit v1.2.3 From d747755917bf8ae08f490c3fe7d8e321afab8127 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 3 Jul 2025 10:10:02 +0800 Subject: panic: add 'panic_sys_info' sysctl to take human readable string parameter Bitmap definition for 'panic_print' is hard to remember and decode. Add 'panic_sys_info='sysctl to take human readable string like "tasks,mem,timers,locks,ftrace,..." and translate it into bitmap. The detailed mapping is: SYS_INFO_TASKS "tasks" SYS_INFO_MEM "mem" SYS_INFO_TIMERS "timers" SYS_INFO_LOCKS "locks" SYS_INFO_FTRACE "ftrace" SYS_INFO_ALL_CPU_BT "all_bt" SYS_INFO_BLOCKED_TASKS "blocked_tasks" [nathan@kernel.org: add __maybe_unused to sys_info_avail] Link: https://lkml.kernel.org/r/20250708-fix-clang-sys_info_avail-warning-v1-1-60d239eacd64@kernel.org Link: https://lkml.kernel.org/r/20250703021004.42328-4-feng.tang@linux.alibaba.com Signed-off-by: Feng Tang Suggested-by: Petr Mladek Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Cc: Nathan Chancellor Cc: Andy Shevchenko Signed-off-by: Andrew Morton --- lib/sys_info.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index 53031e5cb98e..5bf503fd7ec1 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -3,10 +3,100 @@ #include #include #include +#include #include #include +struct sys_info_name { + unsigned long bit; + const char *name; +}; + +/* + * When 'si_names' gets updated, please make sure the 'sys_info_avail' + * below is updated accordingly. + */ +static const struct sys_info_name si_names[] = { + { SYS_INFO_TASKS, "tasks" }, + { SYS_INFO_MEM, "mem" }, + { SYS_INFO_TIMERS, "timers" }, + { SYS_INFO_LOCKS, "locks" }, + { SYS_INFO_FTRACE, "ftrace" }, + { SYS_INFO_ALL_CPU_BT, "all_bt" }, + { SYS_INFO_BLOCKED_TASKS, "blocked_tasks" }, +}; + +/* Expecting string like "xxx_sys_info=tasks,mem,timers,locks,ftrace,..." */ +unsigned long sys_info_parse_param(char *str) +{ + unsigned long si_bits = 0; + char *s, *name; + int i; + + s = str; + while ((name = strsep(&s, ",")) && *name) { + for (i = 0; i < ARRAY_SIZE(si_names); i++) { + if (!strcmp(name, si_names[i].name)) { + si_bits |= si_names[i].bit; + break; + } + } + } + + return si_bits; +} + +#ifdef CONFIG_SYSCTL + +static const char sys_info_avail[] __maybe_unused = "tasks,mem,timers,locks,ftrace,all_bt,blocked_tasks"; + +int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, + void *buffer, size_t *lenp, + loff_t *ppos) +{ + char names[sizeof(sys_info_avail) + 1]; + struct ctl_table table; + unsigned long *si_bits_global; + + si_bits_global = ro_table->data; + + if (write) { + unsigned long si_bits; + int ret; + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + ret = proc_dostring(&table, write, buffer, lenp, ppos); + if (ret) + return ret; + + si_bits = sys_info_parse_param(names); + /* The access to the global value is not synchronized. */ + WRITE_ONCE(*si_bits_global, si_bits); + return 0; + } else { + /* for 'read' operation */ + char *delim = ""; + int i, len = 0; + + for (i = 0; i < ARRAY_SIZE(si_names); i++) { + if (*si_bits_global & si_names[i].bit) { + len += scnprintf(names + len, sizeof(names) - len, + "%s%s", delim, si_names[i].name); + delim = ","; + } + } + + table = *ro_table; + table.data = names; + table.maxlen = sizeof(names); + return proc_dostring(&table, write, buffer, lenp, ppos); + } +} +#endif + void sys_info(unsigned long si_mask) { if (si_mask & SYS_INFO_TASKS) -- cgit v1.2.3 From b3d5fd6f82dde8c906dc2a587003a44252ae5eae Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Fri, 6 Jun 2025 21:47:56 +0800 Subject: lib/math/gcd: use static key to select implementation at runtime Patch series "Optimize GCD performance on RISC-V by selecting implementation at runtime", v3. The current implementation of gcd() selects between the binary GCD and the odd-even GCD algorithm at compile time, depending on whether CONFIG_CPU_NO_EFFICIENT_FFS is set. On platforms like RISC-V, however, this compile-time decision can be misleading: even when the compiler emits ctz instructions based on the assumption that they are efficient (as is the case when CONFIG_RISCV_ISA_ZBB is enabled), the actual hardware may lack support for the Zbb extension. In such cases, ffs() falls back to a software implementation at runtime, making the binary GCD algorithm significantly slower than the odd-even variant. To address this, we introduce a static key to allow runtime selection between the binary and odd-even GCD implementations. On RISC-V, the kernel now checks for Zbb support during boot. If Zbb is unavailable, the static key is disabled so that gcd() consistently uses the more efficient odd-even algorithm in that scenario. Additionally, to further reduce code size, we select CONFIG_CPU_NO_EFFICIENT_FFS automatically when CONFIG_RISCV_ISA_ZBB is not enabled, avoiding compilation of the unused binary GCD implementation entirely on systems where it would never be executed. This series ensures that the most efficient GCD algorithm is used in practice and avoids compiling unnecessary code based on hardware capabilities and kernel configuration. This patch (of 3): On platforms like RISC-V, the compiler may generate hardware FFS instructions even if the underlying CPU does not actually support them. Currently, the GCD implementation is chosen at compile time based on CONFIG_CPU_NO_EFFICIENT_FFS, which can result in suboptimal behavior on such systems. Introduce a static key, efficient_ffs_key, to enable runtime selection between the binary GCD (using ffs) and the odd-even GCD implementation. This allows the kernel to default to the faster binary GCD when FFS is efficient, while retaining the ability to fall back when needed. Link: https://lkml.kernel.org/r/20250606134758.1308400-1-visitorckw@gmail.com Link: https://lkml.kernel.org/r/20250606134758.1308400-2-visitorckw@gmail.com Co-developed-by: Yu-Chun Lin Signed-off-by: Yu-Chun Lin Signed-off-by: Kuan-Wei Chiu Cc: Albert Ou Cc: Ching-Chun (Jim) Huang Cc: Palmer Dabbelt Cc: Paul Walmsley Cc: Alexandre Ghiti Signed-off-by: Andrew Morton --- lib/math/gcd.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/math/gcd.c b/lib/math/gcd.c index e3b042214d1b..62efca6787ae 100644 --- a/lib/math/gcd.c +++ b/lib/math/gcd.c @@ -11,22 +11,16 @@ * has decent hardware division. */ +DEFINE_STATIC_KEY_TRUE(efficient_ffs_key); + #if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) /* If __ffs is available, the even/odd algorithm benchmarks slower. */ -/** - * gcd - calculate and return the greatest common divisor of 2 unsigned longs - * @a: first value - * @b: second value - */ -unsigned long gcd(unsigned long a, unsigned long b) +static unsigned long binary_gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; - if (!a || !b) - return r; - b >>= __ffs(b); if (b == 1) return r & -r; @@ -44,9 +38,15 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#else +#endif /* If normalization is done by loops, the even/odd algorithm is a win. */ + +/** + * gcd - calculate and return the greatest common divisor of 2 unsigned longs + * @a: first value + * @b: second value + */ unsigned long gcd(unsigned long a, unsigned long b) { unsigned long r = a | b; @@ -54,6 +54,11 @@ unsigned long gcd(unsigned long a, unsigned long b) if (!a || !b) return r; +#if !defined(CONFIG_CPU_NO_EFFICIENT_FFS) + if (static_branch_likely(&efficient_ffs_key)) + return binary_gcd(a, b); +#endif + /* Isolate lsbit of r */ r &= -r; @@ -80,6 +85,4 @@ unsigned long gcd(unsigned long a, unsigned long b) } } -#endif - EXPORT_SYMBOL_GPL(gcd); -- cgit v1.2.3 From a9ed4422adacce848fd368c3a7076368ead7fc18 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 23 Jun 2025 16:47:25 +0800 Subject: lib/raid6: update recov_rvv.c zero page usage Update lib/raid6/recov_rvv.c, for 1857fcc84744 ("lib/raid6: replace custom zero page with ZERO_PAGE"), per Klara. Link: https://lkml.kernel.org/r/aFkUnXWtxcgOTVkw@gondor.apana.org.au Fixes: 1857fcc84744 ("lib/raid6: replace custom zero page with ZERO_PAGE") Signed-off-by: Herbert Xu Cc: Song Liu Cc: Yu Kuai Cc: Klara Modin Signed-off-by: Andrew Morton --- lib/raid6/recov_rvv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c index f29303795ccf..5d54c4b437df 100644 --- a/lib/raid6/recov_rvv.c +++ b/lib/raid6/recov_rvv.c @@ -165,10 +165,10 @@ static void raid6_2data_recov_rvv(int disks, size_t bytes, int faila, * delta p and delta q */ dp = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 2] = dp; dq = (u8 *)ptrs[failb]; - ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[failb] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); @@ -203,7 +203,7 @@ static void raid6_datap_recov_rvv(int disks, size_t bytes, int faila, * Use the dead data page as temporary storage for delta q */ dq = (u8 *)ptrs[faila]; - ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[faila] = raid6_get_zero_page(); ptrs[disks - 1] = dq; raid6_call.gen_syndrome(disks, bytes, ptrs); -- cgit v1.2.3 From 768da2eae8662ca51102794c32d37c17410acbf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 11 Jul 2025 15:31:38 +0200 Subject: kunit: test: Drop CONFIG_MODULE ifdeffery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function stubs exposed by module.h allow the code to compile properly without the ifdeffery. The generated object code stays the same, as the compiler can optimize away all the dead code. As the code is still typechecked developer errors can be detected faster. Signed-off-by: Thomas Weißschuh Acked-by: David Gow Reviewed-by: Daniel Gomez Link: https://lore.kernel.org/r/20250711-kunit-ifdef-modules-v2-3-39443decb1f8@linutronix.de Signed-off-by: Daniel Gomez --- lib/kunit/test.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'lib') diff --git a/lib/kunit/test.c b/lib/kunit/test.c index f3c6b11f12b8..d2bfa331a2b1 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -802,7 +802,6 @@ void __kunit_test_suites_exit(struct kunit_suite **suites, int num_suites) } EXPORT_SYMBOL_GPL(__kunit_test_suites_exit); -#ifdef CONFIG_MODULES static void kunit_module_init(struct module *mod) { struct kunit_suite_set suite_set, filtered_set; @@ -890,7 +889,6 @@ static struct notifier_block kunit_mod_nb = { .notifier_call = kunit_module_notify, .priority = 0, }; -#endif KUNIT_DEFINE_ACTION_WRAPPER(kfree_action_wrapper, kfree, const void *) @@ -981,20 +979,14 @@ static int __init kunit_init(void) kunit_debugfs_init(); kunit_bus_init(); -#ifdef CONFIG_MODULES return register_module_notifier(&kunit_mod_nb); -#else - return 0; -#endif } late_initcall(kunit_init); static void __exit kunit_exit(void) { memset(&kunit_hooks, 0, sizeof(kunit_hooks)); -#ifdef CONFIG_MODULES unregister_module_notifier(&kunit_mod_nb); -#endif kunit_bus_shutdown(); -- cgit v1.2.3 From 6c6d8f8ba7789c221a2e4c43a0ed982c7a41f428 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 16 Jul 2025 14:32:45 +0100 Subject: lib/xxhash: remove unused functions xxh32_digest() and xxh32_update() were added in 2017 in the original xxhash commit, but have remained unused. Remove them. Link: https://lkml.kernel.org/r/20250716133245.243363-1-linux@treblig.org Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Christoph Hellwig Cc: Dave Gilbert Cc: Nick Terrell Signed-off-by: Andrew Morton --- lib/xxhash.c | 107 ----------------------------------------------------------- 1 file changed, 107 deletions(-) (limited to 'lib') diff --git a/lib/xxhash.c b/lib/xxhash.c index b5bd567aa6b3..cf629766f376 100644 --- a/lib/xxhash.c +++ b/lib/xxhash.c @@ -267,113 +267,6 @@ void xxh64_reset(struct xxh64_state *statePtr, const uint64_t seed) } EXPORT_SYMBOL(xxh64_reset); -int xxh32_update(struct xxh32_state *state, const void *input, const size_t len) -{ - const uint8_t *p = (const uint8_t *)input; - const uint8_t *const b_end = p + len; - - if (input == NULL) - return -EINVAL; - - state->total_len_32 += (uint32_t)len; - state->large_len |= (len >= 16) | (state->total_len_32 >= 16); - - if (state->memsize + len < 16) { /* fill in tmp buffer */ - memcpy((uint8_t *)(state->mem32) + state->memsize, input, len); - state->memsize += (uint32_t)len; - return 0; - } - - if (state->memsize) { /* some data left from previous update */ - const uint32_t *p32 = state->mem32; - - memcpy((uint8_t *)(state->mem32) + state->memsize, input, - 16 - state->memsize); - - state->v1 = xxh32_round(state->v1, get_unaligned_le32(p32)); - p32++; - state->v2 = xxh32_round(state->v2, get_unaligned_le32(p32)); - p32++; - state->v3 = xxh32_round(state->v3, get_unaligned_le32(p32)); - p32++; - state->v4 = xxh32_round(state->v4, get_unaligned_le32(p32)); - p32++; - - p += 16-state->memsize; - state->memsize = 0; - } - - if (p <= b_end - 16) { - const uint8_t *const limit = b_end - 16; - uint32_t v1 = state->v1; - uint32_t v2 = state->v2; - uint32_t v3 = state->v3; - uint32_t v4 = state->v4; - - do { - v1 = xxh32_round(v1, get_unaligned_le32(p)); - p += 4; - v2 = xxh32_round(v2, get_unaligned_le32(p)); - p += 4; - v3 = xxh32_round(v3, get_unaligned_le32(p)); - p += 4; - v4 = xxh32_round(v4, get_unaligned_le32(p)); - p += 4; - } while (p <= limit); - - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - - if (p < b_end) { - memcpy(state->mem32, p, (size_t)(b_end-p)); - state->memsize = (uint32_t)(b_end-p); - } - - return 0; -} -EXPORT_SYMBOL(xxh32_update); - -uint32_t xxh32_digest(const struct xxh32_state *state) -{ - const uint8_t *p = (const uint8_t *)state->mem32; - const uint8_t *const b_end = (const uint8_t *)(state->mem32) + - state->memsize; - uint32_t h32; - - if (state->large_len) { - h32 = xxh_rotl32(state->v1, 1) + xxh_rotl32(state->v2, 7) + - xxh_rotl32(state->v3, 12) + xxh_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } - - h32 += state->total_len_32; - - while (p + 4 <= b_end) { - h32 += get_unaligned_le32(p) * PRIME32_3; - h32 = xxh_rotl32(h32, 17) * PRIME32_4; - p += 4; - } - - while (p < b_end) { - h32 += (*p) * PRIME32_5; - h32 = xxh_rotl32(h32, 11) * PRIME32_1; - p++; - } - - h32 ^= h32 >> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} -EXPORT_SYMBOL(xxh32_digest); - int xxh64_update(struct xxh64_state *state, const void *input, const size_t len) { const uint8_t *p = (const uint8_t *)input; -- cgit v1.2.3 From ed4f142f72a9191b8236778093074c277435bf8a Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 18 Jul 2025 16:39:28 +0100 Subject: stackdepot: make max number of pools boot-time configurable We're hitting the WARN in depot_init_pool() about reaching the stack depot limit because we have long stacks that don't dedup very well. Introduce a new start-up parameter to allow users to set the number of maximum stack depot pools. Link: https://lkml.kernel.org/r/20250718153928.94229-1-matt@readmodwrite.com Signed-off-by: Matt Fleming Acked-by: Vlastimil Babka Acked-by: Marco Elver Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitriy Vyukov Cc: Oscar Salvador Signed-off-by: Andrew Morton --- lib/stackdepot.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 73d7b50924ef..de0b0025af2b 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -36,11 +36,11 @@ #include #include -#define DEPOT_POOLS_CAP 8192 -/* The pool_index is offset by 1 so the first record does not have a 0 handle. */ -#define DEPOT_MAX_POOLS \ - (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \ - (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP) +/* + * The pool_index is offset by 1 so the first record does not have a 0 handle. + */ +static unsigned int stack_max_pools __read_mostly = + MIN((1LL << DEPOT_POOL_INDEX_BITS) - 1, 8192); static bool stack_depot_disabled; static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT); @@ -62,7 +62,7 @@ static unsigned int stack_bucket_number_order; static unsigned int stack_hash_mask; /* Array of memory regions that store stack records. */ -static void *stack_pools[DEPOT_MAX_POOLS]; +static void **stack_pools; /* Newly allocated pool that is not yet added to stack_pools. */ static void *new_pool; /* Number of pools in stack_pools. */ @@ -101,6 +101,34 @@ static int __init disable_stack_depot(char *str) } early_param("stack_depot_disable", disable_stack_depot); +static int __init parse_max_pools(char *str) +{ + const long long limit = (1LL << (DEPOT_POOL_INDEX_BITS)) - 1; + unsigned int max_pools; + int rv; + + rv = kstrtouint(str, 0, &max_pools); + if (rv) + return rv; + + if (max_pools < 1024) { + pr_err("stack_depot_max_pools below 1024, using default of %u\n", + stack_max_pools); + goto out; + } + + if (max_pools > limit) { + pr_err("stack_depot_max_pools exceeds %lld, using default of %u\n", + limit, stack_max_pools); + goto out; + } + + stack_max_pools = max_pools; +out: + return 0; +} +early_param("stack_depot_max_pools", parse_max_pools); + void __init stack_depot_request_early_init(void) { /* Too late to request early init now. */ @@ -182,6 +210,17 @@ int __init stack_depot_early_init(void) } init_stack_table(entries); + pr_info("allocating space for %u stack pools via memblock\n", + stack_max_pools); + stack_pools = + memblock_alloc(stack_max_pools * sizeof(void *), PAGE_SIZE); + if (!stack_pools) { + pr_err("stack pools allocation failed, disabling\n"); + memblock_free(stack_table, entries * sizeof(struct list_head)); + stack_depot_disabled = true; + return -ENOMEM; + } + return 0; } @@ -231,6 +270,16 @@ int stack_depot_init(void) stack_hash_mask = entries - 1; init_stack_table(entries); + pr_info("allocating space for %u stack pools via kvcalloc\n", + stack_max_pools); + stack_pools = kvcalloc(stack_max_pools, sizeof(void *), GFP_KERNEL); + if (!stack_pools) { + pr_err("stack pools allocation failed, disabling\n"); + kvfree(stack_table); + stack_depot_disabled = true; + ret = -ENOMEM; + } + out_unlock: mutex_unlock(&stack_depot_init_mutex); @@ -245,9 +294,9 @@ static bool depot_init_pool(void **prealloc) { lockdep_assert_held(&pool_lock); - if (unlikely(pools_num >= DEPOT_MAX_POOLS)) { + if (unlikely(pools_num >= stack_max_pools)) { /* Bail out if we reached the pool limit. */ - WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */ + WARN_ON_ONCE(pools_num > stack_max_pools); /* should never happen */ WARN_ON_ONCE(!new_pool); /* to avoid unnecessary pre-allocation */ WARN_ONCE(1, "Stack depot reached limit capacity"); return false; @@ -273,7 +322,7 @@ static bool depot_init_pool(void **prealloc) * NULL; do not reset to NULL if we have reached the maximum number of * pools. */ - if (pools_num < DEPOT_MAX_POOLS) + if (pools_num < stack_max_pools) WRITE_ONCE(new_pool, NULL); else WRITE_ONCE(new_pool, STACK_DEPOT_POISON); -- cgit v1.2.3 From b753522bed0b7e388a643f58d91bd81d8849ba43 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 27 Jul 2025 11:37:33 +0300 Subject: kho: add test for kexec handover Testing kexec handover requires a kernel driver that will generate some data and preserve it with KHO on the first boot and then restore that data and verify it was preserved properly after kexec. To facilitate such test, along with the kernel driver responsible for data generation, preservation and restoration add a script that runs a kernel in a VM with a minimal /init. The /init enables KHO, loads a kernel image for kexec and runs kexec reboot. After the boot of the kexeced kernel, the driver verifies that the data was properly preserved. [rppt@kernel.org: fix section mismatch] Link: https://lkml.kernel.org/r/aIiRC8fXiOXKbPM_@kernel.org Link: https://lkml.kernel.org/r/20250727083733.2590139-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Cc: Alexander Graf Cc: Changyuan Lyu Cc: Pasha Tatashin Cc: Pratyush Yadav Cc: Shuah Khan Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 21 ++++ lib/Makefile | 1 + lib/test_kho.c | 305 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 327 insertions(+) create mode 100644 lib/test_kho.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ebe33181b6e6..4f82d38e3c45 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3225,6 +3225,27 @@ config TEST_OBJPOOL If unsure, say N. +config TEST_KEXEC_HANDOVER + bool "Test for Kexec HandOver" + default n + depends on KEXEC_HANDOVER + help + This option enables test for Kexec HandOver (KHO). + The test consists of two parts: saving kernel data before kexec and + restoring the data after kexec and verifying that it was properly + handed over. This test module creates and saves data on the boot of + the first kernel and restores and verifies the data on the boot of + kexec'ed kernel. + + For detailed documentation about KHO, see Documentation/core-api/kho. + + To run the test run: + + tools/testing/selftests/kho/vmtest.sh -h + + If unsure, say N. + + config INT_POW_KUNIT_TEST tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 88d6228089a8..dadf0028b319 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_TEST_HMM) += test_hmm.o obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o obj-$(CONFIG_TEST_REF_TRACKER) += test_ref_tracker.o obj-$(CONFIG_TEST_OBJPOOL) += test_objpool.o +obj-$(CONFIG_TEST_KEXEC_HANDOVER) += test_kho.o obj-$(CONFIG_TEST_FPU) += test_fpu.o test_fpu-y := test_fpu_glue.o test_fpu_impl.o diff --git a/lib/test_kho.c b/lib/test_kho.c new file mode 100644 index 000000000000..c2eb899c3b45 --- /dev/null +++ b/lib/test_kho.c @@ -0,0 +1,305 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Test module for KHO + * Copyright (c) 2025 Microsoft Corporation. + * + * Authors: + * Saurabh Sengar + * Mike Rapoport + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define KHO_TEST_MAGIC 0x4b484f21 /* KHO! */ +#define KHO_TEST_FDT "kho_test" +#define KHO_TEST_COMPAT "kho-test-v1" + +static long max_mem = (PAGE_SIZE << MAX_PAGE_ORDER) * 2; +module_param(max_mem, long, 0644); + +struct kho_test_state { + unsigned int nr_folios; + struct folio **folios; + struct folio *fdt; + __wsum csum; +}; + +static struct kho_test_state kho_test_state; + +static int kho_test_notifier(struct notifier_block *self, unsigned long cmd, + void *v) +{ + struct kho_test_state *state = &kho_test_state; + struct kho_serialization *ser = v; + int err = 0; + + switch (cmd) { + case KEXEC_KHO_ABORT: + return NOTIFY_DONE; + case KEXEC_KHO_FINALIZE: + /* Handled below */ + break; + default: + return NOTIFY_BAD; + } + + err |= kho_preserve_folio(state->fdt); + err |= kho_add_subtree(ser, KHO_TEST_FDT, folio_address(state->fdt)); + + return err ? NOTIFY_BAD : NOTIFY_DONE; +} + +static struct notifier_block kho_test_nb = { + .notifier_call = kho_test_notifier, +}; + +static int kho_test_save_data(struct kho_test_state *state, void *fdt) +{ + phys_addr_t *folios_info __free(kvfree) = NULL; + int err = 0; + + folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info), + GFP_KERNEL); + if (!folios_info) + return -ENOMEM; + + for (int i = 0; i < state->nr_folios; i++) { + struct folio *folio = state->folios[i]; + unsigned int order = folio_order(folio); + + folios_info[i] = virt_to_phys(folio_address(folio)) | order; + + err = kho_preserve_folio(folio); + if (err) + return err; + } + + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property(fdt, "folios_info", folios_info, + state->nr_folios * sizeof(*folios_info)); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + return err; +} + +static int kho_test_prepare_fdt(struct kho_test_state *state) +{ + const char compatible[] = KHO_TEST_COMPAT; + unsigned int magic = KHO_TEST_MAGIC; + ssize_t fdt_size; + int err = 0; + void *fdt; + + fdt_size = state->nr_folios * sizeof(phys_addr_t) + PAGE_SIZE; + state->fdt = folio_alloc(GFP_KERNEL, get_order(fdt_size)); + if (!state->fdt) + return -ENOMEM; + + fdt = folio_address(state->fdt); + + err |= fdt_create(fdt, fdt_size); + err |= fdt_finish_reservemap(fdt); + + err |= fdt_begin_node(fdt, ""); + err |= fdt_property(fdt, "compatible", compatible, sizeof(compatible)); + err |= fdt_property(fdt, "magic", &magic, sizeof(magic)); + err |= kho_test_save_data(state, fdt); + err |= fdt_end_node(fdt); + + err |= fdt_finish(fdt); + + if (err) + folio_put(state->fdt); + + return err; +} + +static int kho_test_generate_data(struct kho_test_state *state) +{ + size_t alloc_size = 0; + __wsum csum = 0; + + while (alloc_size < max_mem) { + int order = get_random_u32() % NR_PAGE_ORDERS; + struct folio *folio; + unsigned int size; + void *addr; + + /* cap allocation so that we won't exceed max_mem */ + if (alloc_size + (PAGE_SIZE << order) > max_mem) { + order = get_order(max_mem - alloc_size); + if (order) + order--; + } + size = PAGE_SIZE << order; + + folio = folio_alloc(GFP_KERNEL | __GFP_NORETRY, order); + if (!folio) + goto err_free_folios; + + state->folios[state->nr_folios++] = folio; + addr = folio_address(folio); + get_random_bytes(addr, size); + csum = csum_partial(addr, size, csum); + alloc_size += size; + } + + state->csum = csum; + return 0; + +err_free_folios: + for (int i = 0; i < state->nr_folios; i++) + folio_put(state->folios[i]); + return -ENOMEM; +} + +static int kho_test_save(void) +{ + struct kho_test_state *state = &kho_test_state; + struct folio **folios __free(kvfree) = NULL; + unsigned long max_nr; + int err; + + max_mem = PAGE_ALIGN(max_mem); + max_nr = max_mem >> PAGE_SHIFT; + + folios = kvmalloc_array(max_nr, sizeof(*state->folios), GFP_KERNEL); + if (!folios) + return -ENOMEM; + state->folios = folios; + + err = kho_test_generate_data(state); + if (err) + return err; + + err = kho_test_prepare_fdt(state); + if (err) + return err; + + return register_kho_notifier(&kho_test_nb); +} + +static int kho_test_restore_data(const void *fdt, int node) +{ + const unsigned int *nr_folios; + const phys_addr_t *folios_info; + const __wsum *old_csum; + __wsum csum = 0; + int len; + + node = fdt_path_offset(fdt, "/data"); + + nr_folios = fdt_getprop(fdt, node, "nr_folios", &len); + if (!nr_folios || len != sizeof(*nr_folios)) + return -EINVAL; + + old_csum = fdt_getprop(fdt, node, "csum", &len); + if (!old_csum || len != sizeof(*old_csum)) + return -EINVAL; + + folios_info = fdt_getprop(fdt, node, "folios_info", &len); + if (!folios_info || len != sizeof(*folios_info) * *nr_folios) + return -EINVAL; + + for (int i = 0; i < *nr_folios; i++) { + unsigned int order = folios_info[i] & ~PAGE_MASK; + phys_addr_t phys = folios_info[i] & PAGE_MASK; + unsigned int size = PAGE_SIZE << order; + struct folio *folio; + + folio = kho_restore_folio(phys); + if (!folio) + break; + + if (folio_order(folio) != order) + break; + + csum = csum_partial(folio_address(folio), size, csum); + folio_put(folio); + } + + if (csum != *old_csum) + return -EINVAL; + + return 0; +} + +static int kho_test_restore(phys_addr_t fdt_phys) +{ + void *fdt = phys_to_virt(fdt_phys); + const unsigned int *magic; + int node, len, err; + + node = fdt_path_offset(fdt, "/"); + if (node < 0) + return -EINVAL; + + if (fdt_node_check_compatible(fdt, node, KHO_TEST_COMPAT)) + return -EINVAL; + + magic = fdt_getprop(fdt, node, "magic", &len); + if (!magic || len != sizeof(*magic)) + return -EINVAL; + + if (*magic != KHO_TEST_MAGIC) + return -EINVAL; + + err = kho_test_restore_data(fdt, node); + if (err) + return err; + + pr_info("KHO restore succeeded\n"); + return 0; +} + +static int __init kho_test_init(void) +{ + phys_addr_t fdt_phys; + int err; + + err = kho_retrieve_subtree(KHO_TEST_FDT, &fdt_phys); + if (!err) + return kho_test_restore(fdt_phys); + + if (err != -ENOENT) { + pr_warn("failed to retrieve %s FDT: %d\n", KHO_TEST_FDT, err); + return err; + } + + return kho_test_save(); +} +module_init(kho_test_init); + +static void kho_test_cleanup(void) +{ + for (int i = 0; i < kho_test_state.nr_folios; i++) + folio_put(kho_test_state.folios[i]); + + kvfree(kho_test_state.folios); +} + +static void __exit kho_test_exit(void) +{ + unregister_kho_notifier(&kho_test_nb); + kho_test_cleanup(); +} +module_exit(kho_test_exit); + +MODULE_AUTHOR("Mike Rapoport "); +MODULE_DESCRIPTION("KHO test module"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 42e6c6ce03fd3e41e39a0f93f9b1a1d9fa664338 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:12 +0800 Subject: lib/sbitmap: convert shallow_depth from one word to the whole sbitmap Currently elevators will record internal 'async_depth' to throttle asynchronous requests, and they both calculate shallow_dpeth based on sb->shift, with the respect that sb->shift is the available tags in one word. However, sb->shift is not the availbale tags in the last word, see __map_depth: if (index == sb->map_nr - 1) return sb->depth - (index << sb->shift); For consequence, if the last word is used, more tags can be get than expected, for example, assume nr_requests=256 and there are four words, in the worst case if user set nr_requests=32, then the first word is the last word, and still use bits per word, which is 64, to calculate async_depth is wrong. One the ohter hand, due to cgroup qos, bfq can allow only one request to be allocated, and set shallow_dpeth=1 will still allow the number of words request to be allocated. Fix this problems by using shallow_depth to the whole sbitmap instead of per word, also change kyber, mq-deadline and bfq to follow this, a new helper __map_depth_with_shallow() is introduced to calculate available bits in each word. Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20250807032413.1469456-2-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 56 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index d3412984170c..c07e3cd82e29 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -208,8 +208,28 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map, return nr; } +static unsigned int __map_depth_with_shallow(const struct sbitmap *sb, + int index, + unsigned int shallow_depth) +{ + u64 shallow_word_depth; + unsigned int word_depth, reminder; + + word_depth = __map_depth(sb, index); + if (shallow_depth >= sb->depth) + return word_depth; + + shallow_word_depth = word_depth * shallow_depth; + reminder = do_div(shallow_word_depth, sb->depth); + + if (reminder >= (index + 1) * word_depth) + shallow_word_depth++; + + return (unsigned int)shallow_word_depth; +} + static int sbitmap_find_bit(struct sbitmap *sb, - unsigned int depth, + unsigned int shallow_depth, unsigned int index, unsigned int alloc_hint, bool wrap) @@ -218,12 +238,12 @@ static int sbitmap_find_bit(struct sbitmap *sb, int nr = -1; for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_word(&sb->map[index], - min_t(unsigned int, - __map_depth(sb, index), - depth), - alloc_hint, wrap); + unsigned int depth = __map_depth_with_shallow(sb, index, + shallow_depth); + if (depth) + nr = sbitmap_find_bit_in_word(&sb->map[index], depth, + alloc_hint, wrap); if (nr != -1) { nr += index << sb->shift; break; @@ -406,27 +426,9 @@ EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); static unsigned int sbq_calc_wake_batch(struct sbitmap_queue *sbq, unsigned int depth) { - unsigned int wake_batch; - unsigned int shallow_depth; - - /* - * Each full word of the bitmap has bits_per_word bits, and there might - * be a partial word. There are depth / bits_per_word full words and - * depth % bits_per_word bits left over. In bitwise arithmetic: - * - * bits_per_word = 1 << shift - * depth / bits_per_word = depth >> shift - * depth % bits_per_word = depth & ((1 << shift) - 1) - * - * Each word can be limited to sbq->min_shallow_depth bits. - */ - shallow_depth = min(1U << sbq->sb.shift, sbq->min_shallow_depth); - depth = ((depth >> sbq->sb.shift) * shallow_depth + - min(depth & ((1U << sbq->sb.shift) - 1), shallow_depth)); - wake_batch = clamp_t(unsigned int, depth / SBQ_WAIT_QUEUES, 1, - SBQ_WAKE_BATCH); - - return wake_batch; + return clamp_t(unsigned int, + min(depth, sbq->min_shallow_depth) / SBQ_WAIT_QUEUES, + 1, SBQ_WAKE_BATCH); } int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, -- cgit v1.2.3 From 45fa9f97e65231a9fd4f9429489cb74c10ccd0fd Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Thu, 7 Aug 2025 11:24:13 +0800 Subject: lib/sbitmap: make sbitmap_get_shallow() internal Because it's only used in sbitmap.c Signed-off-by: Yu Kuai Reviewed-by: Damien Le Moal Reviewed-by: Jan Kara Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20250807032413.1469456-3-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index c07e3cd82e29..4d188d05db15 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -307,7 +307,22 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); } -int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) +/** + * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, + * limiting the depth used from each word. + * @sb: Bitmap to allocate from. + * @shallow_depth: The maximum number of bits to allocate from the bitmap. + * + * This rather specific operation allows for having multiple users with + * different allocation limits. E.g., there can be a high-priority class that + * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() + * with a @shallow_depth of (sb->depth >> 1). Then, the low-priority + * class can only allocate half of the total bits in the bitmap, preventing it + * from starving out the high-priority class. + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +static int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) { int nr; unsigned int hint, depth; @@ -322,7 +337,6 @@ int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) return nr; } -EXPORT_SYMBOL_GPL(sbitmap_get_shallow); bool sbitmap_any_bit_set(const struct sbitmap *sb) { -- cgit v1.2.3 From b41dc83f0790fd3488a45b31de0b0c3af7d441fe Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 Aug 2025 11:26:29 -0700 Subject: kunit, lib/crypto: Move run_irq_test() to common header Rename run_irq_test() to kunit_run_irq_test() and move it to a public header so that it can be reused by crc_kunit. Link: https://lore.kernel.org/r/20250811182631.376302-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/hash-test-template.h | 123 ++-------------------------------- 1 file changed, 4 insertions(+), 119 deletions(-) (limited to 'lib') diff --git a/lib/crypto/tests/hash-test-template.h b/lib/crypto/tests/hash-test-template.h index f437a0a9ac6c..61b43e62779f 100644 --- a/lib/crypto/tests/hash-test-template.h +++ b/lib/crypto/tests/hash-test-template.h @@ -5,11 +5,9 @@ * * Copyright 2025 Google LLC */ +#include #include -#include -#include #include -#include /* test_buf is a guarded buffer, i.e. &test_buf[TEST_BUF_LEN] is not mapped. */ #define TEST_BUF_LEN 16384 @@ -319,119 +317,6 @@ static void test_hash_ctx_zeroization(struct kunit *test) "Hash context was not zeroized by finalization"); } -#define IRQ_TEST_HRTIMER_INTERVAL us_to_ktime(5) - -struct hash_irq_test_state { - bool (*func)(void *test_specific_state); - void *test_specific_state; - bool task_func_reported_failure; - bool hardirq_func_reported_failure; - bool softirq_func_reported_failure; - unsigned long hardirq_func_calls; - unsigned long softirq_func_calls; - struct hrtimer timer; - struct work_struct bh_work; -}; - -static enum hrtimer_restart hash_irq_test_timer_func(struct hrtimer *timer) -{ - struct hash_irq_test_state *state = - container_of(timer, typeof(*state), timer); - - WARN_ON_ONCE(!in_hardirq()); - state->hardirq_func_calls++; - - if (!state->func(state->test_specific_state)) - state->hardirq_func_reported_failure = true; - - hrtimer_forward_now(&state->timer, IRQ_TEST_HRTIMER_INTERVAL); - queue_work(system_bh_wq, &state->bh_work); - return HRTIMER_RESTART; -} - -static void hash_irq_test_bh_work_func(struct work_struct *work) -{ - struct hash_irq_test_state *state = - container_of(work, typeof(*state), bh_work); - - WARN_ON_ONCE(!in_serving_softirq()); - state->softirq_func_calls++; - - if (!state->func(state->test_specific_state)) - state->softirq_func_reported_failure = true; -} - -/* - * Helper function which repeatedly runs the given @func in task, softirq, and - * hardirq context concurrently, and reports a failure to KUnit if any - * invocation of @func in any context returns false. @func is passed - * @test_specific_state as its argument. At most 3 invocations of @func will - * run concurrently: one in each of task, softirq, and hardirq context. - * - * The main purpose of this interrupt context testing is to validate fallback - * code paths that run in contexts where the normal code path cannot be used, - * typically due to the FPU or vector registers already being in-use in kernel - * mode. These code paths aren't covered when the test code is executed only by - * the KUnit test runner thread in task context. The reason for the concurrency - * is because merely using hardirq context is not sufficient to reach a fallback - * code path on some architectures; the hardirq actually has to occur while the - * FPU or vector unit was already in-use in kernel mode. - * - * Another purpose of this testing is to detect issues with the architecture's - * irq_fpu_usable() and kernel_fpu_begin/end() or equivalent functions, - * especially in softirq context when the softirq may have interrupted a task - * already using kernel-mode FPU or vector (if the arch didn't prevent that). - * Crypto functions are often executed in softirqs, so this is important. - */ -static void run_irq_test(struct kunit *test, bool (*func)(void *), - int max_iterations, void *test_specific_state) -{ - struct hash_irq_test_state state = { - .func = func, - .test_specific_state = test_specific_state, - }; - unsigned long end_jiffies; - - /* - * Set up a hrtimer (the way we access hardirq context) and a work - * struct for the BH workqueue (the way we access softirq context). - */ - hrtimer_setup_on_stack(&state.timer, hash_irq_test_timer_func, - CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD); - INIT_WORK_ONSTACK(&state.bh_work, hash_irq_test_bh_work_func); - - /* Run for up to max_iterations or 1 second, whichever comes first. */ - end_jiffies = jiffies + HZ; - hrtimer_start(&state.timer, IRQ_TEST_HRTIMER_INTERVAL, - HRTIMER_MODE_REL_HARD); - for (int i = 0; i < max_iterations && !time_after(jiffies, end_jiffies); - i++) { - if (!func(test_specific_state)) - state.task_func_reported_failure = true; - } - - /* Cancel the timer and work. */ - hrtimer_cancel(&state.timer); - flush_work(&state.bh_work); - - /* Sanity check: the timer and BH functions should have been run. */ - KUNIT_EXPECT_GT_MSG(test, state.hardirq_func_calls, 0, - "Timer function was not called"); - KUNIT_EXPECT_GT_MSG(test, state.softirq_func_calls, 0, - "BH work function was not called"); - - /* Check for incorrect hash values reported from any context. */ - KUNIT_EXPECT_FALSE_MSG( - test, state.task_func_reported_failure, - "Incorrect hash values reported from task context"); - KUNIT_EXPECT_FALSE_MSG( - test, state.hardirq_func_reported_failure, - "Incorrect hash values reported from hardirq context"); - KUNIT_EXPECT_FALSE_MSG( - test, state.softirq_func_reported_failure, - "Incorrect hash values reported from softirq context"); -} - #define IRQ_TEST_DATA_LEN 256 #define IRQ_TEST_NUM_BUFFERS 3 /* matches max concurrency level */ @@ -469,7 +354,7 @@ static void test_hash_interrupt_context_1(struct kunit *test) HASH(&test_buf[i * IRQ_TEST_DATA_LEN], IRQ_TEST_DATA_LEN, state.expected_hashes[i]); - run_irq_test(test, hash_irq_test1_func, 100000, &state); + kunit_run_irq_test(test, hash_irq_test1_func, 100000, &state); } struct hash_irq_test2_hash_ctx { @@ -500,7 +385,7 @@ static bool hash_irq_test2_func(void *state_) if (WARN_ON_ONCE(ctx == &state->ctxs[ARRAY_SIZE(state->ctxs)])) { /* * This should never happen, as the number of contexts is equal - * to the maximum concurrency level of run_irq_test(). + * to the maximum concurrency level of kunit_run_irq_test(). */ return false; } @@ -566,7 +451,7 @@ static void test_hash_interrupt_context_2(struct kunit *test) state->update_lens[state->num_steps++] = remaining; state->num_steps += 2; /* for init and final */ - run_irq_test(test, hash_irq_test2_func, 250000, state); + kunit_run_irq_test(test, hash_irq_test2_func, 250000, state); } #define UNKEYED_HASH_KUNIT_CASES \ -- cgit v1.2.3 From 842ec21357f15ff722695dd87daf99823f297185 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 Aug 2025 11:26:30 -0700 Subject: lib/crc: crc_kunit: Test CRC computation in interrupt contexts Test that if CRCs are computed in task, softirq, and hardirq context concurrently, then all results are as expected. Implement this using kunit_run_irq_test() which is also used by the lib/crypto/ tests. As with the corresponding lib/crypto/ tests, the purpose of this is to test fallback code paths and to exercise edge cases in the architecture's support for in-kernel FPU/SIMD/vector. Remove the code from crc_test() that sometimes disabled interrupts, as that was just an incomplete attempt to achieve similar test coverage. Link: https://lore.kernel.org/r/20250811182631.376302-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crc/tests/crc_kunit.c | 62 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/crc/tests/crc_kunit.c b/lib/crc/tests/crc_kunit.c index f08d985d8860..9a450e25ac81 100644 --- a/lib/crc/tests/crc_kunit.c +++ b/lib/crc/tests/crc_kunit.c @@ -6,6 +6,7 @@ * * Author: Eric Biggers */ +#include #include #include #include @@ -141,6 +142,54 @@ static size_t generate_random_length(size_t max_length) return len % (max_length + 1); } +#define IRQ_TEST_DATA_LEN 512 +#define IRQ_TEST_NUM_BUFFERS 3 /* matches max concurrency level */ + +struct crc_irq_test_state { + const struct crc_variant *v; + u64 initial_crc; + u64 expected_crcs[IRQ_TEST_NUM_BUFFERS]; + atomic_t seqno; +}; + +/* + * Compute the CRC of one of the test messages and verify that it matches the + * expected CRC from @state->expected_crcs. To increase the chance of detecting + * problems, cycle through multiple messages. + */ +static bool crc_irq_test_func(void *state_) +{ + struct crc_irq_test_state *state = state_; + const struct crc_variant *v = state->v; + u32 i = (u32)atomic_inc_return(&state->seqno) % IRQ_TEST_NUM_BUFFERS; + u64 actual_crc = v->func(state->initial_crc, + &test_buffer[i * IRQ_TEST_DATA_LEN], + IRQ_TEST_DATA_LEN); + + return actual_crc == state->expected_crcs[i]; +} + +/* + * Test that if CRCs are computed in task, softirq, and hardirq context + * concurrently, then all results are as expected. + */ +static void crc_interrupt_context_test(struct kunit *test, + const struct crc_variant *v) +{ + struct crc_irq_test_state state = { + .v = v, + .initial_crc = generate_random_initial_crc(v), + }; + + for (int i = 0; i < IRQ_TEST_NUM_BUFFERS; i++) { + state.expected_crcs[i] = crc_ref( + v, state.initial_crc, + &test_buffer[i * IRQ_TEST_DATA_LEN], IRQ_TEST_DATA_LEN); + } + + kunit_run_irq_test(test, crc_irq_test_func, 100000, &state); +} + /* Test that v->func gives the same CRCs as a reference implementation. */ static void crc_test(struct kunit *test, const struct crc_variant *v) { @@ -149,7 +198,6 @@ static void crc_test(struct kunit *test, const struct crc_variant *v) for (i = 0; i < CRC_KUNIT_NUM_TEST_ITERS; i++) { u64 init_crc, expected_crc, actual_crc; size_t len, offset; - bool nosimd; init_crc = generate_random_initial_crc(v); len = generate_random_length(CRC_KUNIT_MAX_LEN); @@ -168,22 +216,18 @@ static void crc_test(struct kunit *test, const struct crc_variant *v) /* Refresh the data occasionally. */ prandom_bytes_state(&rng, &test_buffer[offset], len); - nosimd = rand32() % 8 == 0; - /* * Compute the CRC, and verify that it equals the CRC computed * by a simple bit-at-a-time reference implementation. */ expected_crc = crc_ref(v, init_crc, &test_buffer[offset], len); - if (nosimd) - local_irq_disable(); actual_crc = v->func(init_crc, &test_buffer[offset], len); - if (nosimd) - local_irq_enable(); KUNIT_EXPECT_EQ_MSG(test, expected_crc, actual_crc, - "Wrong result with len=%zu offset=%zu nosimd=%d", - len, offset, nosimd); + "Wrong result with len=%zu offset=%zu", + len, offset); } + + crc_interrupt_context_test(test, v); } static __always_inline void -- cgit v1.2.3 From c2a0c5156a40c40edb0cce80ce11c97ab39c67e3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 11 Aug 2025 11:26:31 -0700 Subject: lib/crc: Use underlying functions instead of crypto_simd_usable() Since crc_kunit now tests the fallback code paths without using crypto_simd_disabled_for_test, make the CRC code just use the underlying may_use_simd() and irq_fpu_usable() functions directly instead of crypto_simd_usable(). This eliminates an unnecessary layer. Take the opportunity to add likely() and unlikely() annotations as well. Link: https://lore.kernel.org/r/20250811182631.376302-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crc/arm/crc-t10dif.h | 6 ++---- lib/crc/arm/crc32.h | 6 ++---- lib/crc/arm64/crc-t10dif.h | 6 ++---- lib/crc/arm64/crc32.h | 11 ++++++----- lib/crc/powerpc/crc-t10dif.h | 5 +++-- lib/crc/powerpc/crc32.h | 5 +++-- lib/crc/x86/crc-pclmul-template.h | 3 +-- lib/crc/x86/crc32.h | 2 +- 8 files changed, 20 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h index 2edf7e9681d0..1a969f4024d4 100644 --- a/lib/crc/arm/crc-t10dif.h +++ b/lib/crc/arm/crc-t10dif.h @@ -5,8 +5,6 @@ * Copyright (C) 2016 Linaro Ltd */ -#include - #include #include @@ -23,7 +21,7 @@ static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) { if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { if (static_branch_likely(&have_pmull)) { - if (crypto_simd_usable()) { + if (likely(may_use_simd())) { kernel_neon_begin(); crc = crc_t10dif_pmull64(crc, data, length); kernel_neon_end(); @@ -31,7 +29,7 @@ static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) } } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && static_branch_likely(&have_neon) && - crypto_simd_usable()) { + likely(may_use_simd())) { u8 buf[16] __aligned(16); kernel_neon_begin(); diff --git a/lib/crc/arm/crc32.h b/lib/crc/arm/crc32.h index 018007e162a2..ae71aa60b7a7 100644 --- a/lib/crc/arm/crc32.h +++ b/lib/crc/arm/crc32.h @@ -7,8 +7,6 @@ #include -#include - #include #include #include @@ -34,7 +32,7 @@ static inline u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len) static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) { if (len >= PMULL_MIN_LEN + 15 && - static_branch_likely(&have_pmull) && crypto_simd_usable()) { + static_branch_likely(&have_pmull) && likely(may_use_simd())) { size_t n = -(uintptr_t)p & 15; /* align p to 16-byte boundary */ @@ -63,7 +61,7 @@ static inline u32 crc32c_scalar(u32 crc, const u8 *p, size_t len) static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) { if (len >= PMULL_MIN_LEN + 15 && - static_branch_likely(&have_pmull) && crypto_simd_usable()) { + static_branch_likely(&have_pmull) && likely(may_use_simd())) { size_t n = -(uintptr_t)p & 15; /* align p to 16-byte boundary */ diff --git a/lib/crc/arm64/crc-t10dif.h b/lib/crc/arm64/crc-t10dif.h index c4521a7f1ee9..435a990ec43c 100644 --- a/lib/crc/arm64/crc-t10dif.h +++ b/lib/crc/arm64/crc-t10dif.h @@ -7,8 +7,6 @@ #include -#include - #include #include @@ -25,7 +23,7 @@ static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) { if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { if (static_branch_likely(&have_pmull)) { - if (crypto_simd_usable()) { + if (likely(may_use_simd())) { kernel_neon_begin(); crc = crc_t10dif_pmull_p64(crc, data, length); kernel_neon_end(); @@ -33,7 +31,7 @@ static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) } } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && static_branch_likely(&have_asimd) && - crypto_simd_usable()) { + likely(may_use_simd())) { u8 buf[16]; kernel_neon_begin(); diff --git a/lib/crc/arm64/crc32.h b/lib/crc/arm64/crc32.h index 6e5dec45f05d..31e649cd40a2 100644 --- a/lib/crc/arm64/crc32.h +++ b/lib/crc/arm64/crc32.h @@ -5,8 +5,6 @@ #include #include -#include - // The minimum input length to consider the 4-way interleaved code path static const size_t min_len = 1024; @@ -23,7 +21,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_le_base(crc, p, len); - if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + if (len >= min_len && cpu_have_named_feature(PMULL) && + likely(may_use_simd())) { kernel_neon_begin(); crc = crc32_le_arm64_4way(crc, p, len); kernel_neon_end(); @@ -43,7 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32c_base(crc, p, len); - if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + if (len >= min_len && cpu_have_named_feature(PMULL) && + likely(may_use_simd())) { kernel_neon_begin(); crc = crc32c_le_arm64_4way(crc, p, len); kernel_neon_end(); @@ -63,7 +63,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_be_base(crc, p, len); - if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { + if (len >= min_len && cpu_have_named_feature(PMULL) && + likely(may_use_simd())) { kernel_neon_begin(); crc = crc32_be_arm64_4way(crc, p, len); kernel_neon_end(); diff --git a/lib/crc/powerpc/crc-t10dif.h b/lib/crc/powerpc/crc-t10dif.h index 59e16804a6ea..e033d5f57bae 100644 --- a/lib/crc/powerpc/crc-t10dif.h +++ b/lib/crc/powerpc/crc-t10dif.h @@ -6,8 +6,8 @@ * [based on crc32c-vpmsum_glue.c] */ +#include #include -#include #include #include #include @@ -29,7 +29,8 @@ static inline u16 crc_t10dif_arch(u16 crci, const u8 *p, size_t len) u32 crc = crci; if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || - !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) + !static_branch_likely(&have_vec_crypto) || + unlikely(!may_use_simd())) return crc_t10dif_generic(crc, p, len); if ((unsigned long)p & VMX_ALIGN_MASK) { diff --git a/lib/crc/powerpc/crc32.h b/lib/crc/powerpc/crc32.h index 811cc2e6ed24..cc8fa3913d4e 100644 --- a/lib/crc/powerpc/crc32.h +++ b/lib/crc/powerpc/crc32.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include -#include #include #include #include @@ -24,7 +24,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) unsigned int tail; if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || - !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable()) + !static_branch_likely(&have_vec_crypto) || + unlikely(!may_use_simd())) return crc32c_base(crc, p, len); if ((unsigned long)p & VMX_ALIGN_MASK) { diff --git a/lib/crc/x86/crc-pclmul-template.h b/lib/crc/x86/crc-pclmul-template.h index 35c950d7010c..02744831c6fa 100644 --- a/lib/crc/x86/crc-pclmul-template.h +++ b/lib/crc/x86/crc-pclmul-template.h @@ -12,7 +12,6 @@ #include #include -#include #include #include "crc-pclmul-consts.h" @@ -57,7 +56,7 @@ static inline bool have_avx512(void) #define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq) \ do { \ if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) && \ - crypto_simd_usable()) { \ + likely(irq_fpu_usable())) { \ const void *consts_ptr; \ \ consts_ptr = (consts).fold_across_128_bits_consts; \ diff --git a/lib/crc/x86/crc32.h b/lib/crc/x86/crc32.h index cea2c96d08d0..2c4a5976654a 100644 --- a/lib/crc/x86/crc32.h +++ b/lib/crc/x86/crc32.h @@ -44,7 +44,7 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) return crc32c_base(crc, p, len); if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN && - static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) { + static_branch_likely(&have_pclmulqdq) && likely(irq_fpu_usable())) { /* * Long length, the vector registers are usable, and the CPU is * 64-bit and supports both CRC32 and PCLMULQDQ instructions. -- cgit v1.2.3 From 52966bf71de98fef4ca7b3be1349adc7459d6d53 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 8 Aug 2025 07:45:23 -0400 Subject: ref_tracker: use %p instead of %px in debugfs dentry name As Kees points out, this is a kernel address leak, and debugging is not a sufficiently good reason to expose the real kernel address. Fixes: 65b584f53611 ("ref_tracker: automatically register a file in debugfs for a ref_tracker_dir") Reported-by: Kees Cook Closes: https://lore.kernel.org/netdev/202507301603.62E553F93@keescook/ Signed-off-by: Jeff Layton Reviewed-by: Eric Dumazet Signed-off-by: Jakub Kicinski --- lib/ref_tracker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index a9e6ffcff04b..cce12287708e 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -434,7 +434,7 @@ void ref_tracker_dir_debugfs(struct ref_tracker_dir *dir) if (dentry && !xa_is_err(dentry)) return; - ret = snprintf(name, sizeof(name), "%s@%px", dir->class, dir); + ret = snprintf(name, sizeof(name), "%s@%p", dir->class, dir); name[sizeof(name) - 1] = '\0'; if (ret < sizeof(name)) { -- cgit v1.2.3 From d73915fdc0011d536c03856be7ec451f6a5fb4ff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 15:42:18 -0700 Subject: lib/crypto: sha: Update Kconfig help for SHA1 and SHA256 Update the help text for CRYPTO_LIB_SHA1 and CRYPTO_LIB_SHA256 to reflect the addition of HMAC support, and to be consistent with CRYPTO_LIB_SHA512. Link: https://lore.kernel.org/r/20250731224218.137947-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index c2b65b6a9bb6..1e6b008f8fca 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -140,8 +140,8 @@ config CRYPTO_LIB_CHACHA20POLY1305 config CRYPTO_LIB_SHA1 tristate help - The SHA-1 library functions. Select this if your module uses any of - the functions from . + The SHA-1 and HMAC-SHA1 library functions. Select this if your module + uses any of the functions from . config CRYPTO_LIB_SHA1_ARCH bool @@ -157,9 +157,9 @@ config CRYPTO_LIB_SHA1_ARCH config CRYPTO_LIB_SHA256 tristate help - Enable the SHA-256 library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. + The SHA-224, SHA-256, HMAC-SHA224, and HMAC-SHA256 library functions. + Select this if your module uses any of these functions from + . config CRYPTO_LIB_SHA256_ARCH bool -- cgit v1.2.3 From fd7e5de4b2eddd34e3567cd419812d8869ef4f13 Mon Sep 17 00:00:00 2001 From: Tal Zussman Date: Thu, 14 Aug 2025 02:51:57 -0400 Subject: lib/crypto: ensure generated *.S files are removed on make clean make clean does not check the kernel config when removing files. As such, additions to clean-files under CONFIG_ARM or CONFIG_ARM64 are not evaluated. For example, when building on arm64, this means that lib/crypto/arm64/sha{256,512}-core.S are left over after make clean. Set clean-files unconditionally to ensure that make clean removes these files. Fixes: e96cb9507f2d ("lib/crypto: sha256: Consolidate into single module") Fixes: 24c91b62ac50 ("lib/crypto: arm/sha512: Migrate optimized SHA-512 code to library") Fixes: 60e3f1e9b7a5 ("lib/crypto: arm64/sha512: Migrate optimized SHA-512 code to library") Signed-off-by: Tal Zussman Link: https://lore.kernel.org/r/20250814-crypto_clean-v2-1-659a2dc86302@columbia.edu Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index e4151be2ebd4..539d5d59a50e 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -100,7 +100,6 @@ ifeq ($(CONFIG_ARM),y) libsha256-y += arm/sha256-ce.o arm/sha256-core.o $(obj)/arm/sha256-core.S: $(src)/arm/sha256-armv4.pl $(call cmd,perlasm) -clean-files += arm/sha256-core.S AFLAGS_arm/sha256-core.o += $(aflags-thumb2-y) endif @@ -108,7 +107,6 @@ ifeq ($(CONFIG_ARM64),y) libsha256-y += arm64/sha256-core.o $(obj)/arm64/sha256-core.S: $(src)/arm64/sha2-armv8.pl $(call cmd,perlasm_with_args) -clean-files += arm64/sha256-core.S libsha256-$(CONFIG_KERNEL_MODE_NEON) += arm64/sha256-ce.o endif @@ -132,7 +130,6 @@ ifeq ($(CONFIG_ARM),y) libsha512-y += arm/sha512-core.o $(obj)/arm/sha512-core.S: $(src)/arm/sha512-armv4.pl $(call cmd,perlasm) -clean-files += arm/sha512-core.S AFLAGS_arm/sha512-core.o += $(aflags-thumb2-y) endif @@ -140,7 +137,6 @@ ifeq ($(CONFIG_ARM64),y) libsha512-y += arm64/sha512-core.o $(obj)/arm64/sha512-core.S: $(src)/arm64/sha2-armv8.pl $(call cmd,perlasm_with_args) -clean-files += arm64/sha512-core.S libsha512-$(CONFIG_KERNEL_MODE_NEON) += arm64/sha512-ce-core.o endif @@ -167,3 +163,7 @@ obj-$(CONFIG_PPC) += powerpc/ obj-$(CONFIG_RISCV) += riscv/ obj-$(CONFIG_S390) += s390/ obj-$(CONFIG_X86) += x86/ + +# clean-files must be defined unconditionally +clean-files += arm/sha256-core.S arm/sha512-core.S +clean-files += arm64/sha256-core.S arm64/sha512-core.S -- cgit v1.2.3 From 29128da29dbabfe567c0133bf069052d2d5d588c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 13 Aug 2025 08:28:30 +0200 Subject: kunit: Always descend into kunit directory during build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For kbuild to properly clean up these build artifacts in the subdirectory, even after CONFIG_KUNIT changed do disabled, the directory needs to be processed always. Pushing the special logic for hook.o into the kunit Makefile also makes the logic easier to understand. Link: https://lore.kernel.org/r/20250813-kunit-always-descend-v1-1-7bbd387ff13b@linutronix.de Signed-off-by: Thomas Weißschuh Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/Makefile | 4 ---- lib/kunit/Makefile | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 392ff808c9b9..15a03f4c16e2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -109,11 +109,7 @@ test_fpu-y := test_fpu_glue.o test_fpu_impl.o CFLAGS_test_fpu_impl.o += $(CC_FLAGS_FPU) CFLAGS_REMOVE_test_fpu_impl.o += $(CC_FLAGS_NO_FPU) -# Some KUnit files (hooks.o) need to be built-in even when KUnit is a module, -# so we can't just use obj-$(CONFIG_KUNIT). -ifdef CONFIG_KUNIT obj-y += kunit/ -endif ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile index 5aa51978e456..656f1fa35abc 100644 --- a/lib/kunit/Makefile +++ b/lib/kunit/Makefile @@ -17,7 +17,7 @@ kunit-objs += debugfs.o endif # KUnit 'hooks' are built-in even when KUnit is built as a module. -obj-y += hooks.o +obj-$(if $(CONFIG_KUNIT),y) += hooks.o obj-$(CONFIG_KUNIT_TEST) += kunit-test.o obj-$(CONFIG_KUNIT_TEST) += platform-test.o -- cgit v1.2.3 From 5ff74f5f71f83cce3c920cd17940df0fe0401865 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 15 Aug 2025 19:02:40 -0700 Subject: lib/crc: Drop inline from all *_mod_init_arch() functions Drop 'inline' from all the *_mod_init_arch() functions so that the compiler will warn about any bugs where they are unused due to not being wired up properly. (There are no such bugs currently, so this just establishes a more robust convention for the future. Of course, these functions also tend to get inlined anyway, regardless of the keyword.) Link: https://lore.kernel.org/r/20250816020240.431545-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crc/arm/crc-t10dif.h | 2 +- lib/crc/arm/crc32.h | 2 +- lib/crc/arm64/crc-t10dif.h | 2 +- lib/crc/loongarch/crc32.h | 2 +- lib/crc/mips/crc32.h | 2 +- lib/crc/powerpc/crc-t10dif.h | 2 +- lib/crc/powerpc/crc32.h | 2 +- lib/crc/sparc/crc32.h | 2 +- lib/crc/x86/crc-t10dif.h | 2 +- lib/crc/x86/crc32.h | 2 +- lib/crc/x86/crc64.h | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h index 1a969f4024d4..63441de5e3f1 100644 --- a/lib/crc/arm/crc-t10dif.h +++ b/lib/crc/arm/crc-t10dif.h @@ -43,7 +43,7 @@ static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) } #define crc_t10dif_mod_init_arch crc_t10dif_mod_init_arch -static inline void crc_t10dif_mod_init_arch(void) +static void crc_t10dif_mod_init_arch(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); diff --git a/lib/crc/arm/crc32.h b/lib/crc/arm/crc32.h index ae71aa60b7a7..7b76f52f6907 100644 --- a/lib/crc/arm/crc32.h +++ b/lib/crc/arm/crc32.h @@ -83,7 +83,7 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) #define crc32_be_arch crc32_be_base /* not implemented on this arch */ #define crc32_mod_init_arch crc32_mod_init_arch -static inline void crc32_mod_init_arch(void) +static void crc32_mod_init_arch(void) { if (elf_hwcap2 & HWCAP2_CRC32) static_branch_enable(&have_crc32); diff --git a/lib/crc/arm64/crc-t10dif.h b/lib/crc/arm64/crc-t10dif.h index 435a990ec43c..f88db2971805 100644 --- a/lib/crc/arm64/crc-t10dif.h +++ b/lib/crc/arm64/crc-t10dif.h @@ -45,7 +45,7 @@ static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) } #define crc_t10dif_mod_init_arch crc_t10dif_mod_init_arch -static inline void crc_t10dif_mod_init_arch(void) +static void crc_t10dif_mod_init_arch(void) { if (cpu_have_named_feature(ASIMD)) { static_branch_enable(&have_asimd); diff --git a/lib/crc/loongarch/crc32.h b/lib/crc/loongarch/crc32.h index 6de5c96594af..d34fa4c68632 100644 --- a/lib/crc/loongarch/crc32.h +++ b/lib/crc/loongarch/crc32.h @@ -101,7 +101,7 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) #define crc32_be_arch crc32_be_base /* not implemented on this arch */ #define crc32_mod_init_arch crc32_mod_init_arch -static inline void crc32_mod_init_arch(void) +static void crc32_mod_init_arch(void) { if (cpu_has_crc32) static_branch_enable(&have_crc32); diff --git a/lib/crc/mips/crc32.h b/lib/crc/mips/crc32.h index 11cb272c63a6..3100354a049e 100644 --- a/lib/crc/mips/crc32.h +++ b/lib/crc/mips/crc32.h @@ -148,7 +148,7 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) #define crc32_be_arch crc32_be_base /* not implemented on this arch */ #define crc32_mod_init_arch crc32_mod_init_arch -static inline void crc32_mod_init_arch(void) +static void crc32_mod_init_arch(void) { if (cpu_have_feature(cpu_feature(MIPS_CRC32))) static_branch_enable(&have_crc32); diff --git a/lib/crc/powerpc/crc-t10dif.h b/lib/crc/powerpc/crc-t10dif.h index e033d5f57bae..8f4592a5323d 100644 --- a/lib/crc/powerpc/crc-t10dif.h +++ b/lib/crc/powerpc/crc-t10dif.h @@ -62,7 +62,7 @@ static inline u16 crc_t10dif_arch(u16 crci, const u8 *p, size_t len) } #define crc_t10dif_mod_init_arch crc_t10dif_mod_init_arch -static inline void crc_t10dif_mod_init_arch(void) +static void crc_t10dif_mod_init_arch(void) { if (cpu_has_feature(CPU_FTR_ARCH_207S) && (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) diff --git a/lib/crc/powerpc/crc32.h b/lib/crc/powerpc/crc32.h index cc8fa3913d4e..0c852272a382 100644 --- a/lib/crc/powerpc/crc32.h +++ b/lib/crc/powerpc/crc32.h @@ -55,7 +55,7 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) } #define crc32_mod_init_arch crc32_mod_init_arch -static inline void crc32_mod_init_arch(void) +static void crc32_mod_init_arch(void) { if (cpu_has_feature(CPU_FTR_ARCH_207S) && (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) diff --git a/lib/crc/sparc/crc32.h b/lib/crc/sparc/crc32.h index 60f2765ac015..df7c350acd7b 100644 --- a/lib/crc/sparc/crc32.h +++ b/lib/crc/sparc/crc32.h @@ -44,7 +44,7 @@ static inline u32 crc32c_arch(u32 crc, const u8 *data, size_t len) } #define crc32_mod_init_arch crc32_mod_init_arch -static inline void crc32_mod_init_arch(void) +static void crc32_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crc/x86/crc-t10dif.h b/lib/crc/x86/crc-t10dif.h index 2a02a3026f3f..8ee8824da551 100644 --- a/lib/crc/x86/crc-t10dif.h +++ b/lib/crc/x86/crc-t10dif.h @@ -19,7 +19,7 @@ static inline u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len) } #define crc_t10dif_mod_init_arch crc_t10dif_mod_init_arch -static inline void crc_t10dif_mod_init_arch(void) +static void crc_t10dif_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { static_branch_enable(&have_pclmulqdq); diff --git a/lib/crc/x86/crc32.h b/lib/crc/x86/crc32.h index 2c4a5976654a..19a5e3c6c73b 100644 --- a/lib/crc/x86/crc32.h +++ b/lib/crc/x86/crc32.h @@ -106,7 +106,7 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) #define crc32_be_arch crc32_be_base /* not implemented on this arch */ #define crc32_mod_init_arch crc32_mod_init_arch -static inline void crc32_mod_init_arch(void) +static void crc32_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_XMM4_2)) static_branch_enable(&have_crc32); diff --git a/lib/crc/x86/crc64.h b/lib/crc/x86/crc64.h index fde1222c4c58..7d4599319343 100644 --- a/lib/crc/x86/crc64.h +++ b/lib/crc/x86/crc64.h @@ -27,7 +27,7 @@ static inline u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len) } #define crc64_mod_init_arch crc64_mod_init_arch -static inline void crc64_mod_init_arch(void) +static void crc64_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { static_branch_enable(&have_pclmulqdq); -- cgit v1.2.3 From d2236198839ca57610d1f7be7c61fb8c95f2fca6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 7 Aug 2025 13:44:44 +0200 Subject: lib/lzo: add unlikely hints to overrun checks The NEED_* macros do an implicit goto in case the safety bounds checks fail. Add the unlikely hints as this is the error case and not a hot path. The final assembly is slightly shorter and some jumps got reordered according to the hints. text data bss dec hex filename 2294 16 0 2310 906 pre/lzo1x_decompress_safe.o 2277 16 0 2293 8f5 post/lzo1x_decompress_safe.o text data bss dec hex filename 3444 48 0 3492 da4 pre/lzo1x_compress_safe.o 3372 48 0 3420 d5c post/lzo1x_compress_safe.o Signed-off-by: David Sterba Signed-off-by: Herbert Xu --- lib/lzo/lzo1x_compress.c | 2 +- lib/lzo/lzo1x_decompress_safe.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 7b10ca86a893..01586af2347f 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -26,7 +26,7 @@ #define HAVE_OP(x) 1 #endif -#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun +#define NEED_OP(x) if (unlikely(!HAVE_OP(x))) goto output_overrun static noinline int LZO_SAFE(lzo1x_1_do_compress)(const unsigned char *in, size_t in_len, diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c index c94f4928e188..318abb82c63d 100644 --- a/lib/lzo/lzo1x_decompress_safe.c +++ b/lib/lzo/lzo1x_decompress_safe.c @@ -22,9 +22,9 @@ #define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x)) #define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x)) -#define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun -#define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun -#define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun +#define NEED_IP(x) if (unlikely(!HAVE_IP(x))) goto input_overrun +#define NEED_OP(x) if (unlikely(!HAVE_OP(x))) goto output_overrun +#define TEST_LB(m_pos) if (unlikely((m_pos) < out)) goto lookbehind_overrun /* This MAX_255_COUNT is the maximum number of times we can add 255 to a base * count without overflowing an integer. The multiply will overflow when -- cgit v1.2.3 From ae91aea2d2265c88dbed65a07bbaf3c133fe970c Mon Sep 17 00:00:00 2001 From: Junhui Pei Date: Mon, 2 Jun 2025 23:38:41 +0800 Subject: ubsan: Fix incorrect hand-side used in handle __ubsan_handle_divrem_overflow() incorrectly uses the RHS to report. It always reports the same log: division of -1 by -1. But it should report division of LHS by -1. Signed-off-by: Junhui Pei Fixes: c6d308534aef ("UBSAN: run-time undefined behavior sanity checker") Link: https://lore.kernel.org/r/20250602153841.62935-1-paradoxskin233@gmail.com Signed-off-by: Kees Cook --- lib/ubsan.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/ubsan.c b/lib/ubsan.c index a6ca235dd714..456e3dd8f4ea 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -333,18 +333,18 @@ EXPORT_SYMBOL(__ubsan_handle_implicit_conversion); void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs) { struct overflow_data *data = _data; - char rhs_val_str[VALUE_LENGTH]; + char lhs_val_str[VALUE_LENGTH]; if (suppress_report(&data->location)) return; ubsan_prologue(&data->location, "division-overflow"); - val_to_string(rhs_val_str, sizeof(rhs_val_str), data->type, rhs); + val_to_string(lhs_val_str, sizeof(lhs_val_str), data->type, lhs); if (type_is_signed(data->type) && get_signed_val(data->type, rhs) == -1) pr_err("division of %s by -1 cannot be represented in type %s\n", - rhs_val_str, data->type->type_name); + lhs_val_str, data->type->type_name); else pr_err("division by zero\n"); -- cgit v1.2.3 From 640d31ea83c6f67133d47df9a0973f3281c91cf4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 15:35:10 -0700 Subject: lib/crypto: sha256: Use underlying functions instead of crypto_simd_usable() Since sha256_kunit tests the fallback code paths without using crypto_simd_disabled_for_test, make the SHA-256 code just use the underlying may_use_simd() and irq_fpu_usable() functions directly instead of crypto_simd_usable(). This eliminates an unnecessary layer. While doing this, also add likely() annotations, and fix a minor inconsistency where the static keys in the sha256.h files were in a different place than in the corresponding sha1.h and sha512.h files. Link: https://lore.kernel.org/r/20250731223510.136650-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/sha256.h | 10 +++++----- lib/crypto/arm64/sha256.h | 10 +++++----- lib/crypto/riscv/sha256.h | 8 ++++---- lib/crypto/x86/sha256.h | 3 +-- 4 files changed, 15 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/sha256.h b/lib/crypto/arm/sha256.h index da75cbdc51d4..eab713e650f3 100644 --- a/lib/crypto/arm/sha256.h +++ b/lib/crypto/arm/sha256.h @@ -5,7 +5,10 @@ * Copyright 2025 Google LLC */ #include -#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); asmlinkage void sha256_block_data_order(struct sha256_block_state *state, const u8 *data, size_t nblocks); @@ -14,14 +17,11 @@ asmlinkage void sha256_block_data_order_neon(struct sha256_block_state *state, asmlinkage void sha256_ce_transform(struct sha256_block_state *state, const u8 *data, size_t nblocks); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); - static void sha256_blocks(struct sha256_block_state *state, const u8 *data, size_t nblocks) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && crypto_simd_usable()) { + static_branch_likely(&have_neon) && likely(may_use_simd())) { kernel_neon_begin(); if (static_branch_likely(&have_ce)) sha256_ce_transform(state, data, nblocks); diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h index a211966c124a..d95f1077c32b 100644 --- a/lib/crypto/arm64/sha256.h +++ b/lib/crypto/arm64/sha256.h @@ -5,9 +5,12 @@ * Copyright 2025 Google LLC */ #include -#include +#include #include +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); + asmlinkage void sha256_block_data_order(struct sha256_block_state *state, const u8 *data, size_t nblocks); asmlinkage void sha256_block_neon(struct sha256_block_state *state, @@ -15,14 +18,11 @@ asmlinkage void sha256_block_neon(struct sha256_block_state *state, asmlinkage size_t __sha256_ce_transform(struct sha256_block_state *state, const u8 *data, size_t nblocks); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); - static void sha256_blocks(struct sha256_block_state *state, const u8 *data, size_t nblocks) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && crypto_simd_usable()) { + static_branch_likely(&have_neon) && likely(may_use_simd())) { if (static_branch_likely(&have_ce)) { do { size_t rem; diff --git a/lib/crypto/riscv/sha256.h b/lib/crypto/riscv/sha256.h index c0f79c18f119..f36f68d2e88c 100644 --- a/lib/crypto/riscv/sha256.h +++ b/lib/crypto/riscv/sha256.h @@ -9,19 +9,19 @@ * Author: Jerry Shih */ +#include #include -#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions); asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(struct sha256_block_state *state, const u8 *data, size_t nblocks); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions); - static void sha256_blocks(struct sha256_block_state *state, const u8 *data, size_t nblocks) { - if (static_branch_likely(&have_extensions) && crypto_simd_usable()) { + if (static_branch_likely(&have_extensions) && likely(may_use_simd())) { kernel_vector_begin(); sha256_transform_zvknha_or_zvknhb_zvkb(state, data, nblocks); kernel_vector_end(); diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h index 669bc06538b6..c852396ef319 100644 --- a/lib/crypto/x86/sha256.h +++ b/lib/crypto/x86/sha256.h @@ -5,7 +5,6 @@ * Copyright 2025 Google LLC */ #include -#include #include DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic); @@ -16,7 +15,7 @@ DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic); static void c_fn(struct sha256_block_state *state, const u8 *data, \ size_t nblocks) \ { \ - if (likely(crypto_simd_usable())) { \ + if (likely(irq_fpu_usable())) { \ kernel_fpu_begin(); \ asm_fn(state, data, nblocks); \ kernel_fpu_end(); \ -- cgit v1.2.3 From bce5816672ec27085489f096ec27739a4a233b7b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 15:36:51 -0700 Subject: lib/crypto: sha512: Use underlying functions instead of crypto_simd_usable() Since sha512_kunit tests the fallback code paths without using crypto_simd_disabled_for_test, make the SHA-512 code just use the underlying may_use_simd() and irq_fpu_usable() functions directly instead of crypto_simd_usable(). This eliminates an unnecessary layer. Link: https://lore.kernel.org/r/20250731223651.136939-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/sha512.h | 5 ++--- lib/crypto/arm64/sha512.h | 5 ++--- lib/crypto/riscv/sha512.h | 4 +--- lib/crypto/x86/sha512.h | 4 +--- 4 files changed, 6 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/sha512.h b/lib/crypto/arm/sha512.h index f147b6490d6c..cc2447acd562 100644 --- a/lib/crypto/arm/sha512.h +++ b/lib/crypto/arm/sha512.h @@ -4,9 +4,8 @@ * * Copyright 2025 Google LLC */ - #include -#include +#include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); @@ -19,7 +18,7 @@ static void sha512_blocks(struct sha512_block_state *state, const u8 *data, size_t nblocks) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && likely(crypto_simd_usable())) { + static_branch_likely(&have_neon) && likely(may_use_simd())) { kernel_neon_begin(); sha512_block_data_order_neon(state, data, nblocks); kernel_neon_end(); diff --git a/lib/crypto/arm64/sha512.h b/lib/crypto/arm64/sha512.h index 6abb40b467f2..7539ea3fef10 100644 --- a/lib/crypto/arm64/sha512.h +++ b/lib/crypto/arm64/sha512.h @@ -4,9 +4,8 @@ * * Copyright 2025 Google LLC */ - #include -#include +#include #include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha512_insns); @@ -21,7 +20,7 @@ static void sha512_blocks(struct sha512_block_state *state, { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && static_branch_likely(&have_sha512_insns) && - likely(crypto_simd_usable())) { + likely(may_use_simd())) { do { size_t rem; diff --git a/lib/crypto/riscv/sha512.h b/lib/crypto/riscv/sha512.h index 9d0abede322f..59dc0294a9a7 100644 --- a/lib/crypto/riscv/sha512.h +++ b/lib/crypto/riscv/sha512.h @@ -11,7 +11,6 @@ #include #include -#include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions); @@ -21,8 +20,7 @@ asmlinkage void sha512_transform_zvknhb_zvkb(struct sha512_block_state *state, static void sha512_blocks(struct sha512_block_state *state, const u8 *data, size_t nblocks) { - if (static_branch_likely(&have_extensions) && - likely(crypto_simd_usable())) { + if (static_branch_likely(&have_extensions) && likely(may_use_simd())) { kernel_vector_begin(); sha512_transform_zvknhb_zvkb(state, data, nblocks); kernel_vector_end(); diff --git a/lib/crypto/x86/sha512.h b/lib/crypto/x86/sha512.h index c13503d9d57d..be2c8fc12246 100644 --- a/lib/crypto/x86/sha512.h +++ b/lib/crypto/x86/sha512.h @@ -4,9 +4,7 @@ * * Copyright 2025 Google LLC */ - #include -#include #include DEFINE_STATIC_CALL(sha512_blocks_x86, sha512_blocks_generic); @@ -17,7 +15,7 @@ DEFINE_STATIC_CALL(sha512_blocks_x86, sha512_blocks_generic); static void c_fn(struct sha512_block_state *state, const u8 *data, \ size_t nblocks) \ { \ - if (likely(crypto_simd_usable())) { \ + if (likely(irq_fpu_usable())) { \ kernel_fpu_begin(); \ asm_fn(state, data, nblocks); \ kernel_fpu_end(); \ -- cgit v1.2.3 From e164461349444ad27873e4ab2f492eb4465dbbb0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:49 -0700 Subject: lib/crypto: md5: Add MD5 and HMAC-MD5 library functions Add library functions for MD5, including HMAC support. The MD5 implementation is derived from crypto/md5.c. This closely mirrors the corresponding SHA-1 and SHA-2 changes. Like SHA-1 and SHA-2, support for architecture-optimized MD5 implementations is included. I originally proposed dropping those, but unfortunately there is an AF_ALG user of the PowerPC MD5 code (https://lore.kernel.org/r/c4191597-341d-4fd7-bc3d-13daf7666c41@csgroup.eu/), and dropping that code would be viewed as a performance regression. We don't add new software algorithm implementations purely for AF_ALG, as escalating to kernel mode merely to do calculations that could be done in userspace is inefficient and is completely the wrong design. But since this one already existed, it gets grandfathered in for now. An objection was also raised to dropping the SPARC64 MD5 code because it utilizes the CPU's direct support for MD5, although it remains unclear that anyone is using that. Regardless, we'll keep these around for now. Note that while MD5 is a legacy algorithm that is vulnerable to practical collision attacks, it still has various in-kernel users that implement legacy protocols. Switching to a simple library API, which is the way the code should have been organized originally, will greatly simplify their code. For example: MD5: drivers/md/dm-crypt.c (for lmk IV generation) fs/nfsd/nfs4recover.c fs/ecryptfs/ fs/smb/client/ net/{ipv4,ipv6}/ (for TCP-MD5 signatures) HMAC-MD5: fs/smb/client/ fs/smb/server/ (Also net/sctp/ if it continues using HMAC-MD5 for cookie generation. However, that use case has the flexibility to upgrade to a more modern algorithm, which I'll be proposing instead.) As usual, the "md5" and "hmac(md5)" crypto_shash algorithms will also be reimplemented on top of these library functions. For "hmac(md5)" this will provide a faster, more streamlined implementation. Link: https://lore.kernel.org/r/20250805222855.10362-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 10 ++ lib/crypto/Makefile | 10 ++ lib/crypto/md5.c | 322 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 342 insertions(+) create mode 100644 lib/crypto/md5.c (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 1e6b008f8fca..e38e8ed779d8 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -101,6 +101,16 @@ config CRYPTO_LIB_CURVE25519 config CRYPTO_LIB_DES tristate +config CRYPTO_LIB_MD5 + tristate + help + The MD5 and HMAC-MD5 library functions. Select this if your module + uses any of the functions from . + +config CRYPTO_LIB_MD5_ARCH + bool + depends on CRYPTO_LIB_MD5 && !UML + config CRYPTO_LIB_POLY1305_RSIZE int default 2 if MIPS diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 539d5d59a50e..429573e8f8b3 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -59,6 +59,16 @@ libcurve25519-$(CONFIG_CRYPTO_SELFTESTS) += curve25519-selftest.o obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o libdes-y := des.o +################################################################################ + +obj-$(CONFIG_CRYPTO_LIB_MD5) += libmd5.o +libmd5-y := md5.o +ifeq ($(CONFIG_CRYPTO_LIB_MD5_ARCH),y) +CFLAGS_md5.o += -I$(src)/$(SRCARCH) +endif # CONFIG_CRYPTO_LIB_MD5_ARCH + +################################################################################ + obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o libpoly1305-y += poly1305.o diff --git a/lib/crypto/md5.c b/lib/crypto/md5.c new file mode 100644 index 000000000000..c0610ea1370e --- /dev/null +++ b/lib/crypto/md5.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MD5 and HMAC-MD5 library functions + * + * md5_block_generic() is derived from cryptoapi implementation, originally + * based on the public domain implementation written by Colin Plumb in 1993. + * + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris + * Copyright 2025 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct md5_block_state md5_iv = { + .h = { MD5_H0, MD5_H1, MD5_H2, MD5_H3 }, +}; + +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +#define MD5STEP(f, w, x, y, z, in, s) \ + (w += f(x, y, z) + in, w = (w << s | w >> (32 - s)) + x) + +static void md5_block_generic(struct md5_block_state *state, + const u8 data[MD5_BLOCK_SIZE]) +{ + u32 in[MD5_BLOCK_WORDS]; + u32 a, b, c, d; + + memcpy(in, data, MD5_BLOCK_SIZE); + le32_to_cpu_array(in, ARRAY_SIZE(in)); + + a = state->h[0]; + b = state->h[1]; + c = state->h[2]; + d = state->h[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + state->h[0] += a; + state->h[1] += b; + state->h[2] += c; + state->h[3] += d; +} + +static void __maybe_unused md5_blocks_generic(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + do { + md5_block_generic(state, data); + data += MD5_BLOCK_SIZE; + } while (--nblocks); +} + +#ifdef CONFIG_CRYPTO_LIB_MD5_ARCH +#include "md5.h" /* $(SRCARCH)/md5.h */ +#else +#define md5_blocks md5_blocks_generic +#endif + +void md5_init(struct md5_ctx *ctx) +{ + ctx->state = md5_iv; + ctx->bytecount = 0; +} +EXPORT_SYMBOL_GPL(md5_init); + +void md5_update(struct md5_ctx *ctx, const u8 *data, size_t len) +{ + size_t partial = ctx->bytecount % MD5_BLOCK_SIZE; + + ctx->bytecount += len; + + if (partial + len >= MD5_BLOCK_SIZE) { + size_t nblocks; + + if (partial) { + size_t l = MD5_BLOCK_SIZE - partial; + + memcpy(&ctx->buf[partial], data, l); + data += l; + len -= l; + + md5_blocks(&ctx->state, ctx->buf, 1); + } + + nblocks = len / MD5_BLOCK_SIZE; + len %= MD5_BLOCK_SIZE; + + if (nblocks) { + md5_blocks(&ctx->state, data, nblocks); + data += nblocks * MD5_BLOCK_SIZE; + } + partial = 0; + } + if (len) + memcpy(&ctx->buf[partial], data, len); +} +EXPORT_SYMBOL_GPL(md5_update); + +static void __md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) +{ + u64 bitcount = ctx->bytecount << 3; + size_t partial = ctx->bytecount % MD5_BLOCK_SIZE; + + ctx->buf[partial++] = 0x80; + if (partial > MD5_BLOCK_SIZE - 8) { + memset(&ctx->buf[partial], 0, MD5_BLOCK_SIZE - partial); + md5_blocks(&ctx->state, ctx->buf, 1); + partial = 0; + } + memset(&ctx->buf[partial], 0, MD5_BLOCK_SIZE - 8 - partial); + *(__le64 *)&ctx->buf[MD5_BLOCK_SIZE - 8] = cpu_to_le64(bitcount); + md5_blocks(&ctx->state, ctx->buf, 1); + + cpu_to_le32_array(ctx->state.h, ARRAY_SIZE(ctx->state.h)); + memcpy(out, ctx->state.h, MD5_DIGEST_SIZE); +} + +void md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) +{ + __md5_final(ctx, out); + memzero_explicit(ctx, sizeof(*ctx)); +} +EXPORT_SYMBOL_GPL(md5_final); + +void md5(const u8 *data, size_t len, u8 out[MD5_DIGEST_SIZE]) +{ + struct md5_ctx ctx; + + md5_init(&ctx); + md5_update(&ctx, data, len); + md5_final(&ctx, out); +} +EXPORT_SYMBOL_GPL(md5); + +static void __hmac_md5_preparekey(struct md5_block_state *istate, + struct md5_block_state *ostate, + const u8 *raw_key, size_t raw_key_len) +{ + union { + u8 b[MD5_BLOCK_SIZE]; + unsigned long w[MD5_BLOCK_SIZE / sizeof(unsigned long)]; + } derived_key = { 0 }; + + if (unlikely(raw_key_len > MD5_BLOCK_SIZE)) + md5(raw_key, raw_key_len, derived_key.b); + else + memcpy(derived_key.b, raw_key, raw_key_len); + + for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++) + derived_key.w[i] ^= REPEAT_BYTE(HMAC_IPAD_VALUE); + *istate = md5_iv; + md5_blocks(istate, derived_key.b, 1); + + for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++) + derived_key.w[i] ^= REPEAT_BYTE(HMAC_OPAD_VALUE ^ + HMAC_IPAD_VALUE); + *ostate = md5_iv; + md5_blocks(ostate, derived_key.b, 1); + + memzero_explicit(&derived_key, sizeof(derived_key)); +} + +void hmac_md5_preparekey(struct hmac_md5_key *key, + const u8 *raw_key, size_t raw_key_len) +{ + __hmac_md5_preparekey(&key->istate, &key->ostate, raw_key, raw_key_len); +} +EXPORT_SYMBOL_GPL(hmac_md5_preparekey); + +void hmac_md5_init(struct hmac_md5_ctx *ctx, const struct hmac_md5_key *key) +{ + ctx->hash_ctx.state = key->istate; + ctx->hash_ctx.bytecount = MD5_BLOCK_SIZE; + ctx->ostate = key->ostate; +} +EXPORT_SYMBOL_GPL(hmac_md5_init); + +void hmac_md5_init_usingrawkey(struct hmac_md5_ctx *ctx, + const u8 *raw_key, size_t raw_key_len) +{ + __hmac_md5_preparekey(&ctx->hash_ctx.state, &ctx->ostate, + raw_key, raw_key_len); + ctx->hash_ctx.bytecount = MD5_BLOCK_SIZE; +} +EXPORT_SYMBOL_GPL(hmac_md5_init_usingrawkey); + +void hmac_md5_final(struct hmac_md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) +{ + /* Generate the padded input for the outer hash in ctx->hash_ctx.buf. */ + __md5_final(&ctx->hash_ctx, ctx->hash_ctx.buf); + memset(&ctx->hash_ctx.buf[MD5_DIGEST_SIZE], 0, + MD5_BLOCK_SIZE - MD5_DIGEST_SIZE); + ctx->hash_ctx.buf[MD5_DIGEST_SIZE] = 0x80; + *(__le64 *)&ctx->hash_ctx.buf[MD5_BLOCK_SIZE - 8] = + cpu_to_le64(8 * (MD5_BLOCK_SIZE + MD5_DIGEST_SIZE)); + + /* Compute the outer hash, which gives the HMAC value. */ + md5_blocks(&ctx->ostate, ctx->hash_ctx.buf, 1); + cpu_to_le32_array(ctx->ostate.h, ARRAY_SIZE(ctx->ostate.h)); + memcpy(out, ctx->ostate.h, MD5_DIGEST_SIZE); + + memzero_explicit(ctx, sizeof(*ctx)); +} +EXPORT_SYMBOL_GPL(hmac_md5_final); + +void hmac_md5(const struct hmac_md5_key *key, + const u8 *data, size_t data_len, u8 out[MD5_DIGEST_SIZE]) +{ + struct hmac_md5_ctx ctx; + + hmac_md5_init(&ctx, key); + hmac_md5_update(&ctx, data, data_len); + hmac_md5_final(&ctx, out); +} +EXPORT_SYMBOL_GPL(hmac_md5); + +void hmac_md5_usingrawkey(const u8 *raw_key, size_t raw_key_len, + const u8 *data, size_t data_len, + u8 out[MD5_DIGEST_SIZE]) +{ + struct hmac_md5_ctx ctx; + + hmac_md5_init_usingrawkey(&ctx, raw_key, raw_key_len); + hmac_md5_update(&ctx, data, data_len); + hmac_md5_final(&ctx, out); +} +EXPORT_SYMBOL_GPL(hmac_md5_usingrawkey); + +#ifdef md5_mod_init_arch +static int __init md5_mod_init(void) +{ + md5_mod_init_arch(); + return 0; +} +subsys_initcall(md5_mod_init); + +static void __exit md5_mod_exit(void) +{ +} +module_exit(md5_mod_exit); +#endif + +MODULE_DESCRIPTION("MD5 and HMAC-MD5 library functions"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c9e5ac0ab9d1235f1a91852ec3d3c5c5f3e8ba0e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:50 -0700 Subject: lib/crypto: mips/md5: Migrate optimized code into library Instead of exposing the mips-optimized MD5 code via mips-specific crypto_shash algorithms, instead just implement the md5_blocks() library function. This is much simpler, it makes the MD5 library functions be mips-optimized, and it fixes the longstanding issue where the mips-optimized MD5 code was disabled by default. MD5 still remains available through crypto_shash, but individual architectures no longer need to handle it. Note: to see the diff from arch/mips/cavium-octeon/crypto/octeon-md5.c to lib/crypto/mips/md5.h, view this commit with 'git show -M10'. Link: https://lore.kernel.org/r/20250805222855.10362-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 1 + lib/crypto/mips/md5.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 lib/crypto/mips/md5.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index e38e8ed779d8..7b4e47ce37bb 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -110,6 +110,7 @@ config CRYPTO_LIB_MD5 config CRYPTO_LIB_MD5_ARCH bool depends on CRYPTO_LIB_MD5 && !UML + default y if MIPS && CPU_CAVIUM_OCTEON config CRYPTO_LIB_POLY1305_RSIZE int diff --git a/lib/crypto/mips/md5.h b/lib/crypto/mips/md5.h new file mode 100644 index 000000000000..e08e28aeffa4 --- /dev/null +++ b/lib/crypto/mips/md5.h @@ -0,0 +1,65 @@ +/* + * Cryptographic API. + * + * MD5 Message Digest Algorithm (RFC1321). + * + * Adapted for OCTEON by Aaro Koskinen . + * + * Based on crypto/md5.c, which is: + * + * Derived from cryptoapi implementation, originally based on the + * public domain implementation written by Colin Plumb in 1993. + * + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include +#include + +/* + * We pass everything as 64-bit. OCTEON can handle misaligned data. + */ + +static void md5_blocks(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + struct octeon_cop2_state cop2_state; + u64 *state64 = (u64 *)state; + unsigned long flags; + + if (!octeon_has_crypto()) + return md5_blocks_generic(state, data, nblocks); + + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + + flags = octeon_crypto_enable(&cop2_state); + write_octeon_64bit_hash_dword(state64[0], 0); + write_octeon_64bit_hash_dword(state64[1], 1); + + do { + const u64 *block = (const u64 *)data; + + write_octeon_64bit_block_dword(block[0], 0); + write_octeon_64bit_block_dword(block[1], 1); + write_octeon_64bit_block_dword(block[2], 2); + write_octeon_64bit_block_dword(block[3], 3); + write_octeon_64bit_block_dword(block[4], 4); + write_octeon_64bit_block_dword(block[5], 5); + write_octeon_64bit_block_dword(block[6], 6); + octeon_md5_start(block[7]); + + data += MD5_BLOCK_SIZE; + } while (--nblocks); + + state64[0] = read_octeon_64bit_hash_dword(0); + state64[1] = read_octeon_64bit_hash_dword(1); + octeon_crypto_disable(&cop2_state, flags); + + le32_to_cpu_array(state->h, ARRAY_SIZE(state->h)); +} -- cgit v1.2.3 From 09371e1349c9bb34ac030973c7867016a8a8914d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:52 -0700 Subject: lib/crypto: powerpc/md5: Migrate optimized code into library Instead of exposing the powerpc-optimized MD5 code via powerpc-specific crypto_shash algorithms, instead just implement the md5_blocks() library function. This is much simpler, it makes the MD5 library functions be powerpc-optimized, and it fixes the longstanding issue where the powerpc-optimized MD5 code was disabled by default. MD5 still remains available through crypto_shash, but individual architectures no longer need to handle it. Link: https://lore.kernel.org/r/20250805222855.10362-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 1 + lib/crypto/Makefile | 1 + lib/crypto/powerpc/md5-asm.S | 235 +++++++++++++++++++++++++++++++++++++++++++ lib/crypto/powerpc/md5.h | 12 +++ 4 files changed, 249 insertions(+) create mode 100644 lib/crypto/powerpc/md5-asm.S create mode 100644 lib/crypto/powerpc/md5.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 7b4e47ce37bb..7334ddc70e59 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -111,6 +111,7 @@ config CRYPTO_LIB_MD5_ARCH bool depends on CRYPTO_LIB_MD5 && !UML default y if MIPS && CPU_CAVIUM_OCTEON + default y if PPC config CRYPTO_LIB_POLY1305_RSIZE int diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 429573e8f8b3..b3986dde676b 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_CRYPTO_LIB_MD5) += libmd5.o libmd5-y := md5.o ifeq ($(CONFIG_CRYPTO_LIB_MD5_ARCH),y) CFLAGS_md5.o += -I$(src)/$(SRCARCH) +libmd5-$(CONFIG_PPC) += powerpc/md5-asm.o endif # CONFIG_CRYPTO_LIB_MD5_ARCH ################################################################################ diff --git a/lib/crypto/powerpc/md5-asm.S b/lib/crypto/powerpc/md5-asm.S new file mode 100644 index 000000000000..fa6bc440cf4a --- /dev/null +++ b/lib/crypto/powerpc/md5-asm.S @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Fast MD5 implementation for PPC + * + * Copyright (c) 2015 Markus Stockhausen + */ +#include +#include +#include + +#define rHP r3 +#define rWP r4 + +#define rH0 r0 +#define rH1 r6 +#define rH2 r7 +#define rH3 r5 + +#define rW00 r8 +#define rW01 r9 +#define rW02 r10 +#define rW03 r11 +#define rW04 r12 +#define rW05 r14 +#define rW06 r15 +#define rW07 r16 +#define rW08 r17 +#define rW09 r18 +#define rW10 r19 +#define rW11 r20 +#define rW12 r21 +#define rW13 r22 +#define rW14 r23 +#define rW15 r24 + +#define rT0 r25 +#define rT1 r26 + +#define INITIALIZE \ + PPC_STLU r1,-INT_FRAME_SIZE(r1); \ + SAVE_GPRS(14, 26, r1) /* push registers onto stack */ + +#define FINALIZE \ + REST_GPRS(14, 26, r1); /* pop registers from stack */ \ + addi r1,r1,INT_FRAME_SIZE + +#ifdef __BIG_ENDIAN__ +#define LOAD_DATA(reg, off) \ + lwbrx reg,0,rWP; /* load data */ +#define INC_PTR \ + addi rWP,rWP,4; /* increment per word */ +#define NEXT_BLOCK /* nothing to do */ +#else +#define LOAD_DATA(reg, off) \ + lwz reg,off(rWP); /* load data */ +#define INC_PTR /* nothing to do */ +#define NEXT_BLOCK \ + addi rWP,rWP,64; /* increment per block */ +#endif + +#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \ + LOAD_DATA(w0, off) /* W */ \ + and rT0,b,c; /* 1: f = b and c */ \ + INC_PTR /* ptr++ */ \ + andc rT1,d,b; /* 1: f' = ~b and d */ \ + LOAD_DATA(w1, off+4) /* W */ \ + or rT0,rT0,rT1; /* 1: f = f or f' */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + addis w1,w1,k1h; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addi w1,w1,k1l; /* 2: wk = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add d,d,w1; /* 2: a = a + wk */ \ + add a,a,b; /* 1: a = a + b */ \ + and rT0,a,b; /* 2: f = b and c */ \ + andc rT1,c,a; /* 2: f' = ~b and d */ \ + or rT0,rT0,rT1; /* 2: f = f or f' */ \ + add d,d,rT0; /* 2: a = a + f */ \ + INC_PTR /* ptr++ */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + andc rT0,c,d; /* 1: f = c and ~d */ \ + and rT1,b,d; /* 1: f' = b and d */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + or rT0,rT0,rT1; /* 1: f = f or f' */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addi w1,w1,k1l; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addis w1,w1,k1h; /* 2: wk = w + k' */ \ + andc rT0,b,c; /* 2: f = c and ~d */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add a,a,b; /* 1: a = a + b */ \ + add d,d,w1; /* 2: a = a + wk */ \ + and rT1,a,c; /* 2: f' = b and d */ \ + or rT0,rT0,rT1; /* 2: f = f or f' */ \ + add d,d,rT0; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a +b */ + +#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + xor rT0,b,c; /* 1: f' = b xor c */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + xor rT1,rT0,d; /* 1: f = f xor f' */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + add a,a,rT1; /* 1: a = a + f */ \ + addi w1,w1,k1l; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addis w1,w1,k1h; /* 2: wk = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add d,d,w1; /* 2: a = a + wk */ \ + add a,a,b; /* 1: a = a + b */ \ + xor rT1,rT0,a; /* 2: f = b xor f' */ \ + add d,d,rT1; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + addi w0,w0,k0l; /* 1: w = w + k */ \ + orc rT0,b,d; /* 1: f = b or ~d */ \ + addis w0,w0,k0h; /* 1: w = w + k' */ \ + xor rT0,rT0,c; /* 1: f = f xor c */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addi w1,w1,k1l; /* 2: w = w + k */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addis w1,w1,k1h; /* 2: w = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add a,a,b; /* 1: a = a + b */ \ + orc rT0,a,c; /* 2: f = b or ~d */ \ + add d,d,w1; /* 2: a = a + wk */ \ + xor rT0,rT0,b; /* 2: f = f xor c */ \ + add d,d,rT0; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +_GLOBAL(ppc_md5_transform) + INITIALIZE + + mtctr r5 + lwz rH0,0(rHP) + lwz rH1,4(rHP) + lwz rH2,8(rHP) + lwz rH3,12(rHP) + +ppc_md5_main: + R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0, + 0xd76b, -23432, 0xe8c8, -18602) + R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8, + 0x2420, 0x70db, 0xc1be, -12562) + R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16, + 0xf57c, 0x0faf, 0x4788, -14806) + R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24, + 0xa830, 0x4613, 0xfd47, -27391) + R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32, + 0x6981, -26408, 0x8b45, -2129) + R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40, + 0xffff, 0x5bb1, 0x895d, -10306) + R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48, + 0x6b90, 0x1122, 0xfd98, 0x7193) + R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56, + 0xa679, 0x438e, 0x49b4, 0x0821) + + R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23, + 0x0d56, 0x6e0c, 0x1810, 0x6d2d) + R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12, + 0x9d02, -32109, 0x124c, 0x2332) + R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23, + 0x8ea7, 0x4a33, 0x0245, -18270) + R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12, + 0x8eee, -8608, 0xf258, -5095) + R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23, + 0x969d, -10697, 0x1cbe, -15288) + R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12, + 0x3317, 0x3e99, 0xdbd9, 0x7c15) + R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23, + 0xac4b, 0x7772, 0xd8cf, 0x331d) + R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12, + 0x6a28, 0x6dd8, 0x219a, 0x3b68) + + R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21, + 0x29cb, 0x28e5, 0x4218, -7788) + R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9, + 0x473f, 0x06d1, 0x3aae, 0x3036) + R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21, + 0xaea1, -15134, 0x640b, -11295) + R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9, + 0x8f4c, 0x4887, 0xbc7c, -22499) + R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21, + 0x7eb8, -27199, 0x00ea, 0x6050) + R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9, + 0xe01a, 0x22fe, 0x4447, 0x69c5) + R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21, + 0xb7f3, 0x0253, 0x59b1, 0x4d5b) + R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9, + 0x4701, -27017, 0xc7bd, -19859) + + R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22, + 0x0988, -1462, 0x4c70, -19401) + R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11, + 0xadaf, -5221, 0xfc99, 0x66f7) + R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22, + 0x7e80, -16418, 0xba1e, -25587) + R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11, + 0x4130, 0x380d, 0xe0c5, 0x738d) + lwz rW00,0(rHP) + R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22, + 0xe837, -30770, 0xde8a, 0x69e8) + lwz rW14,4(rHP) + R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11, + 0x9e79, 0x260f, 0x256d, -27941) + lwz rW12,8(rHP) + R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22, + 0xab75, -20775, 0x4f9e, -28397) + lwz rW10,12(rHP) + R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11, + 0x662b, 0x7c56, 0x11b2, 0x0358) + + add rH0,rH0,rW00 + stw rH0,0(rHP) + add rH1,rH1,rW14 + stw rH1,4(rHP) + add rH2,rH2,rW12 + stw rH2,8(rHP) + add rH3,rH3,rW10 + stw rH3,12(rHP) + NEXT_BLOCK + + bdnz ppc_md5_main + + FINALIZE + blr diff --git a/lib/crypto/powerpc/md5.h b/lib/crypto/powerpc/md5.h new file mode 100644 index 000000000000..540b08e34d1d --- /dev/null +++ b/lib/crypto/powerpc/md5.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * MD5 optimized for PowerPC + */ + +void ppc_md5_transform(u32 *state, const u8 *data, size_t nblocks); + +static void md5_blocks(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + ppc_md5_transform(state->h, data, nblocks); +} -- cgit v1.2.3 From a1848f6e382145e0200843549f24f5af4b5c8136 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:53 -0700 Subject: lib/crypto: sparc/md5: Migrate optimized code into library Instead of exposing the sparc-optimized MD5 code via sparc-specific crypto_shash algorithms, instead just implement the md5_blocks() library function. This is much simpler, it makes the MD5 library functions be sparc-optimized, and it fixes the longstanding issue where the sparc-optimized MD5 code was disabled by default. MD5 still remains available through crypto_shash, but individual architectures no longer need to handle it. Note: to see the diff from arch/sparc/crypto/md5_glue.c to lib/crypto/sparc/md5.h, view this commit with 'git show -M10'. Link: https://lore.kernel.org/r/20250805222855.10362-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 1 + lib/crypto/Makefile | 1 + lib/crypto/sparc/md5.h | 48 +++++++++++++++++++++++++++++++ lib/crypto/sparc/md5_asm.S | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) create mode 100644 lib/crypto/sparc/md5.h create mode 100644 lib/crypto/sparc/md5_asm.S (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 7334ddc70e59..79b848448e07 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -112,6 +112,7 @@ config CRYPTO_LIB_MD5_ARCH depends on CRYPTO_LIB_MD5 && !UML default y if MIPS && CPU_CAVIUM_OCTEON default y if PPC + default y if SPARC64 config CRYPTO_LIB_POLY1305_RSIZE int diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index b3986dde676b..d362636a22d3 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -66,6 +66,7 @@ libmd5-y := md5.o ifeq ($(CONFIG_CRYPTO_LIB_MD5_ARCH),y) CFLAGS_md5.o += -I$(src)/$(SRCARCH) libmd5-$(CONFIG_PPC) += powerpc/md5-asm.o +libmd5-$(CONFIG_SPARC) += sparc/md5_asm.o endif # CONFIG_CRYPTO_LIB_MD5_ARCH ################################################################################ diff --git a/lib/crypto/sparc/md5.h b/lib/crypto/sparc/md5.h new file mode 100644 index 000000000000..3f1b0ed8c0b3 --- /dev/null +++ b/lib/crypto/sparc/md5.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * MD5 accelerated using the sparc64 crypto opcodes + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald + * Copyright (c) Jean-Francois Dive + * Copyright (c) Mathias Krause + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris + */ + +#include +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_md5_opcodes); + +asmlinkage void md5_sparc64_transform(struct md5_block_state *state, + const u8 *data, size_t nblocks); + +static void md5_blocks(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + if (static_branch_likely(&have_md5_opcodes)) { + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + md5_sparc64_transform(state, data, nblocks); + le32_to_cpu_array(state->h, ARRAY_SIZE(state->h)); + } else { + md5_blocks_generic(state, data, nblocks); + } +} + +#define md5_mod_init_arch md5_mod_init_arch +static inline void md5_mod_init_arch(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_MD5)) + return; + + static_branch_enable(&have_md5_opcodes); + pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); +} diff --git a/lib/crypto/sparc/md5_asm.S b/lib/crypto/sparc/md5_asm.S new file mode 100644 index 000000000000..60b544e4d205 --- /dev/null +++ b/lib/crypto/sparc/md5_asm.S @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include + +ENTRY(md5_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x08], %f2 + bne,pn %xcc, 10f + ld [%o0 + 0x0c], %f3 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + MD5 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + retl + VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + MD5 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(md5_sparc64_transform) -- cgit v1.2.3 From 4b59300ba4d2362b02c2a9077047bbabceea67d7 Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:31 +0800 Subject: kunit: Add parent kunit for parameterized test context Currently, KUnit parameterized tests lack a mechanism to share resources across parameter runs because the same `struct kunit` instance is cleaned up and reused for each run. This patch introduces parameterized test context, enabling test users to share resources between parameter runs. It also allows setting up resources that need to be available for all parameter runs only once, which is helpful in cases where setup is expensive. To establish a parameterized test context, this patch adds a parent pointer field to `struct kunit`. This allows resources added to the parent `struct kunit` to be shared and accessible across all parameter runs. In kunit_run_tests(), the default `struct kunit` created is now designated to act as the parameterized test context whenever a test is parameterized. Subsequently, a new `struct kunit` is made for each parameter run, and its parent pointer is set to the `struct kunit` that holds the parameterized test context. Link: https://lore.kernel.org/r/20250826091341.1427123-2-davidgow@google.com Reviewed-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Marie Zhussupova Signed-off-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/test.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/kunit/test.c b/lib/kunit/test.c index d2bfa331a2b1..587b5c51db58 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -647,6 +647,7 @@ int kunit_run_tests(struct kunit_suite *suite) struct kunit_case *test_case; struct kunit_result_stats suite_stats = { 0 }; struct kunit_result_stats total_stats = { 0 }; + const void *curr_param; /* Taint the kernel so we know we've run tests. */ add_taint(TAINT_TEST, LOCKDEP_STILL_OK); @@ -679,37 +680,42 @@ int kunit_run_tests(struct kunit_suite *suite) } else { /* Get initial param. */ param_desc[0] = '\0'; - test.param_value = test_case->generate_params(NULL, param_desc); + /* TODO: Make generate_params try-catch */ + curr_param = test_case->generate_params(NULL, param_desc); test_case->status = KUNIT_SKIPPED; kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT "KTAP version 1\n"); kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT "# Subtest: %s", test_case->name); - while (test.param_value) { - kunit_run_case_catch_errors(suite, test_case, &test); + while (curr_param) { + struct kunit param_test = { + .param_value = curr_param, + .param_index = ++test.param_index, + .parent = &test, + }; + kunit_init_test(¶m_test, test_case->name, test_case->log); + kunit_run_case_catch_errors(suite, test_case, ¶m_test); if (param_desc[0] == '\0') { snprintf(param_desc, sizeof(param_desc), - "param-%d", test.param_index); + "param-%d", param_test.param_index); } - kunit_print_ok_not_ok(&test, KUNIT_LEVEL_CASE_PARAM, - test.status, - test.param_index + 1, + kunit_print_ok_not_ok(¶m_test, KUNIT_LEVEL_CASE_PARAM, + param_test.status, + param_test.param_index, param_desc, - test.status_comment); + param_test.status_comment); - kunit_update_stats(¶m_stats, test.status); + kunit_update_stats(¶m_stats, param_test.status); /* Get next param. */ param_desc[0] = '\0'; - test.param_value = test_case->generate_params(test.param_value, param_desc); - test.param_index++; - test.status = KUNIT_SUCCESS; - test.status_comment[0] = '\0'; - test.priv = NULL; + curr_param = test_case->generate_params(curr_param, param_desc); } + /* TODO: Put this kunit_cleanup into a try-catch. */ + kunit_cleanup(&test); } kunit_print_attr((void *)test_case, true, KUNIT_LEVEL_CASE); -- cgit v1.2.3 From 241423580e5e8d8b10b14b382379f4928b87be17 Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:32 +0800 Subject: kunit: Introduce param_init/exit for parameterized test context management Add (*param_init) and (*param_exit) function pointers to `struct kunit_case`. Users will be able to set them via the new KUNIT_CASE_PARAM_WITH_INIT() macro. param_init/exit will be invoked by kunit_run_tests() once before and once after the parameterized test, respectively. They will receive the `struct kunit` that holds the parameterized test context; facilitating init and exit for shared state. This patch also sets param_init/exit to None in rust/kernel/kunit.rs. Link: https://lore.kernel.org/r/20250826091341.1427123-3-davidgow@google.com Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Marie Zhussupova Signed-off-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/test.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 587b5c51db58..0fe61dec5a96 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -641,6 +641,20 @@ static void kunit_accumulate_stats(struct kunit_result_stats *total, total->total += add.total; } +static void kunit_init_parent_param_test(struct kunit_case *test_case, struct kunit *test) +{ + if (test_case->param_init) { + int err = test_case->param_init(test); + + if (err) { + kunit_err(test_case, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT + "# failed to initialize parent parameter test (%d)", err); + test->status = KUNIT_FAILURE; + test_case->status = KUNIT_FAILURE; + } + } +} + int kunit_run_tests(struct kunit_suite *suite) { char param_desc[KUNIT_PARAM_DESC_SIZE]; @@ -678,6 +692,11 @@ int kunit_run_tests(struct kunit_suite *suite) kunit_run_case_catch_errors(suite, test_case, &test); kunit_update_stats(¶m_stats, test.status); } else { + kunit_init_parent_param_test(test_case, &test); + if (test_case->status == KUNIT_FAILURE) { + kunit_update_stats(¶m_stats, test.status); + goto test_case_end; + } /* Get initial param. */ param_desc[0] = '\0'; /* TODO: Make generate_params try-catch */ @@ -714,10 +733,16 @@ int kunit_run_tests(struct kunit_suite *suite) param_desc[0] = '\0'; curr_param = test_case->generate_params(curr_param, param_desc); } + /* + * TODO: Put into a try catch. Since we don't need suite->exit + * for it we can't reuse kunit_try_run_cleanup for this yet. + */ + if (test_case->param_exit) + test_case->param_exit(&test); /* TODO: Put this kunit_cleanup into a try-catch. */ kunit_cleanup(&test); } - +test_case_end: kunit_print_attr((void *)test_case, true, KUNIT_LEVEL_CASE); kunit_print_test_stats(&test, param_stats); -- cgit v1.2.3 From b9a214b5f6aa55870b5678f31084f85c0c11ffdc Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:33 +0800 Subject: kunit: Pass parameterized test context to generate_params() To enable more complex parameterized testing scenarios, the generate_params() function needs additional context beyond just the previously generated parameter. This patch modifies the generate_params() function signature to include an extra `struct kunit *test` argument, giving test users access to the parameterized test context when generating parameters. The `struct kunit *test` argument was added as the first parameter to the function signature as it aligns with the convention of other KUnit functions that accept `struct kunit *test` first. This also mirrors the "this" or "self" reference found in object-oriented programming languages. This patch also modifies xe_pci_live_device_gen_param() in xe_pci.c and nthreads_gen_params() in kcsan_test.c to reflect this signature change. Link: https://lore.kernel.org/r/20250826091341.1427123-4-davidgow@google.com Reviewed-by: David Gow Reviewed-by: Rae Moar Acked-by: Marco Elver Acked-by: Rodrigo Vivi Signed-off-by: Marie Zhussupova [Catch some additional gen_params signatures in drm/xe/tests --David] Signed-off-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 0fe61dec5a96..50705248abad 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -700,7 +700,7 @@ int kunit_run_tests(struct kunit_suite *suite) /* Get initial param. */ param_desc[0] = '\0'; /* TODO: Make generate_params try-catch */ - curr_param = test_case->generate_params(NULL, param_desc); + curr_param = test_case->generate_params(&test, NULL, param_desc); test_case->status = KUNIT_SKIPPED; kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT "KTAP version 1\n"); @@ -731,7 +731,8 @@ int kunit_run_tests(struct kunit_suite *suite) /* Get next param. */ param_desc[0] = '\0'; - curr_param = test_case->generate_params(curr_param, param_desc); + curr_param = test_case->generate_params(&test, curr_param, + param_desc); } /* * TODO: Put into a try catch. Since we don't need suite->exit -- cgit v1.2.3 From b820b9077b7f4008cc44a40261aefa681c63c7d3 Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:34 +0800 Subject: kunit: Enable direct registration of parameter arrays to a KUnit test KUnit parameterized tests currently support two primary methods f or getting parameters: 1. Defining custom logic within a generate_params() function. 2. Using the KUNIT_ARRAY_PARAM() and KUNIT_ARRAY_PARAM_DESC() macros with a pre-defined static array and passing the created *_gen_params() to KUNIT_CASE_PARAM(). These methods present limitations when dealing with dynamically generated parameter arrays, or in scenarios where populating parameters sequentially via generate_params() is inefficient or overly complex. This patch addresses these limitations by adding a new `params_array` field to `struct kunit`, of the type `kunit_params`. The `struct kunit_params` is designed to store the parameter array itself, along with essential metadata including the parameter count, parameter size, and a get_description() function for providing custom descriptions for individual parameters. The `params_array` field can be populated by calling the new kunit_register_params_array() macro from within a param_init() function. This will register the array as part of the parameterized test context. The user will then need to pass kunit_array_gen_params() to the KUNIT_CASE_PARAM_WITH_INIT() macro as the generator function, if not providing their own. kunit_array_gen_params() is a KUnit helper that will use the registered array to generate parameters. The arrays passed to KUNIT_ARRAY_PARAM(,DESC) will also be registered to the parameterized test context for consistency as well as for higher availability of the parameter count that will be used for outputting a KTAP test plan for a parameterized test. This modification provides greater flexibility to the KUnit framework, allowing testers to easily register and utilize both dynamic and static parameter arrays. Link: https://lore.kernel.org/r/20250826091341.1427123-5-davidgow@google.com Reviewed-by: David Gow Reviewed-by: Rae Moar Signed-off-by: Marie Zhussupova [Only output the test plan if using kunit_array_gen_params --David] Signed-off-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/test.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'lib') diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 50705248abad..bb66ea1a3eac 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -337,6 +337,14 @@ void __kunit_do_failed_assertion(struct kunit *test, } EXPORT_SYMBOL_GPL(__kunit_do_failed_assertion); +static void kunit_init_params(struct kunit *test) +{ + test->params_array.params = NULL; + test->params_array.get_description = NULL; + test->params_array.num_params = 0; + test->params_array.elem_size = 0; +} + void kunit_init_test(struct kunit *test, const char *name, struct string_stream *log) { spin_lock_init(&test->lock); @@ -347,6 +355,7 @@ void kunit_init_test(struct kunit *test, const char *name, struct string_stream string_stream_clear(log); test->status = KUNIT_SUCCESS; test->status_comment[0] = '\0'; + kunit_init_params(test); } EXPORT_SYMBOL_GPL(kunit_init_test); @@ -641,6 +650,23 @@ static void kunit_accumulate_stats(struct kunit_result_stats *total, total->total += add.total; } +const void *kunit_array_gen_params(struct kunit *test, const void *prev, char *desc) +{ + struct kunit_params *params_arr = &test->params_array; + const void *param; + + if (test->param_index < params_arr->num_params) { + param = (char *)params_arr->params + + test->param_index * params_arr->elem_size; + + if (params_arr->get_description) + params_arr->get_description(test, param, desc); + return param; + } + return NULL; +} +EXPORT_SYMBOL_GPL(kunit_array_gen_params); + static void kunit_init_parent_param_test(struct kunit_case *test_case, struct kunit *test) { if (test_case->param_init) { @@ -706,6 +732,12 @@ int kunit_run_tests(struct kunit_suite *suite) "KTAP version 1\n"); kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT KUNIT_SUBTEST_INDENT "# Subtest: %s", test_case->name); + if (test.params_array.params && + test_case->generate_params == kunit_array_gen_params) { + kunit_log(KERN_INFO, &test, KUNIT_SUBTEST_INDENT + KUNIT_SUBTEST_INDENT "1..%zd\n", + test.params_array.num_params); + } while (curr_param) { struct kunit param_test = { -- cgit v1.2.3 From a03e3caa0e961cadbab18ed104fb2c20a254c50d Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:35 +0800 Subject: kunit: Add example parameterized test with shared resource management using the Resource API Add example_params_test_with_init() to illustrate how to manage shared resources across a parameterized KUnit test. This example showcases the use of the new param_init() function and its registration to a test using the KUNIT_CASE_PARAM_WITH_INIT() macro. Additionally, the test demonstrates how to directly pass a parameter array to the parameterized test context via kunit_register_params_array() and leveraging the Resource API for shared resource management. Link: https://lore.kernel.org/r/20250826091341.1427123-6-davidgow@google.com Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Marie Zhussupova Signed-off-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/kunit-example-test.c | 113 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) (limited to 'lib') diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c index 3056d6bc705d..3e858367be01 100644 --- a/lib/kunit/kunit-example-test.c +++ b/lib/kunit/kunit-example-test.c @@ -277,6 +277,117 @@ static void example_slow_test(struct kunit *test) KUNIT_EXPECT_EQ(test, 1 + 1, 2); } +/* + * This custom function allocates memory and sets the information we want + * stored in the kunit_resource->data field. + */ +static int example_resource_init(struct kunit_resource *res, void *context) +{ + int *info = kmalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return -ENOMEM; + *info = *(int *)context; + res->data = info; + return 0; +} + +/* + * This function deallocates memory for the kunit_resource->data field. + */ +static void example_resource_free(struct kunit_resource *res) +{ + kfree(res->data); +} + +/* + * This match function is invoked by kunit_find_resource() to locate + * a test resource based on certain criteria. + */ +static bool example_resource_alloc_match(struct kunit *test, + struct kunit_resource *res, + void *match_data) +{ + return res->data && res->free == example_resource_free; +} + +/* + * This is an example of a function that provides a description for each of the + * parameters in a parameterized test. + */ +static void example_param_array_get_desc(struct kunit *test, const void *p, char *desc) +{ + const struct example_param *param = p; + + snprintf(desc, KUNIT_PARAM_DESC_SIZE, + "example check if %d is less than or equal to 3", param->value); +} + +/* + * This function gets passed in the parameterized test context i.e. the + * struct kunit belonging to the parameterized test. You can use this function + * to add resources you want shared across the whole parameterized test or + * for additional setup. + */ +static int example_param_init(struct kunit *test) +{ + int ctx = 3; /* Data to be stored. */ + size_t arr_size = ARRAY_SIZE(example_params_array); + + /* + * This allocates a struct kunit_resource, sets its data field to + * ctx, and adds it to the struct kunit's resources list. Note that + * this is parameterized test managed. So, it doesn't need to have + * a custom exit function to deallocation as it will get cleaned up at + * the end of the parameterized test. + */ + void *data = kunit_alloc_resource(test, example_resource_init, example_resource_free, + GFP_KERNEL, &ctx); + + if (!data) + return -ENOMEM; + /* + * Pass the parameter array information to the parameterized test context + * struct kunit. Note that you will need to provide kunit_array_gen_params() + * as the generator function to KUNIT_CASE_PARAM_WITH_INIT() when registering + * a parameter array this route. + */ + kunit_register_params_array(test, example_params_array, arr_size, + example_param_array_get_desc); + return 0; +} + +/* + * This is an example of a test that uses shared resources available in the + * parameterized test context. + */ +static void example_params_test_with_init(struct kunit *test) +{ + int threshold; + struct kunit_resource *res; + const struct example_param *param = test->param_value; + + /* By design, param pointer will not be NULL. */ + KUNIT_ASSERT_NOT_NULL(test, param); + + /* + * Here we pass test->parent to search for shared resources in the + * parameterized test context. + */ + res = kunit_find_resource(test->parent, example_resource_alloc_match, NULL); + + KUNIT_ASSERT_NOT_NULL(test, res); + + /* Since kunit_resource->data is a void pointer we need to typecast it. */ + threshold = *((int *)res->data); + + /* Assert that the parameter is less than or equal to a certain threshold. */ + KUNIT_ASSERT_LE(test, param->value, threshold); + + /* This decreases the reference count after calling kunit_find_resource(). */ + kunit_put_resource(res); +} + /* * Here we make a list of all the test cases we want to add to the test suite * below. @@ -296,6 +407,8 @@ static struct kunit_case example_test_cases[] = { KUNIT_CASE(example_static_stub_using_fn_ptr_test), KUNIT_CASE(example_priv_test), KUNIT_CASE_PARAM(example_params_test, example_gen_params), + KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init, kunit_array_gen_params, + example_param_init, NULL), KUNIT_CASE_SLOW(example_slow_test), {} }; -- cgit v1.2.3 From f9687f351ff9ec3bd8b74fd7873d2b5940829a81 Mon Sep 17 00:00:00 2001 From: Marie Zhussupova Date: Tue, 26 Aug 2025 17:13:36 +0800 Subject: kunit: Add example parameterized test with direct dynamic parameter array setup Introduce example_params_test_with_init_dynamic_arr(). This new KUnit test demonstrates directly assigning a dynamic parameter array, using the kunit_register_params_array() macro, to a parameterized test context. It highlights the use of param_init() and param_exit() for initialization and exit of a parameterized test, and their registration to the test case with KUNIT_CASE_PARAM_WITH_INIT(). Link: https://lore.kernel.org/r/20250826091341.1427123-7-davidgow@google.com Reviewed-by: Rae Moar Reviewed-by: David Gow Signed-off-by: Marie Zhussupova Signed-off-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/kunit-example-test.c | 104 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) (limited to 'lib') diff --git a/lib/kunit/kunit-example-test.c b/lib/kunit/kunit-example-test.c index 3e858367be01..9452b163956f 100644 --- a/lib/kunit/kunit-example-test.c +++ b/lib/kunit/kunit-example-test.c @@ -388,6 +388,107 @@ static void example_params_test_with_init(struct kunit *test) kunit_put_resource(res); } +/* + * Helper function to create a parameter array of Fibonacci numbers. This example + * highlights a parameter generation scenario that is: + * 1. Not feasible to fully pre-generate at compile time. + * 2. Challenging to implement with a standard generate_params() function, + * as it only provides the previous parameter, while Fibonacci requires + * access to two preceding values for calculation. + */ +static void *make_fibonacci_params(struct kunit *test, size_t seq_size) +{ + int *seq; + + if (seq_size <= 0) + return NULL; + /* + * Using kunit_kmalloc_array here ties the lifetime of the array to + * the parameterized test i.e. it will get automatically cleaned up + * by KUnit after the parameterized test finishes. + */ + seq = kunit_kmalloc_array(test, seq_size, sizeof(int), GFP_KERNEL); + + if (!seq) + return NULL; + if (seq_size >= 1) + seq[0] = 0; + if (seq_size >= 2) + seq[1] = 1; + for (int i = 2; i < seq_size; i++) + seq[i] = seq[i - 1] + seq[i - 2]; + return seq; +} + +/* + * This is an example of a function that provides a description for each of the + * parameters. + */ +static void example_param_dynamic_arr_get_desc(struct kunit *test, const void *p, char *desc) +{ + const int *fib_num = p; + + snprintf(desc, KUNIT_PARAM_DESC_SIZE, "fibonacci param: %d", *fib_num); +} + +/* + * Example of a parameterized test param_init() function that registers a dynamic + * array of parameters. + */ +static int example_param_init_dynamic_arr(struct kunit *test) +{ + size_t seq_size; + int *fibonacci_params; + + kunit_info(test, "initializing parameterized test\n"); + + seq_size = 6; + fibonacci_params = make_fibonacci_params(test, seq_size); + + if (!fibonacci_params) + return -ENOMEM; + + /* + * Passes the dynamic parameter array information to the parameterized test + * context struct kunit. The array and its metadata will be stored in + * test->parent->params_array. The array itself will be located in + * params_data.params. + * + * Note that you will need to pass kunit_array_gen_params() as the + * generator function to KUNIT_CASE_PARAM_WITH_INIT() when registering + * a parameter array this route. + */ + kunit_register_params_array(test, fibonacci_params, seq_size, + example_param_dynamic_arr_get_desc); + return 0; +} + +/* + * Example of a parameterized test param_exit() function that outputs a log + * at the end of the parameterized test. It could also be used for any other + * teardown logic. + */ +static void example_param_exit_dynamic_arr(struct kunit *test) +{ + kunit_info(test, "exiting parameterized test\n"); +} + +/* + * Example of test that uses the registered dynamic array to perform assertions + * and expectations. + */ +static void example_params_test_with_init_dynamic_arr(struct kunit *test) +{ + const int *param = test->param_value; + int param_val; + + /* By design, param pointer will not be NULL. */ + KUNIT_ASSERT_NOT_NULL(test, param); + + param_val = *param; + KUNIT_EXPECT_EQ(test, param_val - param_val, 0); +} + /* * Here we make a list of all the test cases we want to add to the test suite * below. @@ -409,6 +510,9 @@ static struct kunit_case example_test_cases[] = { KUNIT_CASE_PARAM(example_params_test, example_gen_params), KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init, kunit_array_gen_params, example_param_init, NULL), + KUNIT_CASE_PARAM_WITH_INIT(example_params_test_with_init_dynamic_arr, + kunit_array_gen_params, example_param_init_dynamic_arr, + example_param_exit_dynamic_arr), KUNIT_CASE_SLOW(example_slow_test), {} }; -- cgit v1.2.3 From d6b6aac0cdb4b4f81cccc531ed76211d56c17444 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:55 -0700 Subject: lib/crypto: tests: Add KUnit tests for MD5 and HMAC-MD5 Add a KUnit test suite for the MD5 library functions, including the corresponding HMAC support. The core test logic is in the previously-added hash-test-template.h. This commit just adds the actual KUnit suite, and it adds the generated test vectors to the tree so that gen-hash-testvecs.py won't have to be run at build time. Link: https://lore.kernel.org/r/20250805222855.10362-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/Kconfig | 10 +++ lib/crypto/tests/Makefile | 1 + lib/crypto/tests/md5-testvecs.h | 186 ++++++++++++++++++++++++++++++++++++++++ lib/crypto/tests/md5_kunit.c | 39 +++++++++ 4 files changed, 236 insertions(+) create mode 100644 lib/crypto/tests/md5-testvecs.h create mode 100644 lib/crypto/tests/md5_kunit.c (limited to 'lib') diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index de7e8babb6af..c21d53fd4b0c 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -1,5 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later +config CRYPTO_LIB_MD5_KUNIT_TEST + tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS + select CRYPTO_LIB_BENCHMARK_VISIBLE + select CRYPTO_LIB_MD5 + help + KUnit tests for the MD5 cryptographic hash function and its + corresponding HMAC. + config CRYPTO_LIB_POLY1305_KUNIT_TEST tristate "KUnit tests for Poly1305" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index 8601dccd6fdd..f6f82c6f9cb5 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA256_KUNIT_TEST) += sha224_kunit.o sha256_kunit.o diff --git a/lib/crypto/tests/md5-testvecs.h b/lib/crypto/tests/md5-testvecs.h new file mode 100644 index 000000000000..be6727feb296 --- /dev/null +++ b/lib/crypto/tests/md5-testvecs.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py md5 */ + +static const struct { + size_t data_len; + u8 digest[MD5_DIGEST_SIZE]; +} hash_testvecs[] = { + { + .data_len = 0, + .digest = { + 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04, + 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, + }, + }, + { + .data_len = 1, + .digest = { + 0x16, 0x7b, 0x86, 0xf2, 0x1d, 0xf3, 0x76, 0xc9, + 0x6f, 0x10, 0xa0, 0x61, 0x5b, 0x14, 0x20, 0x0b, + }, + }, + { + .data_len = 2, + .digest = { + 0x2d, 0x30, 0x96, 0xc7, 0x43, 0x40, 0xed, 0xb2, + 0xfb, 0x84, 0x63, 0x9a, 0xec, 0xc7, 0x3c, 0x3c, + }, + }, + { + .data_len = 3, + .digest = { + 0xe5, 0x0f, 0xce, 0xe0, 0xc8, 0xff, 0x4e, 0x08, + 0x5e, 0x19, 0xe5, 0xf2, 0x08, 0x11, 0x19, 0x16, + }, + }, + { + .data_len = 16, + .digest = { + 0xe8, 0xca, 0x29, 0x05, 0x2f, 0xd1, 0xf3, 0x99, + 0x40, 0x71, 0xf5, 0xc2, 0xf7, 0xf8, 0x17, 0x3e, + }, + }, + { + .data_len = 32, + .digest = { + 0xe3, 0x20, 0xc1, 0xd8, 0x21, 0x14, 0x44, 0x59, + 0x1a, 0xf5, 0x91, 0xaf, 0x69, 0xbe, 0x93, 0x9d, + }, + }, + { + .data_len = 48, + .digest = { + 0xfb, 0x06, 0xb0, 0xf0, 0x00, 0x10, 0x4b, 0x68, + 0x3d, 0x75, 0xf9, 0x70, 0xde, 0xbb, 0x32, 0x16, + }, + }, + { + .data_len = 49, + .digest = { + 0x52, 0x86, 0x48, 0x8b, 0xae, 0x91, 0x7c, 0x4e, + 0xc2, 0x2a, 0x69, 0x07, 0x35, 0xcc, 0xb2, 0x88, + }, + }, + { + .data_len = 63, + .digest = { + 0xfa, 0xd3, 0xf6, 0xe6, 0x7b, 0x1a, 0xc6, 0x05, + 0x73, 0x35, 0x02, 0xab, 0xc7, 0xb3, 0x47, 0xcb, + }, + }, + { + .data_len = 64, + .digest = { + 0xc5, 0x59, 0x29, 0xe9, 0x0a, 0x4a, 0x86, 0x43, + 0x7c, 0xaf, 0xdf, 0x83, 0xd3, 0xb8, 0x33, 0x5f, + }, + }, + { + .data_len = 65, + .digest = { + 0x80, 0x05, 0x75, 0x39, 0xec, 0x44, 0x8a, 0x81, + 0xe7, 0x6e, 0x8d, 0xd1, 0xc6, 0xeb, 0xc2, 0xf0, + }, + }, + { + .data_len = 127, + .digest = { + 0x3f, 0x02, 0xe8, 0xc6, 0xb8, 0x6a, 0x39, 0xc3, + 0xa4, 0x1c, 0xd9, 0x8f, 0x4a, 0x71, 0x40, 0x30, + }, + }, + { + .data_len = 128, + .digest = { + 0x89, 0x4f, 0x79, 0x3e, 0xff, 0x0c, 0x22, 0x60, + 0xa2, 0xdc, 0x10, 0x5f, 0x23, 0x0a, 0xe7, 0xc6, + }, + }, + { + .data_len = 129, + .digest = { + 0x06, 0x56, 0x61, 0xb8, 0x8a, 0x82, 0x77, 0x1b, + 0x2c, 0x35, 0xb8, 0x9f, 0xd6, 0xf7, 0xbd, 0x5a, + }, + }, + { + .data_len = 256, + .digest = { + 0x5d, 0xdf, 0x7d, 0xc8, 0x43, 0x96, 0x3b, 0xdb, + 0xc7, 0x0e, 0x44, 0x42, 0x23, 0xf7, 0xed, 0xdf, + }, + }, + { + .data_len = 511, + .digest = { + 0xf6, 0x5f, 0x26, 0x51, 0x8a, 0x5a, 0x46, 0x8f, + 0x48, 0x72, 0x90, 0x74, 0x9d, 0x87, 0xbd, 0xdf, + }, + }, + { + .data_len = 513, + .digest = { + 0xd8, 0x2c, 0xc9, 0x76, 0xfa, 0x67, 0x2e, 0xa6, + 0xc8, 0x12, 0x4a, 0x64, 0xaa, 0x0b, 0x3d, 0xbd, + }, + }, + { + .data_len = 1000, + .digest = { + 0xe2, 0x7e, 0xb4, 0x5f, 0xe1, 0x74, 0x51, 0xfc, + 0xe0, 0xc8, 0xd5, 0xe6, 0x8b, 0x40, 0xd2, 0x0e, + }, + }, + { + .data_len = 3333, + .digest = { + 0xcd, 0x7d, 0x56, 0xa9, 0x4c, 0x47, 0xea, 0xc2, + 0x34, 0x0b, 0x84, 0x05, 0xf9, 0xad, 0xbb, 0x46, + }, + }, + { + .data_len = 4096, + .digest = { + 0x63, 0x6e, 0x58, 0xb3, 0x94, 0x6b, 0x83, 0x5f, + 0x1f, 0x0e, 0xd3, 0x66, 0x78, 0x71, 0x98, 0x42, + }, + }, + { + .data_len = 4128, + .digest = { + 0x9d, 0x68, 0xfc, 0x26, 0x8b, 0x4c, 0xa8, 0xe7, + 0x30, 0x0b, 0x19, 0x52, 0x6e, 0xa5, 0x65, 0x1c, + }, + }, + { + .data_len = 4160, + .digest = { + 0x1c, 0xaa, 0x7d, 0xee, 0x91, 0x01, 0xe2, 0x5a, + 0xec, 0xe9, 0xde, 0x57, 0x0a, 0xb6, 0x4c, 0x2f, + }, + }, + { + .data_len = 4224, + .digest = { + 0x1b, 0x31, 0xe3, 0x14, 0x07, 0x16, 0x17, 0xc6, + 0x98, 0x79, 0x88, 0x23, 0xb6, 0x3b, 0x25, 0xc4, + }, + }, + { + .data_len = 16384, + .digest = { + 0xc6, 0x3d, 0x56, 0x90, 0xf0, 0xf6, 0xe6, 0x50, + 0xf4, 0x76, 0x78, 0x67, 0xa3, 0xdd, 0x62, 0x7b, + }, + }, +}; + +static const u8 hash_testvec_consolidated[MD5_DIGEST_SIZE] = { + 0x70, 0x86, 0x9e, 0x6c, 0xa4, 0xc6, 0x71, 0x43, + 0x26, 0x02, 0x1b, 0x3f, 0xfd, 0x56, 0x9f, 0xa6, +}; + +static const u8 hmac_testvec_consolidated[MD5_DIGEST_SIZE] = { + 0x10, 0x02, 0x74, 0xf6, 0x4d, 0xb3, 0x3c, 0xc7, + 0xa1, 0xf7, 0xe6, 0xd4, 0x32, 0x64, 0xfa, 0x6d, +}; diff --git a/lib/crypto/tests/md5_kunit.c b/lib/crypto/tests/md5_kunit.c new file mode 100644 index 000000000000..38bd52c25ae3 --- /dev/null +++ b/lib/crypto/tests/md5_kunit.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2025 Google LLC + */ +#include +#include "md5-testvecs.h" + +#define HASH md5 +#define HASH_CTX md5_ctx +#define HASH_SIZE MD5_DIGEST_SIZE +#define HASH_INIT md5_init +#define HASH_UPDATE md5_update +#define HASH_FINAL md5_final +#define HMAC_KEY hmac_md5_key +#define HMAC_CTX hmac_md5_ctx +#define HMAC_PREPAREKEY hmac_md5_preparekey +#define HMAC_INIT hmac_md5_init +#define HMAC_UPDATE hmac_md5_update +#define HMAC_FINAL hmac_md5_final +#define HMAC hmac_md5 +#define HMAC_USINGRAWKEY hmac_md5_usingrawkey +#include "hash-test-template.h" + +static struct kunit_case hash_test_cases[] = { + HASH_KUNIT_CASES, + KUNIT_CASE(benchmark_hash), + {}, +}; + +static struct kunit_suite hash_test_suite = { + .name = "md5", + .test_cases = hash_test_cases, + .suite_init = hash_suite_init, + .suite_exit = hash_suite_exit, +}; +kunit_test_suite(hash_test_suite); + +MODULE_DESCRIPTION("KUnit tests and benchmark for MD5 and HMAC-MD5"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 5012bd2dc6ab0c4499923b3b6c6113def9b0c88b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 15 Aug 2025 19:04:57 -0700 Subject: lib/crypto: Drop inline from all *_mod_init_arch() functions Drop 'inline' from all the *_mod_init_arch() functions so that the compiler will warn about any bugs where they are unused due to not being wired up properly. (There are no such bugs currently, so this just establishes a more robust convention for the future. Of course, these functions also tend to get inlined anyway, regardless of the keyword.) Link: https://lore.kernel.org/r/20250816020457.432040-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/sha1.h | 2 +- lib/crypto/arm/sha256.h | 2 +- lib/crypto/arm/sha512.h | 2 +- lib/crypto/arm64/sha1.h | 2 +- lib/crypto/arm64/sha256.h | 2 +- lib/crypto/arm64/sha512.h | 2 +- lib/crypto/riscv/sha256.h | 2 +- lib/crypto/riscv/sha512.h | 2 +- lib/crypto/s390/sha1.h | 2 +- lib/crypto/s390/sha256.h | 2 +- lib/crypto/s390/sha512.h | 2 +- lib/crypto/sparc/md5.h | 2 +- lib/crypto/sparc/sha1.h | 2 +- lib/crypto/sparc/sha256.h | 2 +- lib/crypto/sparc/sha512.h | 2 +- lib/crypto/x86/sha1.h | 2 +- lib/crypto/x86/sha256.h | 2 +- lib/crypto/x86/sha512.h | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/sha1.h b/lib/crypto/arm/sha1.h index fa1e92419000..29f8bcad0447 100644 --- a/lib/crypto/arm/sha1.h +++ b/lib/crypto/arm/sha1.h @@ -35,7 +35,7 @@ static void sha1_blocks(struct sha1_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); diff --git a/lib/crypto/arm/sha256.h b/lib/crypto/arm/sha256.h index eab713e650f3..7556457b3094 100644 --- a/lib/crypto/arm/sha256.h +++ b/lib/crypto/arm/sha256.h @@ -35,7 +35,7 @@ static void sha256_blocks(struct sha256_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); diff --git a/lib/crypto/arm/sha512.h b/lib/crypto/arm/sha512.h index cc2447acd562..d1b485dd275d 100644 --- a/lib/crypto/arm/sha512.h +++ b/lib/crypto/arm/sha512.h @@ -29,7 +29,7 @@ static void sha512_blocks(struct sha512_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (cpu_has_neon()) static_branch_enable(&have_neon); diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h index f822563538cc..aaef4ebfc5e3 100644 --- a/lib/crypto/arm64/sha1.h +++ b/lib/crypto/arm64/sha1.h @@ -32,7 +32,7 @@ static void sha1_blocks(struct sha1_block_state *state, } #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { if (cpu_have_named_feature(SHA1)) static_branch_enable(&have_ce); diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h index d95f1077c32b..be4aeda9d0e6 100644 --- a/lib/crypto/arm64/sha256.h +++ b/lib/crypto/arm64/sha256.h @@ -46,7 +46,7 @@ static void sha256_blocks(struct sha256_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { if (cpu_have_named_feature(ASIMD)) { static_branch_enable(&have_neon); diff --git a/lib/crypto/arm64/sha512.h b/lib/crypto/arm64/sha512.h index 7539ea3fef10..ddb0d256f73a 100644 --- a/lib/crypto/arm64/sha512.h +++ b/lib/crypto/arm64/sha512.h @@ -37,7 +37,7 @@ static void sha512_blocks(struct sha512_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (cpu_have_named_feature(SHA512)) static_branch_enable(&have_sha512_insns); diff --git a/lib/crypto/riscv/sha256.h b/lib/crypto/riscv/sha256.h index f36f68d2e88c..1def18b0a4fb 100644 --- a/lib/crypto/riscv/sha256.h +++ b/lib/crypto/riscv/sha256.h @@ -31,7 +31,7 @@ static void sha256_blocks(struct sha256_block_state *state, } #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { /* Both zvknha and zvknhb provide the SHA-256 instructions. */ if ((riscv_isa_extension_available(NULL, ZVKNHA) || diff --git a/lib/crypto/riscv/sha512.h b/lib/crypto/riscv/sha512.h index 59dc0294a9a7..145bdab1214e 100644 --- a/lib/crypto/riscv/sha512.h +++ b/lib/crypto/riscv/sha512.h @@ -30,7 +30,7 @@ static void sha512_blocks(struct sha512_block_state *state, } #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (riscv_isa_extension_available(NULL, ZVKNHB) && riscv_isa_extension_available(NULL, ZVKB) && diff --git a/lib/crypto/s390/sha1.h b/lib/crypto/s390/sha1.h index 08bd138e881c..73d94476a157 100644 --- a/lib/crypto/s390/sha1.h +++ b/lib/crypto/s390/sha1.h @@ -20,7 +20,7 @@ static void sha1_blocks(struct sha1_block_state *state, } #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { if (cpu_have_feature(S390_CPU_FEATURE_MSA) && cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1)) diff --git a/lib/crypto/s390/sha256.h b/lib/crypto/s390/sha256.h index 70a81cbc06b2..acd483508789 100644 --- a/lib/crypto/s390/sha256.h +++ b/lib/crypto/s390/sha256.h @@ -20,7 +20,7 @@ static void sha256_blocks(struct sha256_block_state *state, } #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { if (cpu_have_feature(S390_CPU_FEATURE_MSA) && cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) diff --git a/lib/crypto/s390/sha512.h b/lib/crypto/s390/sha512.h index 24744651550c..46699d43df7e 100644 --- a/lib/crypto/s390/sha512.h +++ b/lib/crypto/s390/sha512.h @@ -20,7 +20,7 @@ static void sha512_blocks(struct sha512_block_state *state, } #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (cpu_have_feature(S390_CPU_FEATURE_MSA) && cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512)) diff --git a/lib/crypto/sparc/md5.h b/lib/crypto/sparc/md5.h index 3f1b0ed8c0b3..3995f3e075eb 100644 --- a/lib/crypto/sparc/md5.h +++ b/lib/crypto/sparc/md5.h @@ -32,7 +32,7 @@ static void md5_blocks(struct md5_block_state *state, } #define md5_mod_init_arch md5_mod_init_arch -static inline void md5_mod_init_arch(void) +static void md5_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crypto/sparc/sha1.h b/lib/crypto/sparc/sha1.h index 5015f93584b7..bdf771fcc1f7 100644 --- a/lib/crypto/sparc/sha1.h +++ b/lib/crypto/sparc/sha1.h @@ -27,7 +27,7 @@ static void sha1_blocks(struct sha1_block_state *state, } #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crypto/sparc/sha256.h b/lib/crypto/sparc/sha256.h index 1d10108eb195..b2f4419ec778 100644 --- a/lib/crypto/sparc/sha256.h +++ b/lib/crypto/sparc/sha256.h @@ -27,7 +27,7 @@ static void sha256_blocks(struct sha256_block_state *state, } #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crypto/sparc/sha512.h b/lib/crypto/sparc/sha512.h index 55303ab6b15f..a8c37a7d4c39 100644 --- a/lib/crypto/sparc/sha512.h +++ b/lib/crypto/sparc/sha512.h @@ -26,7 +26,7 @@ static void sha512_blocks(struct sha512_block_state *state, } #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crypto/x86/sha1.h b/lib/crypto/x86/sha1.h index e308379d89bc..c48a0131fd12 100644 --- a/lib/crypto/x86/sha1.h +++ b/lib/crypto/x86/sha1.h @@ -55,7 +55,7 @@ static void sha1_blocks(struct sha1_block_state *state, } #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) { static_call_update(sha1_blocks_x86, sha1_blocks_ni); diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h index c852396ef319..41fa95fbc3bf 100644 --- a/lib/crypto/x86/sha256.h +++ b/lib/crypto/x86/sha256.h @@ -36,7 +36,7 @@ static void sha256_blocks(struct sha256_block_state *state, } #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) { static_call_update(sha256_blocks_x86, sha256_blocks_ni); diff --git a/lib/crypto/x86/sha512.h b/lib/crypto/x86/sha512.h index be2c8fc12246..0213c70cedd0 100644 --- a/lib/crypto/x86/sha512.h +++ b/lib/crypto/x86/sha512.h @@ -35,7 +35,7 @@ static void sha512_blocks(struct sha512_block_state *state, } #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL) && boot_cpu_has(X86_FEATURE_AVX)) { -- cgit v1.2.3 From a817de20091c3cf6de19d7ddf099b0e35003b7d0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 21 Aug 2025 14:15:47 -0700 Subject: lib/Kconfig.debug: Drop CLANG_VERSION check from DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT Now that the minimum supported version of LLVM for building the kernel has been bumped to 15.0.0, the CLANG_VERSION check for older than 14.0.0 is always false, so remove it. Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250821-bump-min-llvm-ver-15-v2-10-635f3294e5f0@kernel.org Signed-off-by: Nathan Chancellor --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dc0e0c6ed075..6c12852e77c8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -259,7 +259,7 @@ config DEBUG_INFO_NONE config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT bool "Rely on the toolchain's implicit default DWARF version" select DEBUG_INFO - depends on !CC_IS_CLANG || AS_IS_LLVM || CLANG_VERSION < 140000 || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128) + depends on !CC_IS_CLANG || AS_IS_LLVM || (AS_IS_GNU && AS_VERSION >= 23502 && AS_HAS_NON_CONST_ULEB128) help The implicit default version of DWARF debug info produced by a toolchain changes over time. -- cgit v1.2.3 From 573ad421cc551eb45d9ac9008395f93c070789b8 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 21 Aug 2025 14:15:48 -0700 Subject: objtool: Drop noinstr hack for KCSAN_WEAK_MEMORY Now that the minimum supported version of LLVM for building the kernel has been bumped to 15.0.0, __no_kcsan will always ensure that the thread sanitizer functions are not generated, so remove the check for tsan functions in is_profiling_func() and the always true depends and unnecessary select lines in KCSAN_WEAK_MEMORY. Acked-by: Marco Elver Acked-by: Peter Zijlstra (Intel) Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250821-bump-min-llvm-ver-15-v2-11-635f3294e5f0@kernel.org Signed-off-by: Nathan Chancellor --- lib/Kconfig.kcsan | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 609ddfc73de5..4ce4b0c0109c 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -185,12 +185,6 @@ config KCSAN_WEAK_MEMORY bool "Enable weak memory modeling to detect missing memory barriers" default y depends on KCSAN_STRICT - # We can either let objtool nop __tsan_func_{entry,exit}() and builtin - # atomics instrumentation in .noinstr.text, or use a compiler that can - # implement __no_kcsan to really remove all instrumentation. - depends on !ARCH_WANTS_NO_INSTR || HAVE_NOINSTR_HACK || \ - CC_IS_GCC || CLANG_VERSION >= 140000 - select OBJTOOL if HAVE_NOINSTR_HACK help Enable support for modeling a subset of weak memory, which allows detecting a subset of data races due to missing memory barriers. -- cgit v1.2.3 From 5ff8c11775c744dc5076ce126eb1b3adce0a70ae Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 21 Aug 2025 14:15:49 -0700 Subject: KMSAN: Remove tautological checks Now that the minimum supported version of LLVM for building the kernel has been bumped to 15.0.0, two KMSAN checks can be cleaned up. CONFIG_HAVE_KMSAN_COMPILER will always be true when using clang so remove the cc-option test and use a simple check for CONFIG_CC_IS_CLANG. CONFIG_HAVE_KMSAN_PARAM_RETVAL will always be true so it can be removed outright. Acked-by: Marco Elver Reviewed-by: Kees Cook Link: https://lore.kernel.org/r/20250821-bump-min-llvm-ver-15-v2-12-635f3294e5f0@kernel.org Signed-off-by: Nathan Chancellor --- lib/Kconfig.kmsan | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.kmsan b/lib/Kconfig.kmsan index 0541d7b079cc..7251b6b59e69 100644 --- a/lib/Kconfig.kmsan +++ b/lib/Kconfig.kmsan @@ -3,10 +3,7 @@ config HAVE_ARCH_KMSAN bool config HAVE_KMSAN_COMPILER - # Clang versions <14.0.0 also support -fsanitize=kernel-memory, but not - # all the features necessary to build the kernel with KMSAN. - depends on CC_IS_CLANG && CLANG_VERSION >= 140000 - def_bool $(cc-option,-fsanitize=kernel-memory -mllvm -msan-disable-checks=1) + def_bool CC_IS_CLANG config KMSAN bool "KMSAN: detector of uninitialized values use" @@ -28,15 +25,9 @@ config KMSAN if KMSAN -config HAVE_KMSAN_PARAM_RETVAL - # -fsanitize-memory-param-retval is supported only by Clang >= 14. - depends on HAVE_KMSAN_COMPILER - def_bool $(cc-option,-fsanitize=kernel-memory -fsanitize-memory-param-retval) - config KMSAN_CHECK_PARAM_RETVAL bool "Check for uninitialized values passed to and returned from functions" default y - depends on HAVE_KMSAN_PARAM_RETVAL help If the compiler supports -fsanitize-memory-param-retval, KMSAN will eagerly check every function parameter passed by value and every -- cgit v1.2.3 From df220cc5e689213c34a0eec7ef26d25f503c77ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Aug 2025 08:25:11 -0700 Subject: lib/crypto: poly1305: Remove unused function poly1305_is_arch_optimized() poly1305_is_arch_optimized() is unused, so remove it. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829152513.92459-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/poly1305-glue.c | 7 ------- lib/crypto/arm64/poly1305-glue.c | 7 ------- lib/crypto/mips/poly1305-glue.c | 6 ------ lib/crypto/powerpc/poly1305-p10-glue.c | 6 ------ lib/crypto/x86/poly1305_glue.c | 6 ------ 5 files changed, 32 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/poly1305-glue.c b/lib/crypto/arm/poly1305-glue.c index 2d86c78af883..9e513e319e37 100644 --- a/lib/crypto/arm/poly1305-glue.c +++ b/lib/crypto/arm/poly1305-glue.c @@ -51,13 +51,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -bool poly1305_is_arch_optimized(void) -{ - /* We always can use at least the ARM scalar implementation. */ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init arm_poly1305_mod_init(void) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c index 31aea21ce42f..d4a522e7d25a 100644 --- a/lib/crypto/arm64/poly1305-glue.c +++ b/lib/crypto/arm64/poly1305-glue.c @@ -50,13 +50,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -bool poly1305_is_arch_optimized(void) -{ - /* We always can use at least the ARM64 scalar implementation. */ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init neon_poly1305_mod_init(void) { if (cpu_have_named_feature(ASIMD)) diff --git a/lib/crypto/mips/poly1305-glue.c b/lib/crypto/mips/poly1305-glue.c index 764a38a65200..002f50f710ab 100644 --- a/lib/crypto/mips/poly1305-glue.c +++ b/lib/crypto/mips/poly1305-glue.c @@ -23,11 +23,5 @@ asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, const u32 nonce[4]); EXPORT_SYMBOL_GPL(poly1305_emit_arch); -bool poly1305_is_arch_optimized(void) -{ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated"); MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/powerpc/poly1305-p10-glue.c b/lib/crypto/powerpc/poly1305-p10-glue.c index 3f1664a724b6..184a71f9c1de 100644 --- a/lib/crypto/powerpc/poly1305-p10-glue.c +++ b/lib/crypto/powerpc/poly1305-p10-glue.c @@ -72,12 +72,6 @@ void poly1305_emit_arch(const struct poly1305_state *state, } EXPORT_SYMBOL_GPL(poly1305_emit_arch); -bool poly1305_is_arch_optimized(void) -{ - return static_key_enabled(&have_p10); -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init poly1305_p10_init(void) { if (cpu_has_feature(CPU_FTR_ARCH_31)) diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305_glue.c index 856d48fd422b..deb5841cb0ad 100644 --- a/lib/crypto/x86/poly1305_glue.c +++ b/lib/crypto/x86/poly1305_glue.c @@ -141,12 +141,6 @@ void poly1305_emit_arch(const struct poly1305_state *ctx, } EXPORT_SYMBOL_GPL(poly1305_emit_arch); -bool poly1305_is_arch_optimized(void) -{ - return static_key_enabled(&poly1305_use_avx); -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init poly1305_simd_mod_init(void) { if (boot_cpu_has(X86_FEATURE_AVX) && -- cgit v1.2.3 From b646b782e522da3509e61f971e5502fccb3a3723 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Aug 2025 08:25:12 -0700 Subject: lib/crypto: poly1305: Consolidate into single module Consolidate the Poly1305 code into a single module, similar to various other algorithms (SHA-1, SHA-256, SHA-512, etc.): - Each arch now provides a header file lib/crypto/$(SRCARCH)/poly1305.h, replacing lib/crypto/$(SRCARCH)/poly1305*.c. The header defines poly1305_block_init(), poly1305_blocks(), poly1305_emit(), and optionally poly1305_mod_init_arch(). It is included by lib/crypto/poly1305.c, and thus the code gets built into the single libpoly1305 module, with improved inlining in some cases. - Whether arch-optimized Poly1305 is buildable is now controlled centrally by lib/crypto/Kconfig instead of by lib/crypto/$(SRCARCH)/Kconfig. The conditions for enabling it remain the same as before, and it remains enabled by default. (The PPC64 one remains unconditionally disabled due to 'depends on BROKEN'.) - Any additional arch-specific translation units for the optimized Poly1305 code, such as assembly files, are now compiled by lib/crypto/Makefile instead of lib/crypto/$(SRCARCH)/Makefile. A special consideration is needed because the Adiantum code uses the poly1305_core_*() functions directly. For now, just carry forward that approach. This means retaining the CRYPTO_LIB_POLY1305_GENERIC kconfig symbol, and keeping the poly1305_core_*() functions in separate translation units. So it's not quite as streamlined I've done with the other hash functions, but we still get a single libpoly1305 module. Note: to see the diff from the arm, arm64, and x86 .c files to the new .h files, view this commit with 'git show -M10'. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829152513.92459-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 50 ++++---- lib/crypto/Makefile | 59 +++++++++- lib/crypto/arm/Kconfig | 5 - lib/crypto/arm/Makefile | 18 --- lib/crypto/arm/poly1305-armv4.pl | 3 +- lib/crypto/arm/poly1305-glue.c | 69 ----------- lib/crypto/arm/poly1305.h | 53 +++++++++ lib/crypto/arm64/Kconfig | 6 - lib/crypto/arm64/Makefile | 13 --- lib/crypto/arm64/poly1305-armv8.pl | 3 + lib/crypto/arm64/poly1305-glue.c | 67 ----------- lib/crypto/arm64/poly1305.h | 50 ++++++++ lib/crypto/mips/Kconfig | 5 - lib/crypto/mips/Makefile | 14 --- lib/crypto/mips/poly1305-glue.c | 27 ----- lib/crypto/mips/poly1305-mips.pl | 8 +- lib/crypto/mips/poly1305.h | 14 +++ lib/crypto/poly1305-generic.c | 25 ---- lib/crypto/poly1305.c | 81 ++++++++----- lib/crypto/powerpc/Kconfig | 8 -- lib/crypto/powerpc/Makefile | 3 - lib/crypto/powerpc/poly1305-p10-glue.c | 90 -------------- lib/crypto/powerpc/poly1305.h | 74 ++++++++++++ lib/crypto/x86/Kconfig | 6 - lib/crypto/x86/Makefile | 10 -- lib/crypto/x86/poly1305-x86_64-cryptogams.pl | 33 ++---- lib/crypto/x86/poly1305.h | 158 +++++++++++++++++++++++++ lib/crypto/x86/poly1305_glue.c | 169 --------------------------- 28 files changed, 498 insertions(+), 623 deletions(-) delete mode 100644 lib/crypto/arm/poly1305-glue.c create mode 100644 lib/crypto/arm/poly1305.h delete mode 100644 lib/crypto/arm64/poly1305-glue.c create mode 100644 lib/crypto/arm64/poly1305.h delete mode 100644 lib/crypto/mips/poly1305-glue.c create mode 100644 lib/crypto/mips/poly1305.h delete mode 100644 lib/crypto/poly1305-generic.c delete mode 100644 lib/crypto/powerpc/poly1305-p10-glue.c create mode 100644 lib/crypto/powerpc/poly1305.h create mode 100644 lib/crypto/x86/poly1305.h delete mode 100644 lib/crypto/x86/poly1305_glue.c (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 79b848448e07..9991118c41a9 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -114,6 +114,33 @@ config CRYPTO_LIB_MD5_ARCH default y if PPC default y if SPARC64 +config CRYPTO_LIB_POLY1305 + tristate + help + The Poly1305 library functions. Select this if your module uses any + of the functions from . + +config CRYPTO_LIB_POLY1305_ARCH + bool + depends on CRYPTO_LIB_POLY1305 && !UML + default y if ARM + default y if ARM64 && KERNEL_MODE_NEON + default y if MIPS + # The PPC64 code needs to be fixed to work in softirq context. + default y if PPC64 && CPU_LITTLE_ENDIAN && VSX && BROKEN + default y if X86_64 + +# This symbol controls the inclusion of the Poly1305 generic code. This differs +# from most of the other algorithms, which handle the generic code +# "automatically" via __maybe_unused. This is needed so that the Adiantum code, +# which calls the poly1305_core_*() functions directly, can enable them. +config CRYPTO_LIB_POLY1305_GENERIC + bool + depends on CRYPTO_LIB_POLY1305 + # Enable if there's no arch impl or the arch impl requires the generic + # impl as a fallback. (Or if selected explicitly.) + default y if !CRYPTO_LIB_POLY1305_ARCH || PPC64 + config CRYPTO_LIB_POLY1305_RSIZE int default 2 if MIPS @@ -121,29 +148,6 @@ config CRYPTO_LIB_POLY1305_RSIZE default 9 if ARM || ARM64 default 1 -config CRYPTO_ARCH_HAVE_LIB_POLY1305 - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the Poly1305 library interface, - either builtin or as a module. - -config CRYPTO_LIB_POLY1305_GENERIC - tristate - default CRYPTO_LIB_POLY1305 if !CRYPTO_ARCH_HAVE_LIB_POLY1305 - help - This symbol can be selected by arch implementations of the Poly1305 - library interface that require the generic code as a fallback, e.g., - for SIMD implementations. If no arch specific implementation is - enabled, this implementation serves the users of CRYPTO_LIB_POLY1305. - -config CRYPTO_LIB_POLY1305 - tristate - help - Enable the Poly1305 library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. - config CRYPTO_LIB_CHACHA20POLY1305 tristate select CRYPTO_LIB_CHACHA diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index d362636a22d3..e0536e3b3a04 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -71,13 +71,60 @@ endif # CONFIG_CRYPTO_LIB_MD5_ARCH ################################################################################ -obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o -libpoly1305-y += poly1305.o +obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o +libpoly1305-y := poly1305.o +ifeq ($(CONFIG_ARCH_SUPPORTS_INT128),y) +libpoly1305-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += poly1305-donna64.o +else +libpoly1305-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += poly1305-donna32.o +endif + +ifeq ($(CONFIG_CRYPTO_LIB_POLY1305_ARCH),y) +CFLAGS_poly1305.o += -I$(src)/$(SRCARCH) + +ifeq ($(CONFIG_ARM),y) +libpoly1305-y += arm/poly1305-core.o +$(obj)/arm/poly1305-core.S: $(src)/arm/poly1305-armv4.pl + $(call cmd,perlasm) +# massage the perlasm code a bit so we only get the NEON routine if we need it +poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 +poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 +AFLAGS_arm/poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y) +endif + +ifeq ($(CONFIG_ARM64),y) +libpoly1305-y += arm64/poly1305-core.o +$(obj)/arm64/poly1305-core.S: $(src)/arm64/poly1305-armv8.pl + $(call cmd,perlasm_with_args) +endif + +ifeq ($(CONFIG_MIPS),y) +libpoly1305-y += mips/poly1305-core.o +poly1305-perlasm-flavour-$(CONFIG_32BIT) := o32 +poly1305-perlasm-flavour-$(CONFIG_64BIT) := 64 +quiet_cmd_perlasm_poly1305 = PERLASM $@ + cmd_perlasm_poly1305 = $(PERL) $< $(poly1305-perlasm-flavour-y) $@ +# Use if_changed instead of cmd, in case the flavour changed. +$(obj)/mips/poly1305-core.S: $(src)/mips/poly1305-mips.pl FORCE + $(call if_changed,perlasm_poly1305) +targets += mips/poly1305-core.S +endif -obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305-generic.o -libpoly1305-generic-y := poly1305-donna32.o -libpoly1305-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o -libpoly1305-generic-y += poly1305-generic.o +libpoly1305-$(CONFIG_PPC) += powerpc/poly1305-p10le_64.o + +ifeq ($(CONFIG_X86),y) +libpoly1305-y += x86/poly1305-x86_64-cryptogams.o +$(obj)/x86/poly1305-x86_64-cryptogams.S: $(src)/x86/poly1305-x86_64-cryptogams.pl + $(call cmd,perlasm) +endif + +endif # CONFIG_CRYPTO_LIB_POLY1305_ARCH + +# clean-files must be defined unconditionally +clean-files += arm/poly1305-core.S \ + arm64/poly1305-core.S \ + mips/poly1305-core.S \ + x86/poly1305-x86_64-cryptogams.S ################################################################################ diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig index e8444fd0aae3..0d821e282c64 100644 --- a/lib/crypto/arm/Kconfig +++ b/lib/crypto/arm/Kconfig @@ -17,8 +17,3 @@ config CRYPTO_CHACHA20_NEON tristate default CRYPTO_LIB_CHACHA select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_ARM - tristate - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/arm/Makefile b/lib/crypto/arm/Makefile index 4c042a4c77ed..9f70e61d419e 100644 --- a/lib/crypto/arm/Makefile +++ b/lib/crypto/arm/Makefile @@ -6,21 +6,3 @@ libblake2s-arm-y := blake2s-core.o blake2s-glue.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o chacha-neon-y := chacha-scalar-core.o chacha-glue.o chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o - -obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o -poly1305-arm-y := poly1305-core.o poly1305-glue.o - -quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $(<) > $(@) - -$(obj)/%-core.S: $(src)/%-armv4.pl - $(call cmd,perl) - -clean-files += poly1305-core.S - -aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1 - -# massage the perlasm code a bit so we only get the NEON routine if we need it -poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 -poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 -AFLAGS_poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y) diff --git a/lib/crypto/arm/poly1305-armv4.pl b/lib/crypto/arm/poly1305-armv4.pl index dd7a996361a7..34c11b7b44bd 100644 --- a/lib/crypto/arm/poly1305-armv4.pl +++ b/lib/crypto/arm/poly1305-armv4.pl @@ -43,9 +43,8 @@ $code.=<<___; #else # define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ -# define poly1305_init poly1305_block_init_arch +# define poly1305_init poly1305_block_init # define poly1305_blocks poly1305_blocks_arm -# define poly1305_emit poly1305_emit_arch #endif #if defined(__thumb2__) diff --git a/lib/crypto/arm/poly1305-glue.c b/lib/crypto/arm/poly1305-glue.c deleted file mode 100644 index 9e513e319e37..000000000000 --- a/lib/crypto/arm/poly1305-glue.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM - * - * Copyright (C) 2019 Linaro Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, - unsigned int len, u32 padbit) -{ - len = round_down(len, POLY1305_BLOCK_SIZE); - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && likely(may_use_simd())) { - do { - unsigned int todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, padbit); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else - poly1305_blocks_arm(state, src, len, padbit); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -static int __init arm_poly1305_mod_init(void) -{ - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - (elf_hwcap & HWCAP_NEON)) - static_branch_enable(&have_neon); - return 0; -} -subsys_initcall(arm_poly1305_mod_init); - -static void __exit arm_poly1305_mod_exit(void) -{ -} -module_exit(arm_poly1305_mod_exit); - -MODULE_DESCRIPTION("Accelerated Poly1305 transform for ARM"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm/poly1305.h b/lib/crypto/arm/poly1305.h new file mode 100644 index 000000000000..0021cf368307 --- /dev/null +++ b/lib/crypto/arm/poly1305.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM + * + * Copyright (C) 2019 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon) && likely(may_use_simd())) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks_arm(state, src, len, padbit); +} + +#ifdef CONFIG_KERNEL_MODE_NEON +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (elf_hwcap & HWCAP_NEON) + static_branch_enable(&have_neon); +} +#endif /* CONFIG_KERNEL_MODE_NEON */ diff --git a/lib/crypto/arm64/Kconfig b/lib/crypto/arm64/Kconfig index 0b903ef524d8..07c8a4f0ab03 100644 --- a/lib/crypto/arm64/Kconfig +++ b/lib/crypto/arm64/Kconfig @@ -6,9 +6,3 @@ config CRYPTO_CHACHA20_NEON default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_NEON - tristate - depends on KERNEL_MODE_NEON - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/arm64/Makefile b/lib/crypto/arm64/Makefile index 6207088397a7..d49cceca3d1c 100644 --- a/lib/crypto/arm64/Makefile +++ b/lib/crypto/arm64/Makefile @@ -2,16 +2,3 @@ obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o - -obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o -poly1305-neon-y := poly1305-core.o poly1305-glue.o -AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_block_init_arch -AFLAGS_poly1305-core.o += -Dpoly1305_emit=poly1305_emit_arch - -quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $(<) void $(@) - -$(obj)/%-core.S: $(src)/%-armv8.pl - $(call cmd,perlasm) - -clean-files += poly1305-core.S diff --git a/lib/crypto/arm64/poly1305-armv8.pl b/lib/crypto/arm64/poly1305-armv8.pl index 22c9069c0650..f1930c6b55ce 100644 --- a/lib/crypto/arm64/poly1305-armv8.pl +++ b/lib/crypto/arm64/poly1305-armv8.pl @@ -50,6 +50,9 @@ $code.=<<___; #ifndef __KERNEL__ # include "arm_arch.h" .extern OPENSSL_armcap_P +#else +# define poly1305_init poly1305_block_init +# define poly1305_blocks poly1305_blocks_arm64 #endif .text diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c deleted file mode 100644 index d4a522e7d25a..000000000000 --- a/lib/crypto/arm64/poly1305-glue.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 - * - * Copyright (C) 2019 Linaro Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, - unsigned int len, u32 padbit) -{ - len = round_down(len, POLY1305_BLOCK_SIZE); - if (static_branch_likely(&have_neon) && likely(may_use_simd())) { - do { - unsigned int todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, padbit); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else - poly1305_blocks(state, src, len, padbit); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -static int __init neon_poly1305_mod_init(void) -{ - if (cpu_have_named_feature(ASIMD)) - static_branch_enable(&have_neon); - return 0; -} -subsys_initcall(neon_poly1305_mod_init); - -static void __exit neon_poly1305_mod_exit(void) -{ -} -module_exit(neon_poly1305_mod_exit); - -MODULE_DESCRIPTION("Poly1305 authenticator (ARM64 optimized)"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm64/poly1305.h b/lib/crypto/arm64/poly1305.h new file mode 100644 index 000000000000..aed5921ccd9a --- /dev/null +++ b/lib/crypto/arm64/poly1305.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 + * + * Copyright (C) 2019 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_arm64(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + if (static_branch_likely(&have_neon) && likely(may_use_simd())) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks_arm64(state, src, len, padbit); +} + +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (cpu_have_named_feature(ASIMD)) + static_branch_enable(&have_neon); +} diff --git a/lib/crypto/mips/Kconfig b/lib/crypto/mips/Kconfig index 0670a170c1be..94c1a0892c20 100644 --- a/lib/crypto/mips/Kconfig +++ b/lib/crypto/mips/Kconfig @@ -5,8 +5,3 @@ config CRYPTO_CHACHA_MIPS depends on CPU_MIPS32_R2 default CRYPTO_LIB_CHACHA select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_MIPS - tristate - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/mips/Makefile b/lib/crypto/mips/Makefile index 804488c7aded..b5ea0e25c21e 100644 --- a/lib/crypto/mips/Makefile +++ b/lib/crypto/mips/Makefile @@ -3,17 +3,3 @@ obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o chacha-mips-y := chacha-core.o chacha-glue.o AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots - -obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o -poly1305-mips-y := poly1305-core.o poly1305-glue.o - -perlasm-flavour-$(CONFIG_32BIT) := o32 -perlasm-flavour-$(CONFIG_64BIT) := 64 - -quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) - -$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE - $(call if_changed,perlasm) - -targets += poly1305-core.S diff --git a/lib/crypto/mips/poly1305-glue.c b/lib/crypto/mips/poly1305-glue.c deleted file mode 100644 index 002f50f710ab..000000000000 --- a/lib/crypto/mips/poly1305-glue.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS - * - * Copyright (C) 2019 Linaro Ltd. - */ - -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_arch(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/mips/poly1305-mips.pl b/lib/crypto/mips/poly1305-mips.pl index 399f10c3e385..71347f34f4f9 100644 --- a/lib/crypto/mips/poly1305-mips.pl +++ b/lib/crypto/mips/poly1305-mips.pl @@ -93,9 +93,7 @@ $code.=<<___; #endif #ifdef __KERNEL__ -# define poly1305_init poly1305_block_init_arch -# define poly1305_blocks poly1305_blocks_arch -# define poly1305_emit poly1305_emit_arch +# define poly1305_init poly1305_block_init #endif #if defined(__MIPSEB__) && !defined(MIPSEB) @@ -565,9 +563,7 @@ $code.=<<___; #endif #ifdef __KERNEL__ -# define poly1305_init poly1305_block_init_arch -# define poly1305_blocks poly1305_blocks_arch -# define poly1305_emit poly1305_emit_arch +# define poly1305_init poly1305_block_init #endif #if defined(__MIPSEB__) && !defined(MIPSEB) diff --git a/lib/crypto/mips/poly1305.h b/lib/crypto/mips/poly1305.h new file mode 100644 index 000000000000..85de450f1a93 --- /dev/null +++ b/lib/crypto/mips/poly1305.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS + * + * Copyright (C) 2019 Linaro Ltd. + */ + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); diff --git a/lib/crypto/poly1305-generic.c b/lib/crypto/poly1305-generic.c deleted file mode 100644 index 71a16c5c538b..000000000000 --- a/lib/crypto/poly1305-generic.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Poly1305 authenticator algorithm, RFC7539 - * - * Copyright (C) 2015 Martin Willi - * - * Based on public domain code by Andrew Moon and Daniel J. Bernstein. - */ - -#include -#include -#include -#include - -void poly1305_block_init_generic(struct poly1305_block_state *desc, - const u8 raw_key[POLY1305_BLOCK_SIZE]) -{ - poly1305_core_init(&desc->h); - poly1305_core_setkey(&desc->core_r, raw_key); -} -EXPORT_SYMBOL_GPL(poly1305_block_init_generic); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("Poly1305 algorithm (generic implementation)"); diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index a6dc182b6c22..f313ccc4b4dd 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -7,7 +7,6 @@ * Based on public domain code by Andrew Moon and Daniel J. Bernstein. */ -#include #include #include #include @@ -15,6 +14,14 @@ #include #include +#ifdef CONFIG_CRYPTO_LIB_POLY1305_ARCH +#include "poly1305.h" /* $(SRCARCH)/poly1305.h */ +#else +#define poly1305_block_init poly1305_block_init_generic +#define poly1305_blocks poly1305_blocks_generic +#define poly1305_emit poly1305_emit_generic +#endif + void poly1305_init(struct poly1305_desc_ctx *desc, const u8 key[POLY1305_KEY_SIZE]) { @@ -23,28 +30,40 @@ void poly1305_init(struct poly1305_desc_ctx *desc, desc->s[2] = get_unaligned_le32(key + 24); desc->s[3] = get_unaligned_le32(key + 28); desc->buflen = 0; - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_block_init_arch(&desc->state, key); - else - poly1305_block_init_generic(&desc->state, key); + poly1305_block_init(&desc->state, key); } EXPORT_SYMBOL(poly1305_init); -static inline void poly1305_blocks(struct poly1305_block_state *state, - const u8 *src, unsigned int len) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_blocks_arch(state, src, len, 1); - else - poly1305_blocks_generic(state, src, len, 1); -} - void poly1305_update(struct poly1305_desc_ctx *desc, const u8 *src, unsigned int nbytes) { - desc->buflen = BLOCK_HASH_UPDATE(poly1305_blocks, &desc->state, - src, nbytes, POLY1305_BLOCK_SIZE, - desc->buf, desc->buflen); + if (desc->buflen + nbytes >= POLY1305_BLOCK_SIZE) { + unsigned int bulk_len; + + if (desc->buflen) { + unsigned int l = POLY1305_BLOCK_SIZE - desc->buflen; + + memcpy(&desc->buf[desc->buflen], src, l); + src += l; + nbytes -= l; + + poly1305_blocks(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 1); + desc->buflen = 0; + } + + bulk_len = round_down(nbytes, POLY1305_BLOCK_SIZE); + nbytes %= POLY1305_BLOCK_SIZE; + + if (bulk_len) { + poly1305_blocks(&desc->state, src, bulk_len, 1); + src += bulk_len; + } + } + if (nbytes) { + memcpy(&desc->buf[desc->buflen], src, nbytes); + desc->buflen += nbytes; + } } EXPORT_SYMBOL(poly1305_update); @@ -54,22 +73,28 @@ void poly1305_final(struct poly1305_desc_ctx *desc, u8 *dst) desc->buf[desc->buflen++] = 1; memset(desc->buf + desc->buflen, 0, POLY1305_BLOCK_SIZE - desc->buflen); - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_blocks_arch(&desc->state, desc->buf, - POLY1305_BLOCK_SIZE, 0); - else - poly1305_blocks_generic(&desc->state, desc->buf, - POLY1305_BLOCK_SIZE, 0); + poly1305_blocks(&desc->state, desc->buf, POLY1305_BLOCK_SIZE, + 0); } - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_emit_arch(&desc->state.h, dst, desc->s); - else - poly1305_emit_generic(&desc->state.h, dst, desc->s); + poly1305_emit(&desc->state.h, dst, desc->s); *desc = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final); +#ifdef poly1305_mod_init_arch +static int __init poly1305_mod_init(void) +{ + poly1305_mod_init_arch(); + return 0; +} +subsys_initcall(poly1305_mod_init); + +static void __exit poly1305_mod_exit(void) +{ +} +module_exit(poly1305_mod_exit); +#endif + MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); MODULE_DESCRIPTION("Poly1305 authenticator algorithm, RFC7539"); diff --git a/lib/crypto/powerpc/Kconfig b/lib/crypto/powerpc/Kconfig index 2eaeb7665a6a..e41012a61876 100644 --- a/lib/crypto/powerpc/Kconfig +++ b/lib/crypto/powerpc/Kconfig @@ -6,11 +6,3 @@ config CRYPTO_CHACHA20_P10 default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_P10 - tristate - depends on PPC64 && CPU_LITTLE_ENDIAN && VSX - depends on BROKEN # Needs to be fixed to work in softirq context - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 - select CRYPTO_LIB_POLY1305_GENERIC diff --git a/lib/crypto/powerpc/Makefile b/lib/crypto/powerpc/Makefile index 5709ae14258a..778a04edd226 100644 --- a/lib/crypto/powerpc/Makefile +++ b/lib/crypto/powerpc/Makefile @@ -2,6 +2,3 @@ obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o - -obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o -poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o diff --git a/lib/crypto/powerpc/poly1305-p10-glue.c b/lib/crypto/powerpc/poly1305-p10-glue.c deleted file mode 100644 index 184a71f9c1de..000000000000 --- a/lib/crypto/powerpc/poly1305-p10-glue.c +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Poly1305 authenticator algorithm, RFC7539. - * - * Copyright 2023- IBM Corp. All rights reserved. - */ -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); -asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 *m, u32 mlen, int highbit); -asmlinkage void poly1305_emit_64(const struct poly1305_state *state, const u32 nonce[4], u8 digest[POLY1305_DIGEST_SIZE]); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); - -static void vsx_begin(void) -{ - preempt_disable(); - enable_kernel_vsx(); -} - -static void vsx_end(void) -{ - disable_kernel_vsx(); - preempt_enable(); -} - -void poly1305_block_init_arch(struct poly1305_block_state *dctx, - const u8 raw_key[POLY1305_BLOCK_SIZE]) -{ - if (!static_key_enabled(&have_p10)) - return poly1305_block_init_generic(dctx, raw_key); - - dctx->h = (struct poly1305_state){}; - dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0); - dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8); -} -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, - unsigned int len, u32 padbit) -{ - if (!static_key_enabled(&have_p10)) - return poly1305_blocks_generic(state, src, len, padbit); - vsx_begin(); - if (len >= POLY1305_BLOCK_SIZE * 4) { - poly1305_p10le_4blocks(state, src, len); - src += len - (len % (POLY1305_BLOCK_SIZE * 4)); - len %= POLY1305_BLOCK_SIZE * 4; - } - while (len >= POLY1305_BLOCK_SIZE) { - poly1305_64s(state, src, POLY1305_BLOCK_SIZE, padbit); - len -= POLY1305_BLOCK_SIZE; - src += POLY1305_BLOCK_SIZE; - } - vsx_end(); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]) -{ - if (!static_key_enabled(&have_p10)) - return poly1305_emit_generic(state, digest, nonce); - poly1305_emit_64(state, nonce, digest); -} -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static int __init poly1305_p10_init(void) -{ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - static_branch_enable(&have_p10); - return 0; -} -subsys_initcall(poly1305_p10_init); - -static void __exit poly1305_p10_exit(void) -{ -} -module_exit(poly1305_p10_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Danny Tsen "); -MODULE_DESCRIPTION("Optimized Poly1305 for P10"); diff --git a/lib/crypto/powerpc/poly1305.h b/lib/crypto/powerpc/poly1305.h new file mode 100644 index 000000000000..b8ed098a0e95 --- /dev/null +++ b/lib/crypto/powerpc/poly1305.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Poly1305 authenticator algorithm, RFC7539. + * + * Copyright 2023- IBM Corp. All rights reserved. + */ +#include +#include +#include +#include +#include + +asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); +asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 *m, u32 mlen, int highbit); +asmlinkage void poly1305_emit_64(const struct poly1305_state *state, const u32 nonce[4], u8 digest[POLY1305_DIGEST_SIZE]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); + +static void vsx_begin(void) +{ + preempt_disable(); + enable_kernel_vsx(); +} + +static void vsx_end(void) +{ + disable_kernel_vsx(); + preempt_enable(); +} + +static void poly1305_block_init(struct poly1305_block_state *dctx, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + if (!static_key_enabled(&have_p10)) + return poly1305_block_init_generic(dctx, raw_key); + + dctx->h = (struct poly1305_state){}; + dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0); + dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8); +} + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + if (!static_key_enabled(&have_p10)) + return poly1305_blocks_generic(state, src, len, padbit); + vsx_begin(); + if (len >= POLY1305_BLOCK_SIZE * 4) { + poly1305_p10le_4blocks(state, src, len); + src += len - (len % (POLY1305_BLOCK_SIZE * 4)); + len %= POLY1305_BLOCK_SIZE * 4; + } + while (len >= POLY1305_BLOCK_SIZE) { + poly1305_64s(state, src, POLY1305_BLOCK_SIZE, padbit); + len -= POLY1305_BLOCK_SIZE; + src += POLY1305_BLOCK_SIZE; + } + vsx_end(); +} + +static void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]) +{ + if (!static_key_enabled(&have_p10)) + return poly1305_emit_generic(state, digest, nonce); + poly1305_emit_64(state, nonce, digest); +} + +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + static_branch_enable(&have_p10); +} diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig index 546fe2afe0b5..24dc9a59b272 100644 --- a/lib/crypto/x86/Kconfig +++ b/lib/crypto/x86/Kconfig @@ -18,9 +18,3 @@ config CRYPTO_CHACHA20_X86_64 default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_X86_64 - tristate - depends on 64BIT - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/x86/Makefile b/lib/crypto/x86/Makefile index c2ff8c5f1046..16c9d76f9947 100644 --- a/lib/crypto/x86/Makefile +++ b/lib/crypto/x86/Makefile @@ -5,13 +5,3 @@ libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha-avx512vl-x86_64.o chacha_glue.o - -obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o -poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o -targets += poly1305-x86_64-cryptogams.S - -quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $< > $@ - -$(obj)/%.S: $(src)/%.pl FORCE - $(call if_changed,perlasm) diff --git a/lib/crypto/x86/poly1305-x86_64-cryptogams.pl b/lib/crypto/x86/poly1305-x86_64-cryptogams.pl index 501827254fed..409ec6955733 100644 --- a/lib/crypto/x86/poly1305-x86_64-cryptogams.pl +++ b/lib/crypto/x86/poly1305-x86_64-cryptogams.pl @@ -118,19 +118,6 @@ sub declare_function() { } } -sub declare_typed_function() { - my ($name, $align, $nargs) = @_; - if($kernel) { - $code .= "SYM_TYPED_FUNC_START($name)\n"; - $code .= ".L$name:\n"; - } else { - $code .= ".globl $name\n"; - $code .= ".type $name,\@function,$nargs\n"; - $code .= ".align $align\n"; - $code .= "$name:\n"; - } -} - sub end_function() { my ($name) = @_; if($kernel) { @@ -141,7 +128,7 @@ sub end_function() { } $code.=<<___ if $kernel; -#include +#include ___ if ($avx) { @@ -249,14 +236,14 @@ ___ $code.=<<___ if (!$kernel); .extern OPENSSL_ia32cap_P -.globl poly1305_block_init_arch -.hidden poly1305_block_init_arch +.globl poly1305_init_x86_64 +.hidden poly1305_init_x86_64 .globl poly1305_blocks_x86_64 .hidden poly1305_blocks_x86_64 .globl poly1305_emit_x86_64 .hidden poly1305_emit_x86_64 ___ -&declare_typed_function("poly1305_block_init_arch", 32, 3); +&declare_function("poly1305_init_x86_64", 32, 3); $code.=<<___; xor %eax,%eax mov %rax,0($ctx) # initialize hash value @@ -311,7 +298,7 @@ $code.=<<___; .Lno_key: RET ___ -&end_function("poly1305_block_init_arch"); +&end_function("poly1305_init_x86_64"); &declare_function("poly1305_blocks_x86_64", 32, 4); $code.=<<___; @@ -4118,9 +4105,9 @@ avx_handler: .section .pdata .align 4 - .rva .LSEH_begin_poly1305_block_init_arch - .rva .LSEH_end_poly1305_block_init_arch - .rva .LSEH_info_poly1305_block_init_arch + .rva .LSEH_begin_poly1305_init_x86_64 + .rva .LSEH_end_poly1305_init_x86_64 + .rva .LSEH_info_poly1305_init_x86_64 .rva .LSEH_begin_poly1305_blocks_x86_64 .rva .LSEH_end_poly1305_blocks_x86_64 @@ -4168,10 +4155,10 @@ ___ $code.=<<___; .section .xdata .align 8 -.LSEH_info_poly1305_block_init_arch: +.LSEH_info_poly1305_init_x86_64: .byte 9,0,0,0 .rva se_handler - .rva .LSEH_begin_poly1305_block_init_arch,.LSEH_begin_poly1305_block_init_arch + .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 .LSEH_info_poly1305_blocks_x86_64: .byte 9,0,0,0 diff --git a/lib/crypto/x86/poly1305.h b/lib/crypto/x86/poly1305.h new file mode 100644 index 000000000000..ee92e3740a78 --- /dev/null +++ b/lib/crypto/x86/poly1305.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include +#include +#include + +struct poly1305_arch_internal { + union { + struct { + u32 h[5]; + u32 is_base2_26; + }; + u64 hs[3]; + }; + u64 r[2]; + u64 pad; + struct { u32 r2, r1, r4, r3; } rn[9]; +}; + +/* + * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit + * the unfortunate situation of using AVX and then having to go back to scalar + * -- because the user is silly and has called the update function from two + * separate contexts -- then we need to convert back to the original base before + * proceeding. It is possible to reason that the initial reduction below is + * sufficient given the implementation invariants. However, for an avoidance of + * doubt and because this is not performance critical, we do the full reduction + * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py + */ +static void convert_to_base2_64(void *ctx) +{ + struct poly1305_arch_internal *state = ctx; + u32 cy; + + if (!state->is_base2_26) + return; + + cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; + cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; + cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; + cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; + state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; + state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); + state->hs[2] = state->h[4] >> 24; + /* Unsigned Less Than: branchlessly produces 1 if a < b, else 0. */ +#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) + cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); + state->hs[2] &= 3; + state->hs[0] += cy; + state->hs[1] += (cy = ULT(state->hs[0], cy)); + state->hs[2] += ULT(state->hs[1], cy); +#undef ULT + state->is_base2_26 = 0; +} + +asmlinkage void poly1305_init_x86_64(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_x86_64(struct poly1305_arch_internal *ctx, + const u8 *inp, + const size_t len, const u32 padbit); +asmlinkage void poly1305_emit_x86_64(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_emit_avx(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_blocks_avx(struct poly1305_arch_internal *ctx, + const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx2(struct poly1305_arch_internal *ctx, + const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx512(struct poly1305_arch_internal *ctx, + const u8 *inp, + const size_t len, const u32 padbit); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); + +static void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + poly1305_init_x86_64(state, raw_key); +} + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *inp, + unsigned int len, u32 padbit) +{ + struct poly1305_arch_internal *ctx = + container_of(&state->h.h, struct poly1305_arch_internal, h); + + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || + SZ_4K % POLY1305_BLOCK_SIZE); + + /* + * The AVX implementations have significant setup overhead (e.g. key + * power computation, kernel FPU enabling) which makes them slower for + * short messages. Fall back to the scalar implementation for messages + * shorter than 288 bytes, unless the AVX-specific key setup has already + * been performed (indicated by ctx->is_base2_26). + */ + if (!static_branch_likely(&poly1305_use_avx) || + (len < POLY1305_BLOCK_SIZE * 18 && !ctx->is_base2_26) || + unlikely(!irq_fpu_usable())) { + convert_to_base2_64(ctx); + poly1305_blocks_x86_64(ctx, inp, len, padbit); + return; + } + + do { + const unsigned int bytes = min(len, SZ_4K); + + kernel_fpu_begin(); + if (static_branch_likely(&poly1305_use_avx512)) + poly1305_blocks_avx512(ctx, inp, bytes, padbit); + else if (static_branch_likely(&poly1305_use_avx2)) + poly1305_blocks_avx2(ctx, inp, bytes, padbit); + else + poly1305_blocks_avx(ctx, inp, bytes, padbit); + kernel_fpu_end(); + + len -= bytes; + inp += bytes; + } while (len); +} + +static void poly1305_emit(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) +{ + if (!static_branch_likely(&poly1305_use_avx)) + poly1305_emit_x86_64(ctx, mac, nonce); + else + poly1305_emit_avx(ctx, mac, nonce); +} + +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (boot_cpu_has(X86_FEATURE_AVX) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx); + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx2); + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && + /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */ + boot_cpu_data.x86_vfm != INTEL_SKYLAKE_X) + static_branch_enable(&poly1305_use_avx512); +} diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305_glue.c deleted file mode 100644 index deb5841cb0ad..000000000000 --- a/lib/crypto/x86/poly1305_glue.c +++ /dev/null @@ -1,169 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct poly1305_arch_internal { - union { - struct { - u32 h[5]; - u32 is_base2_26; - }; - u64 hs[3]; - }; - u64 r[2]; - u64 pad; - struct { u32 r2, r1, r4, r3; } rn[9]; -}; - -/* - * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit - * the unfortunate situation of using AVX and then having to go back to scalar - * -- because the user is silly and has called the update function from two - * separate contexts -- then we need to convert back to the original base before - * proceeding. It is possible to reason that the initial reduction below is - * sufficient given the implementation invariants. However, for an avoidance of - * doubt and because this is not performance critical, we do the full reduction - * anyway. Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py - */ -static void convert_to_base2_64(void *ctx) -{ - struct poly1305_arch_internal *state = ctx; - u32 cy; - - if (!state->is_base2_26) - return; - - cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; - cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; - cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; - cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; - state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; - state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); - state->hs[2] = state->h[4] >> 24; - /* Unsigned Less Than: branchlessly produces 1 if a < b, else 0. */ -#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) - cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); - state->hs[2] &= 3; - state->hs[0] += cy; - state->hs[1] += (cy = ULT(state->hs[0], cy)); - state->hs[2] += ULT(state->hs[1], cy); -#undef ULT - state->is_base2_26 = 0; -} - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_x86_64(struct poly1305_arch_internal *ctx, - const u8 *inp, - const size_t len, const u32 padbit); -asmlinkage void poly1305_emit_x86_64(const struct poly1305_state *ctx, - u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -asmlinkage void poly1305_emit_avx(const struct poly1305_state *ctx, - u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -asmlinkage void poly1305_blocks_avx(struct poly1305_arch_internal *ctx, - const u8 *inp, const size_t len, - const u32 padbit); -asmlinkage void poly1305_blocks_avx2(struct poly1305_arch_internal *ctx, - const u8 *inp, const size_t len, - const u32 padbit); -asmlinkage void poly1305_blocks_avx512(struct poly1305_arch_internal *ctx, - const u8 *inp, - const size_t len, const u32 padbit); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp, - unsigned int len, u32 padbit) -{ - struct poly1305_arch_internal *ctx = - container_of(&state->h.h, struct poly1305_arch_internal, h); - - /* SIMD disables preemption, so relax after processing each page. */ - BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || - SZ_4K % POLY1305_BLOCK_SIZE); - - /* - * The AVX implementations have significant setup overhead (e.g. key - * power computation, kernel FPU enabling) which makes them slower for - * short messages. Fall back to the scalar implementation for messages - * shorter than 288 bytes, unless the AVX-specific key setup has already - * been performed (indicated by ctx->is_base2_26). - */ - if (!static_branch_likely(&poly1305_use_avx) || - (len < POLY1305_BLOCK_SIZE * 18 && !ctx->is_base2_26) || - unlikely(!irq_fpu_usable())) { - convert_to_base2_64(ctx); - poly1305_blocks_x86_64(ctx, inp, len, padbit); - return; - } - - do { - const unsigned int bytes = min(len, SZ_4K); - - kernel_fpu_begin(); - if (static_branch_likely(&poly1305_use_avx512)) - poly1305_blocks_avx512(ctx, inp, bytes, padbit); - else if (static_branch_likely(&poly1305_use_avx2)) - poly1305_blocks_avx2(ctx, inp, bytes, padbit); - else - poly1305_blocks_avx(ctx, inp, bytes, padbit); - kernel_fpu_end(); - - len -= bytes; - inp += bytes; - } while (len); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -void poly1305_emit_arch(const struct poly1305_state *ctx, - u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) -{ - if (!static_branch_likely(&poly1305_use_avx)) - poly1305_emit_x86_64(ctx, mac, nonce); - else - poly1305_emit_avx(ctx, mac, nonce); -} -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static int __init poly1305_simd_mod_init(void) -{ - if (boot_cpu_has(X86_FEATURE_AVX) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - static_branch_enable(&poly1305_use_avx); - if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - static_branch_enable(&poly1305_use_avx2); - if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && - boot_cpu_has(X86_FEATURE_AVX512F) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && - /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */ - boot_cpu_data.x86_vfm != INTEL_SKYLAKE_X) - static_branch_enable(&poly1305_use_avx512); - return 0; -} -subsys_initcall(poly1305_simd_mod_init); - -static void __exit poly1305_simd_mod_exit(void) -{ -} -module_exit(poly1305_simd_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jason A. Donenfeld "); -MODULE_DESCRIPTION("Poly1305 authenticator"); -- cgit v1.2.3 From bef9c755986980eecc18e9cecd847bc3c037aebb Mon Sep 17 00:00:00 2001 From: Zhihang Shao Date: Fri, 29 Aug 2025 08:25:13 -0700 Subject: lib/crypto: riscv/poly1305: Import OpenSSL/CRYPTOGAMS implementation This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for riscv authored by Andy Polyakov. The file 'poly1305-riscv.pl' is taken straight from https://github.com/dot-asm/cryptogams commit 5e3fba73576244708a752fa61a8e93e587f271bb. This patch was tested on SpacemiT X60, with 2~2.5x improvement over generic implementation. Signed-off-by: Chunyan Zhang Signed-off-by: Zhihang Shao [EB: ported to lib/crypto/riscv/] Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829152513.92459-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 3 +- lib/crypto/Makefile | 14 + lib/crypto/riscv/poly1305-riscv.pl | 847 +++++++++++++++++++++++++++++++++++++ lib/crypto/riscv/poly1305.h | 14 + 4 files changed, 877 insertions(+), 1 deletion(-) create mode 100644 lib/crypto/riscv/poly1305-riscv.pl create mode 100644 lib/crypto/riscv/poly1305.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 9991118c41a9..cb4e056a98fa 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -128,6 +128,7 @@ config CRYPTO_LIB_POLY1305_ARCH default y if MIPS # The PPC64 code needs to be fixed to work in softirq context. default y if PPC64 && CPU_LITTLE_ENDIAN && VSX && BROKEN + default y if RISCV default y if X86_64 # This symbol controls the inclusion of the Poly1305 generic code. This differs @@ -143,7 +144,7 @@ config CRYPTO_LIB_POLY1305_GENERIC config CRYPTO_LIB_POLY1305_RSIZE int - default 2 if MIPS + default 2 if MIPS || RISCV default 11 if X86_64 default 9 if ARM || ARM64 default 1 diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index e0536e3b3a04..cd460e5e3dd2 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -112,6 +112,19 @@ endif libpoly1305-$(CONFIG_PPC) += powerpc/poly1305-p10le_64.o +ifeq ($(CONFIG_RISCV),y) +libpoly1305-y += riscv/poly1305-core.o +poly1305-perlasm-flavour-$(CONFIG_32BIT) := 32 +poly1305-perlasm-flavour-$(CONFIG_64BIT) := 64 +quiet_cmd_perlasm_poly1305 = PERLASM $@ + cmd_perlasm_poly1305 = $(PERL) $< $(poly1305-perlasm-flavour-y) $@ +# Use if_changed instead of cmd, in case the flavour changed. +$(obj)/riscv/poly1305-core.S: $(src)/riscv/poly1305-riscv.pl FORCE + $(call if_changed,perlasm_poly1305) +targets += riscv/poly1305-core.S +AFLAGS_riscv/poly1305-core.o += -Dpoly1305_init=poly1305_block_init +endif + ifeq ($(CONFIG_X86),y) libpoly1305-y += x86/poly1305-x86_64-cryptogams.o $(obj)/x86/poly1305-x86_64-cryptogams.S: $(src)/x86/poly1305-x86_64-cryptogams.pl @@ -124,6 +137,7 @@ endif # CONFIG_CRYPTO_LIB_POLY1305_ARCH clean-files += arm/poly1305-core.S \ arm64/poly1305-core.S \ mips/poly1305-core.S \ + riscv/poly1305-core.S \ x86/poly1305-x86_64-cryptogams.S ################################################################################ diff --git a/lib/crypto/riscv/poly1305-riscv.pl b/lib/crypto/riscv/poly1305-riscv.pl new file mode 100644 index 000000000000..e25e6338a9ac --- /dev/null +++ b/lib/crypto/riscv/poly1305-riscv.pl @@ -0,0 +1,847 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, initially for use with OpenSSL. +# ==================================================================== +# +# Poly1305 hash for RISC-V. +# +# February 2019 +# +# In the essence it's pretty straightforward transliteration of MIPS +# module [without big-endian option]. +# +# 1.8 cycles per byte on U74, >100% faster than compiler-generated +# code. 1.9 cpb on C910, ~75% improvement. 3.3 on Spacemit X60, ~69% +# improvement. +# +# June 2024. +# +# Add CHERI support. +# +###################################################################### +# +($zero,$ra,$sp,$gp,$tp)=map("x$_",(0..4)); +($t0,$t1,$t2,$t3,$t4,$t5,$t6)=map("x$_",(5..7,28..31)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("x$_",(10..17)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("x$_",(8,9,18..27)); +# +###################################################################### + +$flavour = shift || "64"; + +for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); } +open STDOUT,">$output"; + +$code.=<<___; +#ifdef __KERNEL__ +# ifdef __riscv_zicfilp +# undef __riscv_zicfilp // calls are expected to be direct +# endif +#endif + +#if defined(__CHERI_PURE_CAPABILITY__) && !defined(__riscv_misaligned_fast) +# define __riscv_misaligned_fast 1 +#endif +___ + +if ($flavour =~ /64/) {{{ +###################################################################### +# 64-bit code path... +# +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$t0,$t1,$t2); + +$code.=<<___; +#if __riscv_xlen == 64 +# if __SIZEOF_POINTER__ == 16 +# define PUSH csc +# define POP clc +# else +# define PUSH sd +# define POP ld +# endif +#else +# error "unsupported __riscv_xlen" +#endif + +.option pic +.text + +.globl poly1305_init +.type poly1305_init,\@function +poly1305_init: +#ifdef __riscv_zicfilp + lpad 0 +#endif + sd $zero,0($ctx) + sd $zero,8($ctx) + sd $zero,16($ctx) + + beqz $inp,.Lno_key + +#ifndef __riscv_misaligned_fast + andi $tmp0,$inp,7 # $inp % 8 + andi $inp,$inp,-8 # align $inp + slli $tmp0,$tmp0,3 # byte to bit offset +#endif + ld $in0,0($inp) + ld $in1,8($inp) +#ifndef __riscv_misaligned_fast + beqz $tmp0,.Laligned_key + + ld $tmp2,16($inp) + neg $tmp1,$tmp0 # implicit &63 in sll + srl $in0,$in0,$tmp0 + sll $tmp3,$in1,$tmp1 + srl $in1,$in1,$tmp0 + sll $tmp2,$tmp2,$tmp1 + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 + +.Laligned_key: +#endif + li $tmp0,1 + slli $tmp0,$tmp0,32 # 0x0000000100000000 + addi $tmp0,$tmp0,-63 # 0x00000000ffffffc1 + slli $tmp0,$tmp0,28 # 0x0ffffffc10000000 + addi $tmp0,$tmp0,-1 # 0x0ffffffc0fffffff + + and $in0,$in0,$tmp0 + addi $tmp0,$tmp0,-3 # 0x0ffffffc0ffffffc + and $in1,$in1,$tmp0 + + sd $in0,24($ctx) + srli $tmp0,$in1,2 + sd $in1,32($ctx) + add $tmp0,$tmp0,$in1 # s1 = r1 + (r1 >> 2) + sd $tmp0,40($ctx) + +.Lno_key: + li $a0,0 # return 0 + ret +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = + ($s0,$s1,$s2,$s3,$t3,$t4,$in0,$in1,$t2); +my ($shr,$shl) = ($t5,$t6); # used on R6 + +$code.=<<___; +.globl poly1305_blocks +.type poly1305_blocks,\@function +poly1305_blocks: +#ifdef __riscv_zicfilp + lpad 0 +#endif + andi $len,$len,-16 # complete blocks only + beqz $len,.Lno_data + + caddi $sp,$sp,-4*__SIZEOF_POINTER__ + PUSH $s0,3*__SIZEOF_POINTER__($sp) + PUSH $s1,2*__SIZEOF_POINTER__($sp) + PUSH $s2,1*__SIZEOF_POINTER__($sp) + PUSH $s3,0*__SIZEOF_POINTER__($sp) + +#ifndef __riscv_misaligned_fast + andi $shr,$inp,7 + andi $inp,$inp,-8 # align $inp + slli $shr,$shr,3 # byte to bit offset + neg $shl,$shr # implicit &63 in sll +#endif + + ld $h0,0($ctx) # load hash value + ld $h1,8($ctx) + ld $h2,16($ctx) + + ld $r0,24($ctx) # load key + ld $r1,32($ctx) + ld $rs1,40($ctx) + + add $len,$len,$inp # end of buffer + +.Loop: + ld $in0,0($inp) # load input + ld $in1,8($inp) +#ifndef __riscv_misaligned_fast + beqz $shr,.Laligned_inp + + ld $tmp2,16($inp) + srl $in0,$in0,$shr + sll $tmp3,$in1,$shl + srl $in1,$in1,$shr + sll $tmp2,$tmp2,$shl + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 + +.Laligned_inp: +#endif + caddi $inp,$inp,16 + + andi $tmp0,$h2,-4 # modulo-scheduled reduction + srli $tmp1,$h2,2 + andi $h2,$h2,3 + + add $d0,$h0,$in0 # accumulate input + add $tmp1,$tmp1,$tmp0 + sltu $tmp0,$d0,$h0 + add $d0,$d0,$tmp1 # ... and residue + sltu $tmp1,$d0,$tmp1 + add $d1,$h1,$in1 + add $tmp0,$tmp0,$tmp1 + sltu $tmp1,$d1,$h1 + add $d1,$d1,$tmp0 + + add $d2,$h2,$padbit + sltu $tmp0,$d1,$tmp0 + mulhu $h1,$r0,$d0 # h0*r0 + mul $h0,$r0,$d0 + + add $d2,$d2,$tmp1 + add $d2,$d2,$tmp0 + mulhu $tmp1,$rs1,$d1 # h1*5*r1 + mul $tmp0,$rs1,$d1 + + mulhu $h2,$r1,$d0 # h0*r1 + mul $tmp2,$r1,$d0 + add $h0,$h0,$tmp0 + add $h1,$h1,$tmp1 + sltu $tmp0,$h0,$tmp0 + + add $h1,$h1,$tmp0 + add $h1,$h1,$tmp2 + mulhu $tmp1,$r0,$d1 # h1*r0 + mul $tmp0,$r0,$d1 + + sltu $tmp2,$h1,$tmp2 + add $h2,$h2,$tmp2 + mul $tmp2,$rs1,$d2 # h2*5*r1 + + add $h1,$h1,$tmp0 + add $h2,$h2,$tmp1 + mul $tmp3,$r0,$d2 # h2*r0 + sltu $tmp0,$h1,$tmp0 + add $h2,$h2,$tmp0 + + add $h1,$h1,$tmp2 + sltu $tmp2,$h1,$tmp2 + add $h2,$h2,$tmp2 + add $h2,$h2,$tmp3 + + bne $inp,$len,.Loop + + sd $h0,0($ctx) # store hash value + sd $h1,8($ctx) + sd $h2,16($ctx) + + POP $s0,3*__SIZEOF_POINTER__($sp) # epilogue + POP $s1,2*__SIZEOF_POINTER__($sp) + POP $s2,1*__SIZEOF_POINTER__($sp) + POP $s3,0*__SIZEOF_POINTER__($sp) + caddi $sp,$sp,4*__SIZEOF_POINTER__ + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); + +$code.=<<___; +.globl poly1305_emit +.type poly1305_emit,\@function +poly1305_emit: +#ifdef __riscv_zicfilp + lpad 0 +#endif + ld $tmp2,16($ctx) + ld $tmp0,0($ctx) + ld $tmp1,8($ctx) + + andi $in0,$tmp2,-4 # final reduction + srl $in1,$tmp2,2 + andi $tmp2,$tmp2,3 + add $in0,$in0,$in1 + + add $tmp0,$tmp0,$in0 + sltu $in1,$tmp0,$in0 + addi $in0,$tmp0,5 # compare to modulus + add $tmp1,$tmp1,$in1 + sltiu $tmp3,$in0,5 + sltu $tmp4,$tmp1,$in1 + add $in1,$tmp1,$tmp3 + add $tmp2,$tmp2,$tmp4 + sltu $tmp3,$in1,$tmp3 + add $tmp2,$tmp2,$tmp3 + + srli $tmp2,$tmp2,2 # see if it carried/borrowed + neg $tmp2,$tmp2 + + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + and $in0,$in0,$tmp2 + and $in1,$in1,$tmp2 + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + + lwu $tmp0,0($nonce) # load nonce + lwu $tmp1,4($nonce) + lwu $tmp2,8($nonce) + lwu $tmp3,12($nonce) + slli $tmp1,$tmp1,32 + slli $tmp3,$tmp3,32 + or $tmp0,$tmp0,$tmp1 + or $tmp2,$tmp2,$tmp3 + + add $in0,$in0,$tmp0 # accumulate nonce + add $in1,$in1,$tmp2 + sltu $tmp0,$in0,$tmp0 + add $in1,$in1,$tmp0 + +#ifdef __riscv_misaligned_fast + sd $in0,0($mac) # write mac value + sd $in1,8($mac) +#else + srli $tmp0,$in0,8 # write mac value + srli $tmp1,$in0,16 + srli $tmp2,$in0,24 + sb $in0,0($mac) + srli $tmp3,$in0,32 + sb $tmp0,1($mac) + srli $tmp0,$in0,40 + sb $tmp1,2($mac) + srli $tmp1,$in0,48 + sb $tmp2,3($mac) + srli $tmp2,$in0,56 + sb $tmp3,4($mac) + srli $tmp3,$in1,8 + sb $tmp0,5($mac) + srli $tmp0,$in1,16 + sb $tmp1,6($mac) + srli $tmp1,$in1,24 + sb $tmp2,7($mac) + + sb $in1,8($mac) + srli $tmp2,$in1,32 + sb $tmp3,9($mac) + srli $tmp3,$in1,40 + sb $tmp0,10($mac) + srli $tmp0,$in1,48 + sb $tmp1,11($mac) + srli $tmp1,$in1,56 + sb $tmp2,12($mac) + sb $tmp3,13($mac) + sb $tmp0,14($mac) + sb $tmp1,15($mac) +#endif + + ret +.size poly1305_emit,.-poly1305_emit +.string "Poly1305 for RISC-V, CRYPTOGAMS by \@dot-asm" +___ +} +}}} else {{{ +###################################################################### +# 32-bit code path +# + +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = + ($a4,$a5,$a6,$a7,$t0,$t1,$t2,$t3); + +$code.=<<___; +#if __riscv_xlen == 32 +# if __SIZEOF_POINTER__ == 8 +# define PUSH csc +# define POP clc +# else +# define PUSH sw +# define POP lw +# endif +# define MULX(hi,lo,a,b) mulhu hi,a,b; mul lo,a,b +# define srliw srli +# define srlw srl +# define sllw sll +# define addw add +# define addiw addi +# define mulw mul +#elif __riscv_xlen == 64 +# if __SIZEOF_POINTER__ == 16 +# define PUSH csc +# define POP clc +# else +# define PUSH sd +# define POP ld +# endif +# define MULX(hi,lo,a,b) slli b,b,32; srli b,b,32; mul hi,a,b; addiw lo,hi,0; srai hi,hi,32 +#else +# error "unsupported __riscv_xlen" +#endif + +.option pic +.text + +.globl poly1305_init +.type poly1305_init,\@function +poly1305_init: +#ifdef __riscv_zicfilp + lpad 0 +#endif + sw $zero,0($ctx) + sw $zero,4($ctx) + sw $zero,8($ctx) + sw $zero,12($ctx) + sw $zero,16($ctx) + + beqz $inp,.Lno_key + +#ifndef __riscv_misaligned_fast + andi $tmp0,$inp,3 # $inp % 4 + sub $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset +#endif + lw $in0,0($inp) + lw $in1,4($inp) + lw $in2,8($inp) + lw $in3,12($inp) +#ifndef __riscv_misaligned_fast + beqz $tmp0,.Laligned_key + + lw $tmp2,16($inp) + sub $tmp1,$zero,$tmp0 + srlw $in0,$in0,$tmp0 + sllw $tmp3,$in1,$tmp1 + srlw $in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + sllw $tmp3,$in2,$tmp1 + srlw $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + sllw $tmp3,$in3,$tmp1 + srlw $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + sllw $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +.Laligned_key: +#endif + + lui $tmp0,0x10000 + addi $tmp0,$tmp0,-1 # 0x0fffffff + and $in0,$in0,$tmp0 + addi $tmp0,$tmp0,-3 # 0x0ffffffc + and $in1,$in1,$tmp0 + and $in2,$in2,$tmp0 + and $in3,$in3,$tmp0 + + sw $in0,20($ctx) + sw $in1,24($ctx) + sw $in2,28($ctx) + sw $in3,32($ctx) + + srlw $tmp1,$in1,2 + srlw $tmp2,$in2,2 + srlw $tmp3,$in3,2 + addw $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) + addw $in2,$in2,$tmp2 + addw $in3,$in3,$tmp3 + sw $in1,36($ctx) + sw $in2,40($ctx) + sw $in3,44($ctx) +.Lno_key: + li $a0,0 + ret +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = + ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $t0,$t1,$t2); +my ($d0,$d1,$d2,$d3) = + ($a4,$a5,$a6,$a7); +my $shr = $ra; # used on R6 + +$code.=<<___; +.globl poly1305_blocks +.type poly1305_blocks,\@function +poly1305_blocks: +#ifdef __riscv_zicfilp + lpad 0 +#endif + andi $len,$len,-16 # complete blocks only + beqz $len,.Labort + +#ifdef __riscv_zcmp + cm.push {ra,s0-s8}, -48 +#else + caddi $sp,$sp,-__SIZEOF_POINTER__*12 + PUSH $ra, __SIZEOF_POINTER__*11($sp) + PUSH $s0, __SIZEOF_POINTER__*10($sp) + PUSH $s1, __SIZEOF_POINTER__*9($sp) + PUSH $s2, __SIZEOF_POINTER__*8($sp) + PUSH $s3, __SIZEOF_POINTER__*7($sp) + PUSH $s4, __SIZEOF_POINTER__*6($sp) + PUSH $s5, __SIZEOF_POINTER__*5($sp) + PUSH $s6, __SIZEOF_POINTER__*4($sp) + PUSH $s7, __SIZEOF_POINTER__*3($sp) + PUSH $s8, __SIZEOF_POINTER__*2($sp) +#endif + +#ifndef __riscv_misaligned_fast + andi $shr,$inp,3 + andi $inp,$inp,-4 # align $inp + slli $shr,$shr,3 # byte to bit offset +#endif + + lw $h0,0($ctx) # load hash value + lw $h1,4($ctx) + lw $h2,8($ctx) + lw $h3,12($ctx) + lw $h4,16($ctx) + + lw $r0,20($ctx) # load key + lw $r1,24($ctx) + lw $r2,28($ctx) + lw $r3,32($ctx) + lw $rs1,36($ctx) + lw $rs2,40($ctx) + lw $rs3,44($ctx) + + add $len,$len,$inp # end of buffer + +.Loop: + lw $d0,0($inp) # load input + lw $d1,4($inp) + lw $d2,8($inp) + lw $d3,12($inp) +#ifndef __riscv_misaligned_fast + beqz $shr,.Laligned_inp + + lw $t4,16($inp) + sub $t5,$zero,$shr + srlw $d0,$d0,$shr + sllw $t3,$d1,$t5 + srlw $d1,$d1,$shr + or $d0,$d0,$t3 + sllw $t3,$d2,$t5 + srlw $d2,$d2,$shr + or $d1,$d1,$t3 + sllw $t3,$d3,$t5 + srlw $d3,$d3,$shr + or $d2,$d2,$t3 + sllw $t4,$t4,$t5 + or $d3,$d3,$t4 + +.Laligned_inp: +#endif + srliw $t3,$h4,2 # modulo-scheduled reduction + andi $t4,$h4,-4 + andi $h4,$h4,3 + + addw $d0,$d0,$h0 # accumulate input + addw $t4,$t4,$t3 + sltu $h0,$d0,$h0 + addw $d0,$d0,$t4 # ... and residue + sltu $t4,$d0,$t4 + + addw $d1,$d1,$h1 + addw $h0,$h0,$t4 # carry + sltu $h1,$d1,$h1 + addw $d1,$d1,$h0 + sltu $h0,$d1,$h0 + + addw $d2,$d2,$h2 + addw $h1,$h1,$h0 # carry + sltu $h2,$d2,$h2 + addw $d2,$d2,$h1 + sltu $h1,$d2,$h1 + + addw $d3,$d3,$h3 + addw $h2,$h2,$h1 # carry + sltu $h3,$d3,$h3 + addw $d3,$d3,$h2 + + MULX ($h1,$h0,$r0,$d0) # d0*r0 + + sltu $h2,$d3,$h2 + addw $h3,$h3,$h2 # carry + + MULX ($t4,$t3,$rs3,$d1) # d1*s3 + + addw $h4,$h4,$padbit + caddi $inp,$inp,16 + addw $h4,$h4,$h3 + + MULX ($t6,$a3,$rs2,$d2) # d2*s2 + addw $h0,$h0,$t3 + addw $h1,$h1,$t4 + sltu $t3,$h0,$t3 + addw $h1,$h1,$t3 + + MULX ($t4,$t3,$rs1,$d3) # d3*s1 + addw $h0,$h0,$a3 + addw $h1,$h1,$t6 + sltu $a3,$h0,$a3 + addw $h1,$h1,$a3 + + + MULX ($h2,$a3,$r1,$d0) # d0*r1 + addw $h0,$h0,$t3 + addw $h1,$h1,$t4 + sltu $t3,$h0,$t3 + addw $h1,$h1,$t3 + + MULX ($t4,$t3,$r0,$d1) # d1*r0 + addw $h1,$h1,$a3 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + MULX ($t6,$a3,$rs3,$d2) # d2*s3 + addw $h1,$h1,$t3 + addw $h2,$h2,$t4 + sltu $t3,$h1,$t3 + addw $h2,$h2,$t3 + + MULX ($t4,$t3,$rs2,$d3) # d3*s2 + addw $h1,$h1,$a3 + addw $h2,$h2,$t6 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + mulw $a3,$rs1,$h4 # h4*s1 + addw $h1,$h1,$t3 + addw $h2,$h2,$t4 + sltu $t3,$h1,$t3 + addw $h2,$h2,$t3 + + + MULX ($h3,$t3,$r2,$d0) # d0*r2 + addw $h1,$h1,$a3 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + MULX ($t6,$a3,$r1,$d1) # d1*r1 + addw $h2,$h2,$t3 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + MULX ($t4,$t3,$r0,$d2) # d2*r0 + addw $h2,$h2,$a3 + addw $h3,$h3,$t6 + sltu $a3,$h2,$a3 + addw $h3,$h3,$a3 + + MULX ($t6,$a3,$rs3,$d3) # d3*s3 + addw $h2,$h2,$t3 + addw $h3,$h3,$t4 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + mulw $t3,$rs2,$h4 # h4*s2 + addw $h2,$h2,$a3 + addw $h3,$h3,$t6 + sltu $a3,$h2,$a3 + addw $h3,$h3,$a3 + + + MULX ($t6,$a3,$r3,$d0) # d0*r3 + addw $h2,$h2,$t3 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + MULX ($t4,$t3,$r2,$d1) # d1*r2 + addw $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addw $t6,$t6,$a3 + + MULX ($a3,$d3,$r0,$d3) # d3*r0 + addw $h3,$h3,$t3 + addw $t6,$t6,$t4 + sltu $t3,$h3,$t3 + addw $t6,$t6,$t3 + + MULX ($t4,$t3,$r1,$d2) # d2*r1 + addw $h3,$h3,$d3 + addw $t6,$t6,$a3 + sltu $d3,$h3,$d3 + addw $t6,$t6,$d3 + + mulw $a3,$rs3,$h4 # h4*s3 + addw $h3,$h3,$t3 + addw $t6,$t6,$t4 + sltu $t3,$h3,$t3 + addw $t6,$t6,$t3 + + + mulw $h4,$r0,$h4 # h4*r0 + addw $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addw $t6,$t6,$a3 + addw $h4,$t6,$h4 + + li $padbit,1 # if we loop, padbit is 1 + + bne $inp,$len,.Loop + + sw $h0,0($ctx) # store hash value + sw $h1,4($ctx) + sw $h2,8($ctx) + sw $h3,12($ctx) + sw $h4,16($ctx) + +#ifdef __riscv_zcmp + cm.popret {ra,s0-s8}, 48 +#else + POP $ra, __SIZEOF_POINTER__*11($sp) + POP $s0, __SIZEOF_POINTER__*10($sp) + POP $s1, __SIZEOF_POINTER__*9($sp) + POP $s2, __SIZEOF_POINTER__*8($sp) + POP $s3, __SIZEOF_POINTER__*7($sp) + POP $s4, __SIZEOF_POINTER__*6($sp) + POP $s5, __SIZEOF_POINTER__*5($sp) + POP $s6, __SIZEOF_POINTER__*4($sp) + POP $s7, __SIZEOF_POINTER__*3($sp) + POP $s8, __SIZEOF_POINTER__*2($sp) + caddi $sp,$sp,__SIZEOF_POINTER__*12 +#endif +.Labort: + ret +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); + +$code.=<<___; +.globl poly1305_emit +.type poly1305_emit,\@function +poly1305_emit: +#ifdef __riscv_zicfilp + lpad 0 +#endif + lw $tmp4,16($ctx) + lw $tmp0,0($ctx) + lw $tmp1,4($ctx) + lw $tmp2,8($ctx) + lw $tmp3,12($ctx) + + srliw $ctx,$tmp4,2 # final reduction + andi $in0,$tmp4,-4 + andi $tmp4,$tmp4,3 + addw $ctx,$ctx,$in0 + + addw $tmp0,$tmp0,$ctx + sltu $ctx,$tmp0,$ctx + addiw $in0,$tmp0,5 # compare to modulus + addw $tmp1,$tmp1,$ctx + sltiu $in1,$in0,5 + sltu $ctx,$tmp1,$ctx + addw $in1,$in1,$tmp1 + addw $tmp2,$tmp2,$ctx + sltu $in2,$in1,$tmp1 + sltu $ctx,$tmp2,$ctx + addw $in2,$in2,$tmp2 + addw $tmp3,$tmp3,$ctx + sltu $in3,$in2,$tmp2 + sltu $ctx,$tmp3,$ctx + addw $in3,$in3,$tmp3 + addw $tmp4,$tmp4,$ctx + sltu $ctx,$in3,$tmp3 + addw $ctx,$ctx,$tmp4 + + srl $ctx,$ctx,2 # see if it carried/borrowed + sub $ctx,$zero,$ctx + + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + xor $in2,$in2,$tmp2 + xor $in3,$in3,$tmp3 + and $in0,$in0,$ctx + and $in1,$in1,$ctx + and $in2,$in2,$ctx + and $in3,$in3,$ctx + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + xor $in2,$in2,$tmp2 + xor $in3,$in3,$tmp3 + + lw $tmp0,0($nonce) # load nonce + lw $tmp1,4($nonce) + lw $tmp2,8($nonce) + lw $tmp3,12($nonce) + + addw $in0,$in0,$tmp0 # accumulate nonce + sltu $ctx,$in0,$tmp0 + + addw $in1,$in1,$tmp1 + sltu $tmp1,$in1,$tmp1 + addw $in1,$in1,$ctx + sltu $ctx,$in1,$ctx + addw $ctx,$ctx,$tmp1 + + addw $in2,$in2,$tmp2 + sltu $tmp2,$in2,$tmp2 + addw $in2,$in2,$ctx + sltu $ctx,$in2,$ctx + addw $ctx,$ctx,$tmp2 + + addw $in3,$in3,$tmp3 + addw $in3,$in3,$ctx + +#ifdef __riscv_misaligned_fast + sw $in0,0($mac) # write mac value + sw $in1,4($mac) + sw $in2,8($mac) + sw $in3,12($mac) +#else + srl $tmp0,$in0,8 # write mac value + srl $tmp1,$in0,16 + srl $tmp2,$in0,24 + sb $in0, 0($mac) + sb $tmp0,1($mac) + srl $tmp0,$in1,8 + sb $tmp1,2($mac) + srl $tmp1,$in1,16 + sb $tmp2,3($mac) + srl $tmp2,$in1,24 + sb $in1, 4($mac) + sb $tmp0,5($mac) + srl $tmp0,$in2,8 + sb $tmp1,6($mac) + srl $tmp1,$in2,16 + sb $tmp2,7($mac) + srl $tmp2,$in2,24 + sb $in2, 8($mac) + sb $tmp0,9($mac) + srl $tmp0,$in3,8 + sb $tmp1,10($mac) + srl $tmp1,$in3,16 + sb $tmp2,11($mac) + srl $tmp2,$in3,24 + sb $in3, 12($mac) + sb $tmp0,13($mac) + sb $tmp1,14($mac) + sb $tmp2,15($mac) +#endif + + ret +.size poly1305_emit,.-poly1305_emit +.string "Poly1305 for RISC-V, CRYPTOGAMS by \@dot-asm" +___ +} +}}} + +foreach (split("\n", $code)) { + if ($flavour =~ /^cheri/) { + s/\(x([0-9]+)\)/(c$1)/ and s/\b([ls][bhwd]u?)\b/c$1/; + s/\b(PUSH|POP)(\s+)x([0-9]+)/$1$2c$3/ or + s/\b(ret|jal)\b/c$1/; + s/\bcaddi?\b/cincoffset/ and s/\bx([0-9]+,)/c$1/g or + m/\bcmove\b/ and s/\bx([0-9]+)/c$1/g; + } else { + s/\bcaddi?\b/add/ or + s/\bcmove\b/mv/; + } + print $_, "\n"; +} + +close STDOUT; diff --git a/lib/crypto/riscv/poly1305.h b/lib/crypto/riscv/poly1305.h new file mode 100644 index 000000000000..88f3df44e355 --- /dev/null +++ b/lib/crypto/riscv/poly1305.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for riscv + * + * Copyright (C) 2025 Institute of Software, CAS. + */ + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); -- cgit v1.2.3 From c4b846ff6ecab0427cc7dcccbe0af60b244a6d56 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:22 -0700 Subject: lib/crypto: chacha: Remove unused function chacha_is_arch_optimized() chacha_is_arch_optimized() is no longer used, so remove it. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/chacha-glue.c | 7 ------- lib/crypto/arm64/chacha-neon-glue.c | 6 ------ lib/crypto/mips/chacha-glue.c | 6 ------ lib/crypto/powerpc/chacha-p10-glue.c | 6 ------ lib/crypto/riscv/chacha-riscv64-glue.c | 6 ------ lib/crypto/s390/chacha-glue.c | 6 ------ lib/crypto/x86/chacha_glue.c | 6 ------ 7 files changed, 43 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/chacha-glue.c b/lib/crypto/arm/chacha-glue.c index 88ec96415283..67ba045cae35 100644 --- a/lib/crypto/arm/chacha-glue.c +++ b/lib/crypto/arm/chacha-glue.c @@ -101,13 +101,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - /* We always can use at least the ARM scalar implementation. */ - return true; -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init chacha_arm_mod_init(void) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { diff --git a/lib/crypto/arm64/chacha-neon-glue.c b/lib/crypto/arm64/chacha-neon-glue.c index d0188f974ca5..48097aa34af7 100644 --- a/lib/crypto/arm64/chacha-neon-glue.c +++ b/lib/crypto/arm64/chacha-neon-glue.c @@ -95,12 +95,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&have_neon); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init chacha_simd_mod_init(void) { if (cpu_have_named_feature(ASIMD)) diff --git a/lib/crypto/mips/chacha-glue.c b/lib/crypto/mips/chacha-glue.c index 88c097594eb0..f8390af21dc9 100644 --- a/lib/crypto/mips/chacha-glue.c +++ b/lib/crypto/mips/chacha-glue.c @@ -18,12 +18,6 @@ asmlinkage void hchacha_block_arch(const struct chacha_state *state, u32 out[HCHACHA_OUT_WORDS], int nrounds); EXPORT_SYMBOL(hchacha_block_arch); -bool chacha_is_arch_optimized(void) -{ - return true; -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - MODULE_DESCRIPTION("ChaCha and HChaCha functions (MIPS optimized)"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/powerpc/chacha-p10-glue.c b/lib/crypto/powerpc/chacha-p10-glue.c index fcd23c6f1590..5d3d5506d7f9 100644 --- a/lib/crypto/powerpc/chacha-p10-glue.c +++ b/lib/crypto/powerpc/chacha-p10-glue.c @@ -76,12 +76,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&have_p10); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init chacha_p10_init(void) { if (cpu_has_feature(CPU_FTR_ARCH_31)) diff --git a/lib/crypto/riscv/chacha-riscv64-glue.c b/lib/crypto/riscv/chacha-riscv64-glue.c index 8c3f11d79be3..a15f0aca3fc4 100644 --- a/lib/crypto/riscv/chacha-riscv64-glue.c +++ b/lib/crypto/riscv/chacha-riscv64-glue.c @@ -50,12 +50,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&use_zvkb); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init riscv64_chacha_mod_init(void) { if (riscv_isa_extension_available(NULL, ZVKB) && diff --git a/lib/crypto/s390/chacha-glue.c b/lib/crypto/s390/chacha-glue.c index c57dc851214f..d8137387fe28 100644 --- a/lib/crypto/s390/chacha-glue.c +++ b/lib/crypto/s390/chacha-glue.c @@ -47,11 +47,5 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return cpu_has_vx(); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)"); MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/x86/chacha_glue.c b/lib/crypto/x86/chacha_glue.c index 10b2c945f541..de7da9d512af 100644 --- a/lib/crypto/x86/chacha_glue.c +++ b/lib/crypto/x86/chacha_glue.c @@ -160,12 +160,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&chacha_use_simd); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init chacha_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_SSSE3)) -- cgit v1.2.3 From 20a1acb68d7a16481b70a693d49c2a42882f57a9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:23 -0700 Subject: lib/crypto: chacha: Rename chacha.c to chacha-block-generic.c Rename chacha.c to chacha-block-generic.c to free up the name chacha.c for the high-level API entry points (chacha_crypt() and hchacha_block()), similar to the other algorithms. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 4 +- lib/crypto/chacha-block-generic.c | 114 ++++++++++++++++++++++++++++++++++++++ lib/crypto/chacha.c | 114 -------------------------------------- 3 files changed, 116 insertions(+), 116 deletions(-) create mode 100644 lib/crypto/chacha-block-generic.c delete mode 100644 lib/crypto/chacha.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index cd460e5e3dd2..e71c4bee8310 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -15,8 +15,8 @@ obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o obj-$(CONFIG_CRYPTO_LIB_UTILS) += libcryptoutils.o libcryptoutils-y := memneq.o utils.o -# chacha is used by the /dev/random driver which is always builtin -obj-y += chacha.o +# chacha20_block() is used by the /dev/random driver which is always builtin +obj-y += chacha-block-generic.o obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o diff --git a/lib/crypto/chacha-block-generic.c b/lib/crypto/chacha-block-generic.c new file mode 100644 index 000000000000..77f68de71066 --- /dev/null +++ b/lib/crypto/chacha-block-generic.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) + * + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include +#include +#include +#include +#include + +static void chacha_permute(struct chacha_state *state, int nrounds) +{ + u32 *x = state->x; + int i; + + /* whitelist the allowed round counts */ + WARN_ON_ONCE(nrounds != 20 && nrounds != 12); + + for (i = 0; i < nrounds; i += 2) { + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); + + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); + + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); + + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); + + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); + + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); + + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); + + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); + } +} + +/** + * chacha_block_generic - generate one keystream block and increment block counter + * @state: input state matrix + * @out: output keystream block + * @nrounds: number of rounds (20 or 12; 20 is recommended) + * + * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. + * The caller has already converted the endianness of the input. This function + * also handles incrementing the block counter in the input matrix. + */ +void chacha_block_generic(struct chacha_state *state, + u8 out[CHACHA_BLOCK_SIZE], int nrounds) +{ + struct chacha_state permuted_state = *state; + int i; + + chacha_permute(&permuted_state, nrounds); + + for (i = 0; i < ARRAY_SIZE(state->x); i++) + put_unaligned_le32(permuted_state.x[i] + state->x[i], + &out[i * sizeof(u32)]); + + state->x[12]++; +} +EXPORT_SYMBOL(chacha_block_generic); + +/** + * hchacha_block_generic - abbreviated ChaCha core, for XChaCha + * @state: input state matrix + * @out: the output words + * @nrounds: number of rounds (20 or 12; 20 is recommended) + * + * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step + * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha + * skips the final addition of the initial state, and outputs only certain words + * of the state. It should not be used for streaming directly. + */ +void hchacha_block_generic(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + struct chacha_state permuted_state = *state; + + chacha_permute(&permuted_state, nrounds); + + memcpy(&out[0], &permuted_state.x[0], 16); + memcpy(&out[4], &permuted_state.x[12], 16); +} +EXPORT_SYMBOL(hchacha_block_generic); diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c deleted file mode 100644 index 77f68de71066..000000000000 --- a/lib/crypto/chacha.c +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) - * - * Copyright (C) 2015 Martin Willi - */ - -#include -#include -#include -#include -#include -#include -#include - -static void chacha_permute(struct chacha_state *state, int nrounds) -{ - u32 *x = state->x; - int i; - - /* whitelist the allowed round counts */ - WARN_ON_ONCE(nrounds != 20 && nrounds != 12); - - for (i = 0; i < nrounds; i += 2) { - x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); - - x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); - - x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); - - x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); - - x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); - - x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); - - x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); - - x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); - } -} - -/** - * chacha_block_generic - generate one keystream block and increment block counter - * @state: input state matrix - * @out: output keystream block - * @nrounds: number of rounds (20 or 12; 20 is recommended) - * - * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. - * The caller has already converted the endianness of the input. This function - * also handles incrementing the block counter in the input matrix. - */ -void chacha_block_generic(struct chacha_state *state, - u8 out[CHACHA_BLOCK_SIZE], int nrounds) -{ - struct chacha_state permuted_state = *state; - int i; - - chacha_permute(&permuted_state, nrounds); - - for (i = 0; i < ARRAY_SIZE(state->x); i++) - put_unaligned_le32(permuted_state.x[i] + state->x[i], - &out[i * sizeof(u32)]); - - state->x[12]++; -} -EXPORT_SYMBOL(chacha_block_generic); - -/** - * hchacha_block_generic - abbreviated ChaCha core, for XChaCha - * @state: input state matrix - * @out: the output words - * @nrounds: number of rounds (20 or 12; 20 is recommended) - * - * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step - * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha - * skips the final addition of the initial state, and outputs only certain words - * of the state. It should not be used for streaming directly. - */ -void hchacha_block_generic(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - struct chacha_state permuted_state = *state; - - chacha_permute(&permuted_state, nrounds); - - memcpy(&out[0], &permuted_state.x[0], 16); - memcpy(&out[4], &permuted_state.x[12], 16); -} -EXPORT_SYMBOL(hchacha_block_generic); -- cgit v1.2.3 From 1ae46b6eb5b9a97978fe12a71f5de53ab977297f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:24 -0700 Subject: lib/crypto: chacha: Rename libchacha.c to chacha.c Rename libchacha.c to chacha.c to make the naming consistent with other algorithms and allow additional source files to be added to the libchacha module. This file currently contains chacha_crypt_generic(), but it will soon be updated to contain chacha_crypt(). Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 1 + lib/crypto/chacha.c | 35 +++++++++++++++++++++++++++++++++++ lib/crypto/libchacha.c | 35 ----------------------------------- 3 files changed, 36 insertions(+), 35 deletions(-) create mode 100644 lib/crypto/chacha.c delete mode 100644 lib/crypto/libchacha.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index e71c4bee8310..a006048ba2bd 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -18,6 +18,7 @@ libcryptoutils-y := memneq.o utils.o # chacha20_block() is used by the /dev/random driver which is always builtin obj-y += chacha-block-generic.o obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o +libchacha-y := chacha.o obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o libaes-y := aes.o diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c new file mode 100644 index 000000000000..26862ad90a96 --- /dev/null +++ b/lib/crypto/chacha.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * The ChaCha stream cipher (RFC7539) + * + * Copyright (C) 2015 Martin Willi + */ + +#include // for crypto_xor_cpy +#include +#include +#include +#include + +void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + /* aligned to potentially speed up crypto_xor() */ + u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); + + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block_generic(state, stream, nrounds); + crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); + bytes -= CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + } + if (bytes) { + chacha_block_generic(state, stream, nrounds); + crypto_xor_cpy(dst, src, stream, bytes); + } +} +EXPORT_SYMBOL(chacha_crypt_generic); + +MODULE_DESCRIPTION("ChaCha stream cipher (RFC7539)"); +MODULE_LICENSE("GPL"); diff --git a/lib/crypto/libchacha.c b/lib/crypto/libchacha.c deleted file mode 100644 index 26862ad90a96..000000000000 --- a/lib/crypto/libchacha.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The ChaCha stream cipher (RFC7539) - * - * Copyright (C) 2015 Martin Willi - */ - -#include // for crypto_xor_cpy -#include -#include -#include -#include - -void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - /* aligned to potentially speed up crypto_xor() */ - u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); - - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block_generic(state, stream, nrounds); - crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); - bytes -= CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - } - if (bytes) { - chacha_block_generic(state, stream, nrounds); - crypto_xor_cpy(dst, src, stream, bytes); - } -} -EXPORT_SYMBOL(chacha_crypt_generic); - -MODULE_DESCRIPTION("ChaCha stream cipher (RFC7539)"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 13cecc526d8fe7eeb9b136159738688a1a10cd82 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:25 -0700 Subject: lib/crypto: chacha: Consolidate into single module Consolidate the ChaCha code into a single module (excluding chacha-block-generic.c which remains always built-in for random.c), similar to various other algorithms: - Each arch now provides a header file lib/crypto/$(SRCARCH)/chacha.h, replacing lib/crypto/$(SRCARCH)/chacha*.c. The header defines chacha_crypt_arch() and hchacha_block_arch(). It is included by lib/crypto/chacha.c, and thus the code gets built into the single libchacha module, with improved inlining in some cases. - Whether arch-optimized ChaCha is buildable is now controlled centrally by lib/crypto/Kconfig instead of by lib/crypto/$(SRCARCH)/Kconfig. The conditions for enabling it remain the same as before, and it remains enabled by default. - Any additional arch-specific translation units for the optimized ChaCha code, such as assembly files, are now compiled by lib/crypto/Makefile instead of lib/crypto/$(SRCARCH)/Makefile. This removes the last use for the Makefile and Kconfig files in the arm64, mips, powerpc, riscv, and s390 subdirectories of lib/crypto/. So also remove those files and the references to them. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 47 +++----- lib/crypto/Makefile | 43 ++++++-- lib/crypto/arm/Kconfig | 5 - lib/crypto/arm/Makefile | 4 - lib/crypto/arm/chacha-glue.c | 131 ----------------------- lib/crypto/arm/chacha.h | 117 ++++++++++++++++++++ lib/crypto/arm64/Kconfig | 8 -- lib/crypto/arm64/Makefile | 4 - lib/crypto/arm64/chacha-neon-glue.c | 113 -------------------- lib/crypto/arm64/chacha.h | 99 +++++++++++++++++ lib/crypto/chacha.c | 41 ++++++- lib/crypto/mips/Kconfig | 7 -- lib/crypto/mips/Makefile | 5 - lib/crypto/mips/chacha-glue.c | 23 ---- lib/crypto/mips/chacha.h | 14 +++ lib/crypto/powerpc/Kconfig | 8 -- lib/crypto/powerpc/Makefile | 4 - lib/crypto/powerpc/chacha-p10-glue.c | 94 ---------------- lib/crypto/powerpc/chacha.h | 76 +++++++++++++ lib/crypto/riscv/Kconfig | 8 -- lib/crypto/riscv/Makefile | 4 - lib/crypto/riscv/chacha-riscv64-glue.c | 69 ------------ lib/crypto/riscv/chacha.h | 51 +++++++++ lib/crypto/s390/Kconfig | 7 -- lib/crypto/s390/Makefile | 4 - lib/crypto/s390/chacha-glue.c | 51 --------- lib/crypto/s390/chacha.h | 36 +++++++ lib/crypto/x86/Kconfig | 7 -- lib/crypto/x86/Makefile | 3 - lib/crypto/x86/chacha.h | 176 ++++++++++++++++++++++++++++++ lib/crypto/x86/chacha_glue.c | 190 --------------------------------- 31 files changed, 653 insertions(+), 796 deletions(-) delete mode 100644 lib/crypto/arm/chacha-glue.c create mode 100644 lib/crypto/arm/chacha.h delete mode 100644 lib/crypto/arm64/Kconfig delete mode 100644 lib/crypto/arm64/Makefile delete mode 100644 lib/crypto/arm64/chacha-neon-glue.c create mode 100644 lib/crypto/arm64/chacha.h delete mode 100644 lib/crypto/mips/Kconfig delete mode 100644 lib/crypto/mips/Makefile delete mode 100644 lib/crypto/mips/chacha-glue.c create mode 100644 lib/crypto/mips/chacha.h delete mode 100644 lib/crypto/powerpc/Kconfig delete mode 100644 lib/crypto/powerpc/Makefile delete mode 100644 lib/crypto/powerpc/chacha-p10-glue.c create mode 100644 lib/crypto/powerpc/chacha.h delete mode 100644 lib/crypto/riscv/Kconfig delete mode 100644 lib/crypto/riscv/Makefile delete mode 100644 lib/crypto/riscv/chacha-riscv64-glue.c create mode 100644 lib/crypto/riscv/chacha.h delete mode 100644 lib/crypto/s390/Kconfig delete mode 100644 lib/crypto/s390/Makefile delete mode 100644 lib/crypto/s390/chacha-glue.c create mode 100644 lib/crypto/s390/chacha.h create mode 100644 lib/crypto/x86/chacha.h delete mode 100644 lib/crypto/x86/chacha_glue.c (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index cb4e056a98fa..c1db483bc230 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -44,29 +44,23 @@ config CRYPTO_LIB_BLAKE2S_GENERIC implementation is enabled, this implementation serves the users of CRYPTO_LIB_BLAKE2S. -config CRYPTO_ARCH_HAVE_LIB_CHACHA - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the ChaCha library interface, - either builtin or as a module. - -config CRYPTO_LIB_CHACHA_GENERIC +config CRYPTO_LIB_CHACHA tristate - default CRYPTO_LIB_CHACHA if !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_UTILS help - This symbol can be selected by arch implementations of the ChaCha - library interface that require the generic code as a fallback, e.g., - for SIMD implementations. If no arch specific implementation is - enabled, this implementation serves the users of CRYPTO_LIB_CHACHA. + Enable the ChaCha library interface. Select this if your module uses + chacha_crypt() or hchacha_block(). -config CRYPTO_LIB_CHACHA - tristate - help - Enable the ChaCha library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. +config CRYPTO_LIB_CHACHA_ARCH + bool + depends on CRYPTO_LIB_CHACHA && !UML && !KMSAN + default y if ARM + default y if ARM64 && KERNEL_MODE_NEON + default y if MIPS && CPU_MIPS32_R2 + default y if PPC64 && CPU_LITTLE_ENDIAN && VSX + default y if RISCV && 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + default y if S390 + default y if X86_64 config CRYPTO_ARCH_HAVE_LIB_CURVE25519 bool @@ -218,21 +212,6 @@ if !KMSAN # avoid false positives from assembly if ARM source "lib/crypto/arm/Kconfig" endif -if ARM64 -source "lib/crypto/arm64/Kconfig" -endif -if MIPS -source "lib/crypto/mips/Kconfig" -endif -if PPC -source "lib/crypto/powerpc/Kconfig" -endif -if RISCV -source "lib/crypto/riscv/Kconfig" -endif -if S390 -source "lib/crypto/s390/Kconfig" -endif if X86 source "lib/crypto/x86/Kconfig" endif diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index a006048ba2bd..baafd58b5dfd 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -15,11 +15,6 @@ obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o obj-$(CONFIG_CRYPTO_LIB_UTILS) += libcryptoutils.o libcryptoutils-y := memneq.o utils.o -# chacha20_block() is used by the /dev/random driver which is always builtin -obj-y += chacha-block-generic.o -obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o -libchacha-y := chacha.o - obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o libaes-y := aes.o @@ -40,6 +35,39 @@ libblake2s-y := blake2s.o libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o libblake2s-$(CONFIG_CRYPTO_SELFTESTS) += blake2s-selftest.o +################################################################################ + +# chacha20_block() is used by the /dev/random driver which is always builtin +obj-y += chacha-block-generic.o + +obj-$(CONFIG_CRYPTO_LIB_CHACHA) += libchacha.o +libchacha-y := chacha.o + +ifeq ($(CONFIG_CRYPTO_LIB_CHACHA_ARCH),y) +CFLAGS_chacha.o += -I$(src)/$(SRCARCH) + +ifeq ($(CONFIG_ARM),y) +libchacha-y += arm/chacha-scalar-core.o +libchacha-$(CONFIG_KERNEL_MODE_NEON) += arm/chacha-neon-core.o +endif + +libchacha-$(CONFIG_ARM64) += arm64/chacha-neon-core.o + +ifeq ($(CONFIG_MIPS),y) +libchacha-y += mips/chacha-core.o +AFLAGS_mips/chacha-core.o += -O2 # needed to fill branch delay slots +endif + +libchacha-$(CONFIG_PPC) += powerpc/chacha-p10le-8x.o +libchacha-$(CONFIG_RISCV) += riscv/chacha-riscv64-zvkb.o +libchacha-$(CONFIG_S390) += s390/chacha-s390.o +libchacha-$(CONFIG_X86) += x86/chacha-ssse3-x86_64.o \ + x86/chacha-avx2-x86_64.o \ + x86/chacha-avx512vl-x86_64.o +endif # CONFIG_CRYPTO_LIB_CHACHA_ARCH + +################################################################################ + obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o libchacha20poly1305-y += chacha20poly1305.o libchacha20poly1305-$(CONFIG_CRYPTO_SELFTESTS) += chacha20poly1305-selftest.o @@ -231,11 +259,6 @@ obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o libsm3-y := sm3.o obj-$(CONFIG_ARM) += arm/ -obj-$(CONFIG_ARM64) += arm64/ -obj-$(CONFIG_MIPS) += mips/ -obj-$(CONFIG_PPC) += powerpc/ -obj-$(CONFIG_RISCV) += riscv/ -obj-$(CONFIG_S390) += s390/ obj-$(CONFIG_X86) += x86/ # clean-files must be defined unconditionally diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig index 0d821e282c64..740341aa35d2 100644 --- a/lib/crypto/arm/Kconfig +++ b/lib/crypto/arm/Kconfig @@ -12,8 +12,3 @@ config CRYPTO_BLAKE2S_ARM BLAKE2b, but slower than the NEON implementation of BLAKE2b. There is no NEON implementation of BLAKE2s, since NEON doesn't really help with it. - -config CRYPTO_CHACHA20_NEON - tristate - default CRYPTO_LIB_CHACHA - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/arm/Makefile b/lib/crypto/arm/Makefile index 9f70e61d419e..0574b0e9739e 100644 --- a/lib/crypto/arm/Makefile +++ b/lib/crypto/arm/Makefile @@ -2,7 +2,3 @@ obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o libblake2s-arm-y := blake2s-core.o blake2s-glue.o - -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o -chacha-neon-y := chacha-scalar-core.o chacha-glue.o -chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o diff --git a/lib/crypto/arm/chacha-glue.c b/lib/crypto/arm/chacha-glue.c deleted file mode 100644 index 67ba045cae35..000000000000 --- a/lib/crypto/arm/chacha-glue.c +++ /dev/null @@ -1,131 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ChaCha and HChaCha functions (ARM optimized) - * - * Copyright (C) 2016-2019 Linaro, Ltd. - * Copyright (C) 2015 Martin Willi - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, - u8 *dst, const u8 *src, int nrounds); -asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, - u8 *dst, const u8 *src, - int nrounds, unsigned int nbytes); -asmlinkage void hchacha_block_arm(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); -asmlinkage void hchacha_block_neon(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); - -asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, - const struct chacha_state *state, int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); - -static inline bool neon_usable(void) -{ - return static_branch_likely(&use_neon) && crypto_simd_usable(); -} - -static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - u8 buf[CHACHA_BLOCK_SIZE]; - - while (bytes > CHACHA_BLOCK_SIZE) { - unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); - - chacha_4block_xor_neon(state, dst, src, nrounds, l); - bytes -= l; - src += l; - dst += l; - state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); - } - if (bytes) { - const u8 *s = src; - u8 *d = dst; - - if (bytes != CHACHA_BLOCK_SIZE) - s = d = memcpy(buf, src, bytes); - chacha_block_xor_neon(state, d, s, nrounds); - if (d != dst) - memcpy(dst, buf, bytes); - state->x[12]++; - } -} - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { - hchacha_block_arm(state, out, nrounds); - } else { - kernel_neon_begin(); - hchacha_block_neon(state, out, nrounds); - kernel_neon_end(); - } -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || - bytes <= CHACHA_BLOCK_SIZE) { - chacha_doarm(dst, src, bytes, state, nrounds); - state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); - return; - } - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - kernel_neon_begin(); - chacha_doneon(state, dst, src, todo, nrounds); - kernel_neon_end(); - - bytes -= todo; - src += todo; - dst += todo; - } while (bytes); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init chacha_arm_mod_init(void) -{ - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { - switch (read_cpuid_part()) { - case ARM_CPU_PART_CORTEX_A7: - case ARM_CPU_PART_CORTEX_A5: - /* - * The Cortex-A7 and Cortex-A5 do not perform well with - * the NEON implementation but do incredibly with the - * scalar one and use less power. - */ - break; - default: - static_branch_enable(&use_neon); - } - } - return 0; -} -subsys_initcall(chacha_arm_mod_init); - -static void __exit chacha_arm_mod_exit(void) -{ -} -module_exit(chacha_arm_mod_exit); - -MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm/chacha.h b/lib/crypto/arm/chacha.h new file mode 100644 index 000000000000..0cae30f8ee5d --- /dev/null +++ b/lib/crypto/arm/chacha.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ChaCha and HChaCha functions (ARM optimized) + * + * Copyright (C) 2016-2019 Linaro, Ltd. + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include + +#include +#include +#include +#include + +asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, int nrounds); +asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, + int nrounds, unsigned int nbytes); +asmlinkage void hchacha_block_arm(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); +asmlinkage void hchacha_block_neon(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + const struct chacha_state *state, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); + +static inline bool neon_usable(void) +{ + return static_branch_likely(&use_neon) && crypto_simd_usable(); +} + +static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes > CHACHA_BLOCK_SIZE) { + unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); + + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); + } + if (bytes) { + const u8 *s = src; + u8 *d = dst; + + if (bytes != CHACHA_BLOCK_SIZE) + s = d = memcpy(buf, src, bytes); + chacha_block_xor_neon(state, d, s, nrounds); + if (d != dst) + memcpy(dst, buf, bytes); + state->x[12]++; + } +} + +static void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { + hchacha_block_arm(state, out, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, out, nrounds); + kernel_neon_end(); + } +} + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || + bytes <= CHACHA_BLOCK_SIZE) { + chacha_doarm(dst, src, bytes, state, nrounds); + state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); + return; + } + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + case ARM_CPU_PART_CORTEX_A5: + /* + * The Cortex-A7 and Cortex-A5 do not perform well with + * the NEON implementation but do incredibly with the + * scalar one and use less power. + */ + break; + default: + static_branch_enable(&use_neon); + } + } +} diff --git a/lib/crypto/arm64/Kconfig b/lib/crypto/arm64/Kconfig deleted file mode 100644 index 07c8a4f0ab03..000000000000 --- a/lib/crypto/arm64/Kconfig +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA20_NEON - tristate - depends on KERNEL_MODE_NEON - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/arm64/Makefile b/lib/crypto/arm64/Makefile deleted file mode 100644 index d49cceca3d1c..000000000000 --- a/lib/crypto/arm64/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o -chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o diff --git a/lib/crypto/arm64/chacha-neon-glue.c b/lib/crypto/arm64/chacha-neon-glue.c deleted file mode 100644 index 48097aa34af7..000000000000 --- a/lib/crypto/arm64/chacha-neon-glue.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * ChaCha and HChaCha functions (ARM64 optimized) - * - * Copyright (C) 2016 - 2017 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, - u8 *dst, const u8 *src, int nrounds); -asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, - u8 *dst, const u8 *src, - int nrounds, int bytes); -asmlinkage void hchacha_block_neon(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, - int bytes, int nrounds) -{ - while (bytes > 0) { - int l = min(bytes, CHACHA_BLOCK_SIZE * 5); - - if (l <= CHACHA_BLOCK_SIZE) { - u8 buf[CHACHA_BLOCK_SIZE]; - - memcpy(buf, src, l); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, l); - state->x[12] += 1; - break; - } - chacha_4block_xor_neon(state, dst, src, nrounds, l); - bytes -= l; - src += l; - dst += l; - state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); - } -} - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { - hchacha_block_generic(state, out, nrounds); - } else { - kernel_neon_begin(); - hchacha_block_neon(state, out, nrounds); - kernel_neon_end(); - } -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || - !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - kernel_neon_begin(); - chacha_doneon(state, dst, src, todo, nrounds); - kernel_neon_end(); - - bytes -= todo; - src += todo; - dst += todo; - } while (bytes); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init chacha_simd_mod_init(void) -{ - if (cpu_have_named_feature(ASIMD)) - static_branch_enable(&have_neon); - return 0; -} -subsys_initcall(chacha_simd_mod_init); - -static void __exit chacha_simd_mod_exit(void) -{ -} -module_exit(chacha_simd_mod_exit); - -MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM64 optimized)"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm64/chacha.h b/lib/crypto/arm64/chacha.h new file mode 100644 index 000000000000..ba6c22d46086 --- /dev/null +++ b/lib/crypto/arm64/chacha.h @@ -0,0 +1,99 @@ +/* + * ChaCha and HChaCha functions (ARM64 optimized) + * + * Copyright (C) 2016 - 2017 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include + +#include +#include +#include + +asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, int nrounds); +asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, + int nrounds, int bytes); +asmlinkage void hchacha_block_neon(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, + int bytes, int nrounds) +{ + while (bytes > 0) { + int l = min(bytes, CHACHA_BLOCK_SIZE * 5); + + if (l <= CHACHA_BLOCK_SIZE) { + u8 buf[CHACHA_BLOCK_SIZE]; + + memcpy(buf, src, l); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, l); + state->x[12] += 1; + break; + } + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); + } +} + +static void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { + hchacha_block_generic(state, out, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, out, nrounds); + kernel_neon_end(); + } +} + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || + !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (cpu_have_named_feature(ASIMD)) + static_branch_enable(&have_neon); +} diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c index 26862ad90a96..e0c7cb4af318 100644 --- a/lib/crypto/chacha.c +++ b/lib/crypto/chacha.c @@ -11,8 +11,9 @@ #include #include -void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) +static void __maybe_unused +chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) { /* aligned to potentially speed up crypto_xor() */ u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); @@ -29,7 +30,41 @@ void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, crypto_xor_cpy(dst, src, stream, bytes); } } -EXPORT_SYMBOL(chacha_crypt_generic); + +#ifdef CONFIG_CRYPTO_LIB_CHACHA_ARCH +#include "chacha.h" /* $(SRCARCH)/chacha.h */ +#else +#define chacha_crypt_arch chacha_crypt_generic +#define hchacha_block_arch hchacha_block_generic +#endif + +void chacha_crypt(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + chacha_crypt_arch(state, dst, src, bytes, nrounds); +} +EXPORT_SYMBOL_GPL(chacha_crypt); + +void hchacha_block(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + hchacha_block_arch(state, out, nrounds); +} +EXPORT_SYMBOL_GPL(hchacha_block); + +#ifdef chacha_mod_init_arch +static int __init chacha_mod_init(void) +{ + chacha_mod_init_arch(); + return 0; +} +subsys_initcall(chacha_mod_init); + +static void __exit chacha_mod_exit(void) +{ +} +module_exit(chacha_mod_exit); +#endif MODULE_DESCRIPTION("ChaCha stream cipher (RFC7539)"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/mips/Kconfig b/lib/crypto/mips/Kconfig deleted file mode 100644 index 94c1a0892c20..000000000000 --- a/lib/crypto/mips/Kconfig +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA_MIPS - tristate - depends on CPU_MIPS32_R2 - default CRYPTO_LIB_CHACHA - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/mips/Makefile b/lib/crypto/mips/Makefile deleted file mode 100644 index b5ea0e25c21e..000000000000 --- a/lib/crypto/mips/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o -chacha-mips-y := chacha-core.o chacha-glue.o -AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots diff --git a/lib/crypto/mips/chacha-glue.c b/lib/crypto/mips/chacha-glue.c deleted file mode 100644 index f8390af21dc9..000000000000 --- a/lib/crypto/mips/chacha-glue.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ChaCha and HChaCha functions (MIPS optimized) - * - * Copyright (C) 2019 Linaro, Ltd. - */ - -#include -#include -#include - -asmlinkage void chacha_crypt_arch(struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int bytes, int nrounds); -EXPORT_SYMBOL(chacha_crypt_arch); - -asmlinkage void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); -EXPORT_SYMBOL(hchacha_block_arch); - -MODULE_DESCRIPTION("ChaCha and HChaCha functions (MIPS optimized)"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/mips/chacha.h b/lib/crypto/mips/chacha.h new file mode 100644 index 000000000000..0c18c0dc2a40 --- /dev/null +++ b/lib/crypto/mips/chacha.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ChaCha and HChaCha functions (MIPS optimized) + * + * Copyright (C) 2019 Linaro, Ltd. + */ + +#include + +asmlinkage void chacha_crypt_arch(struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int bytes, int nrounds); +asmlinkage void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); diff --git a/lib/crypto/powerpc/Kconfig b/lib/crypto/powerpc/Kconfig deleted file mode 100644 index e41012a61876..000000000000 --- a/lib/crypto/powerpc/Kconfig +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA20_P10 - tristate - depends on PPC64 && CPU_LITTLE_ENDIAN && VSX - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/powerpc/Makefile b/lib/crypto/powerpc/Makefile deleted file mode 100644 index 778a04edd226..000000000000 --- a/lib/crypto/powerpc/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o -chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o diff --git a/lib/crypto/powerpc/chacha-p10-glue.c b/lib/crypto/powerpc/chacha-p10-glue.c deleted file mode 100644 index 5d3d5506d7f9..000000000000 --- a/lib/crypto/powerpc/chacha-p10-glue.c +++ /dev/null @@ -1,94 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * ChaCha stream cipher (P10 accelerated) - * - * Copyright 2023- IBM Corp. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void chacha_p10le_8x(const struct chacha_state *state, u8 *dst, - const u8 *src, unsigned int len, int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); - -static void vsx_begin(void) -{ - preempt_disable(); - enable_kernel_vsx(); -} - -static void vsx_end(void) -{ - disable_kernel_vsx(); - preempt_enable(); -} - -static void chacha_p10_do_8x(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - unsigned int l = bytes & ~0x0FF; - - if (l > 0) { - chacha_p10le_8x(state, dst, src, l, nrounds); - bytes -= l; - src += l; - dst += l; - state->x[12] += l / CHACHA_BLOCK_SIZE; - } - - if (bytes > 0) - chacha_crypt_generic(state, dst, src, bytes, nrounds); -} - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - hchacha_block_generic(state, out, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE || - !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - vsx_begin(); - chacha_p10_do_8x(state, dst, src, todo, nrounds); - vsx_end(); - - bytes -= todo; - src += todo; - dst += todo; - } while (bytes); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init chacha_p10_init(void) -{ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - static_branch_enable(&have_p10); - return 0; -} -subsys_initcall(chacha_p10_init); - -static void __exit chacha_p10_exit(void) -{ -} -module_exit(chacha_p10_exit); - -MODULE_DESCRIPTION("ChaCha stream cipher (P10 accelerated)"); -MODULE_AUTHOR("Danny Tsen "); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/powerpc/chacha.h b/lib/crypto/powerpc/chacha.h new file mode 100644 index 000000000000..1df6e1ce31c4 --- /dev/null +++ b/lib/crypto/powerpc/chacha.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ChaCha stream cipher (P10 accelerated) + * + * Copyright 2023- IBM Corp. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void chacha_p10le_8x(const struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int len, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); + +static void vsx_begin(void) +{ + preempt_disable(); + enable_kernel_vsx(); +} + +static void vsx_end(void) +{ + disable_kernel_vsx(); + preempt_enable(); +} + +static void chacha_p10_do_8x(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + unsigned int l = bytes & ~0x0FF; + + if (l > 0) { + chacha_p10le_8x(state, dst, src, l, nrounds); + bytes -= l; + src += l; + dst += l; + state->x[12] += l / CHACHA_BLOCK_SIZE; + } + + if (bytes > 0) + chacha_crypt_generic(state, dst, src, bytes, nrounds); +} + +#define hchacha_block_arch hchacha_block_generic /* not implemented yet */ + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE || + !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + vsx_begin(); + chacha_p10_do_8x(state, dst, src, todo, nrounds); + vsx_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + static_branch_enable(&have_p10); +} diff --git a/lib/crypto/riscv/Kconfig b/lib/crypto/riscv/Kconfig deleted file mode 100644 index bc7a43f33eb3..000000000000 --- a/lib/crypto/riscv/Kconfig +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA_RISCV64 - tristate - depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO - default CRYPTO_LIB_CHACHA - select CRYPTO_ARCH_HAVE_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC diff --git a/lib/crypto/riscv/Makefile b/lib/crypto/riscv/Makefile deleted file mode 100644 index e27b78f317fc..000000000000 --- a/lib/crypto/riscv/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o -chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o diff --git a/lib/crypto/riscv/chacha-riscv64-glue.c b/lib/crypto/riscv/chacha-riscv64-glue.c deleted file mode 100644 index a15f0aca3fc4..000000000000 --- a/lib/crypto/riscv/chacha-riscv64-glue.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ChaCha stream cipher (RISC-V optimized) - * - * Copyright (C) 2023 SiFive, Inc. - * Author: Jerry Shih - */ - -#include -#include -#include -#include -#include -#include - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb); - -asmlinkage void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out, - size_t nblocks, int nrounds); - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - hchacha_block_generic(state, out, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - u8 block_buffer[CHACHA_BLOCK_SIZE]; - unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE; - unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE; - - if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - kernel_vector_begin(); - if (full_blocks) { - chacha_zvkb(state, src, dst, full_blocks, nrounds); - src += full_blocks * CHACHA_BLOCK_SIZE; - dst += full_blocks * CHACHA_BLOCK_SIZE; - } - if (tail_bytes) { - memcpy(block_buffer, src, tail_bytes); - chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds); - memcpy(dst, block_buffer, tail_bytes); - } - kernel_vector_end(); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init riscv64_chacha_mod_init(void) -{ - if (riscv_isa_extension_available(NULL, ZVKB) && - riscv_vector_vlen() >= 128) - static_branch_enable(&use_zvkb); - return 0; -} -subsys_initcall(riscv64_chacha_mod_init); - -static void __exit riscv64_chacha_mod_exit(void) -{ -} -module_exit(riscv64_chacha_mod_exit); - -MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)"); -MODULE_AUTHOR("Jerry Shih "); -MODULE_LICENSE("GPL"); diff --git a/lib/crypto/riscv/chacha.h b/lib/crypto/riscv/chacha.h new file mode 100644 index 000000000000..5c000c6aef4b --- /dev/null +++ b/lib/crypto/riscv/chacha.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * ChaCha stream cipher (RISC-V optimized) + * + * Copyright (C) 2023 SiFive, Inc. + * Author: Jerry Shih + */ + +#include +#include +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb); + +asmlinkage void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out, + size_t nblocks, int nrounds); + +#define hchacha_block_arch hchacha_block_generic /* not implemented yet */ + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + u8 block_buffer[CHACHA_BLOCK_SIZE]; + unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE; + unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE; + + if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + kernel_vector_begin(); + if (full_blocks) { + chacha_zvkb(state, src, dst, full_blocks, nrounds); + src += full_blocks * CHACHA_BLOCK_SIZE; + dst += full_blocks * CHACHA_BLOCK_SIZE; + } + if (tail_bytes) { + memcpy(block_buffer, src, tail_bytes); + chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds); + memcpy(dst, block_buffer, tail_bytes); + } + kernel_vector_end(); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (riscv_isa_extension_available(NULL, ZVKB) && + riscv_vector_vlen() >= 128) + static_branch_enable(&use_zvkb); +} diff --git a/lib/crypto/s390/Kconfig b/lib/crypto/s390/Kconfig deleted file mode 100644 index 069b355fe51a..000000000000 --- a/lib/crypto/s390/Kconfig +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA_S390 - tristate - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/s390/Makefile b/lib/crypto/s390/Makefile deleted file mode 100644 index 06c2cf77178e..000000000000 --- a/lib/crypto/s390/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o -chacha_s390-y := chacha-glue.o chacha-s390.o diff --git a/lib/crypto/s390/chacha-glue.c b/lib/crypto/s390/chacha-glue.c deleted file mode 100644 index d8137387fe28..000000000000 --- a/lib/crypto/s390/chacha-glue.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ChaCha stream cipher (s390 optimized) - * - * Copyright IBM Corp. 2021 - */ - -#define KMSG_COMPONENT "chacha_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include "chacha-s390.h" - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - /* TODO: implement hchacha_block_arch() in assembly */ - hchacha_block_generic(state, out, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - /* s390 chacha20 implementation has 20 rounds hard-coded, - * it cannot handle a block of data or less, but otherwise - * it can handle data of arbitrary size - */ - if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) { - chacha_crypt_generic(state, dst, src, bytes, nrounds); - } else { - DECLARE_KERNEL_FPU_ONSTACK32(vxstate); - - kernel_fpu_begin(&vxstate, KERNEL_VXR); - chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]); - kernel_fpu_end(&vxstate, KERNEL_VXR); - - state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) / - CHACHA_BLOCK_SIZE; - } -} -EXPORT_SYMBOL(chacha_crypt_arch); - -MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/s390/chacha.h b/lib/crypto/s390/chacha.h new file mode 100644 index 000000000000..fd9c4a422365 --- /dev/null +++ b/lib/crypto/s390/chacha.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ChaCha stream cipher (s390 optimized) + * + * Copyright IBM Corp. 2021 + */ + +#include +#include +#include +#include +#include +#include "chacha-s390.h" + +#define hchacha_block_arch hchacha_block_generic /* not implemented yet */ + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + /* s390 chacha20 implementation has 20 rounds hard-coded, + * it cannot handle a block of data or less, but otherwise + * it can handle data of arbitrary size + */ + if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) { + chacha_crypt_generic(state, dst, src, bytes, nrounds); + } else { + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); + + kernel_fpu_begin(&vxstate, KERNEL_VXR); + chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]); + kernel_fpu_end(&vxstate, KERNEL_VXR); + + state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) / + CHACHA_BLOCK_SIZE; + } +} diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig index 24dc9a59b272..eb47da71aa6b 100644 --- a/lib/crypto/x86/Kconfig +++ b/lib/crypto/x86/Kconfig @@ -11,10 +11,3 @@ config CRYPTO_BLAKE2S_X86 Architecture: x86_64 using: - SSSE3 (Supplemental SSE3) - AVX-512 (Advanced Vector Extensions-512) - -config CRYPTO_CHACHA20_X86_64 - tristate - depends on 64BIT - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/x86/Makefile b/lib/crypto/x86/Makefile index 16c9d76f9947..4454556b243e 100644 --- a/lib/crypto/x86/Makefile +++ b/lib/crypto/x86/Makefile @@ -2,6 +2,3 @@ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o - -obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o -chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha-avx512vl-x86_64.o chacha_glue.o diff --git a/lib/crypto/x86/chacha.h b/lib/crypto/x86/chacha.h new file mode 100644 index 000000000000..10cf8f1c569d --- /dev/null +++ b/lib/crypto/x86/chacha.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ChaCha and HChaCha functions (x86_64 optimized) + * + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include +#include + +asmlinkage void chacha_block_xor_ssse3(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_ssse3(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void hchacha_block_ssse3(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +asmlinkage void chacha_2block_xor_avx2(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx2(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx2(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); + +asmlinkage void chacha_2block_xor_avx512vl(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx512vl(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx512vl(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); + +static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) +{ + len = min(len, maxblocks * CHACHA_BLOCK_SIZE); + return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; +} + +static void chacha_dosimd(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + if (static_branch_likely(&chacha_use_avx512vl)) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { + chacha_8block_xor_avx512vl(state, dst, src, bytes, + nrounds); + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; + state->x[12] += 8; + } + if (bytes > CHACHA_BLOCK_SIZE * 4) { + chacha_8block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state->x[12] += chacha_advance(bytes, 8); + return; + } + if (bytes > CHACHA_BLOCK_SIZE * 2) { + chacha_4block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state->x[12] += chacha_advance(bytes, 4); + return; + } + if (bytes) { + chacha_2block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state->x[12] += chacha_advance(bytes, 2); + return; + } + } + + if (static_branch_likely(&chacha_use_avx2)) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; + state->x[12] += 8; + } + if (bytes > CHACHA_BLOCK_SIZE * 4) { + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); + state->x[12] += chacha_advance(bytes, 8); + return; + } + if (bytes > CHACHA_BLOCK_SIZE * 2) { + chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); + state->x[12] += chacha_advance(bytes, 4); + return; + } + if (bytes > CHACHA_BLOCK_SIZE) { + chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); + state->x[12] += chacha_advance(bytes, 2); + return; + } + } + + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + dst += CHACHA_BLOCK_SIZE * 4; + state->x[12] += 4; + } + if (bytes > CHACHA_BLOCK_SIZE) { + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); + state->x[12] += chacha_advance(bytes, 4); + return; + } + if (bytes) { + chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); + state->x[12]++; + } +} + +static void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!static_branch_likely(&chacha_use_simd)) { + hchacha_block_generic(state, out, nrounds); + } else { + kernel_fpu_begin(); + hchacha_block_ssse3(state, out, nrounds); + kernel_fpu_end(); + } +} + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + if (!static_branch_likely(&chacha_use_simd) || + bytes <= CHACHA_BLOCK_SIZE) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_fpu_begin(); + chacha_dosimd(state, dst, src, todo, nrounds); + kernel_fpu_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (!boot_cpu_has(X86_FEATURE_SSSE3)) + return; + + static_branch_enable(&chacha_use_simd); + + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + static_branch_enable(&chacha_use_avx2); + + if (boot_cpu_has(X86_FEATURE_AVX512VL) && + boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ + static_branch_enable(&chacha_use_avx512vl); + } +} diff --git a/lib/crypto/x86/chacha_glue.c b/lib/crypto/x86/chacha_glue.c deleted file mode 100644 index de7da9d512af..000000000000 --- a/lib/crypto/x86/chacha_glue.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * ChaCha and HChaCha functions (x86_64 optimized) - * - * Copyright (C) 2015 Martin Willi - */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void chacha_block_xor_ssse3(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_4block_xor_ssse3(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void hchacha_block_ssse3(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); - -asmlinkage void chacha_2block_xor_avx2(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_4block_xor_avx2(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_8block_xor_avx2(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); - -asmlinkage void chacha_2block_xor_avx512vl(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_4block_xor_avx512vl(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_8block_xor_avx512vl(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); - -static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) -{ - len = min(len, maxblocks * CHACHA_BLOCK_SIZE); - return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; -} - -static void chacha_dosimd(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (static_branch_likely(&chacha_use_avx512vl)) { - while (bytes >= CHACHA_BLOCK_SIZE * 8) { - chacha_8block_xor_avx512vl(state, dst, src, bytes, - nrounds); - bytes -= CHACHA_BLOCK_SIZE * 8; - src += CHACHA_BLOCK_SIZE * 8; - dst += CHACHA_BLOCK_SIZE * 8; - state->x[12] += 8; - } - if (bytes > CHACHA_BLOCK_SIZE * 4) { - chacha_8block_xor_avx512vl(state, dst, src, bytes, - nrounds); - state->x[12] += chacha_advance(bytes, 8); - return; - } - if (bytes > CHACHA_BLOCK_SIZE * 2) { - chacha_4block_xor_avx512vl(state, dst, src, bytes, - nrounds); - state->x[12] += chacha_advance(bytes, 4); - return; - } - if (bytes) { - chacha_2block_xor_avx512vl(state, dst, src, bytes, - nrounds); - state->x[12] += chacha_advance(bytes, 2); - return; - } - } - - if (static_branch_likely(&chacha_use_avx2)) { - while (bytes >= CHACHA_BLOCK_SIZE * 8) { - chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 8; - src += CHACHA_BLOCK_SIZE * 8; - dst += CHACHA_BLOCK_SIZE * 8; - state->x[12] += 8; - } - if (bytes > CHACHA_BLOCK_SIZE * 4) { - chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); - state->x[12] += chacha_advance(bytes, 8); - return; - } - if (bytes > CHACHA_BLOCK_SIZE * 2) { - chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); - state->x[12] += chacha_advance(bytes, 4); - return; - } - if (bytes > CHACHA_BLOCK_SIZE) { - chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); - state->x[12] += chacha_advance(bytes, 2); - return; - } - } - - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; - src += CHACHA_BLOCK_SIZE * 4; - dst += CHACHA_BLOCK_SIZE * 4; - state->x[12] += 4; - } - if (bytes > CHACHA_BLOCK_SIZE) { - chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); - state->x[12] += chacha_advance(bytes, 4); - return; - } - if (bytes) { - chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); - state->x[12]++; - } -} - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - if (!static_branch_likely(&chacha_use_simd)) { - hchacha_block_generic(state, out, nrounds); - } else { - kernel_fpu_begin(); - hchacha_block_ssse3(state, out, nrounds); - kernel_fpu_end(); - } -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (!static_branch_likely(&chacha_use_simd) || - bytes <= CHACHA_BLOCK_SIZE) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - kernel_fpu_begin(); - chacha_dosimd(state, dst, src, todo, nrounds); - kernel_fpu_end(); - - bytes -= todo; - src += todo; - dst += todo; - } while (bytes); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init chacha_simd_mod_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return 0; - - static_branch_enable(&chacha_use_simd); - - if (boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - static_branch_enable(&chacha_use_avx2); - - if (boot_cpu_has(X86_FEATURE_AVX512VL) && - boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ - static_branch_enable(&chacha_use_avx512vl); - } - return 0; -} -subsys_initcall(chacha_simd_mod_init); - -static void __exit chacha_simd_mod_exit(void) -{ -} -module_exit(chacha_simd_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("ChaCha and HChaCha functions (x86_64 optimized)"); -- cgit v1.2.3 From 453eda46b7f807f6fc4283f9639085697100ec08 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:26 -0700 Subject: lib/crypto: x86/blake2s: Reduce size of BLAKE2S_SIGMA2 Save 480 bytes of .rodata by replacing the .long constants with .bytes, and using the vpmovzxbd instruction to expand them. Also update the code to do the loads before incrementing %rax rather than after. This avoids the need for the first load to use an offset. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/x86/blake2s-core.S | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/crypto/x86/blake2s-core.S b/lib/crypto/x86/blake2s-core.S index ac1c845445a4..ef8e9f427aab 100644 --- a/lib/crypto/x86/blake2s-core.S +++ b/lib/crypto/x86/blake2s-core.S @@ -29,19 +29,19 @@ SIGMA: .byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 .byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 .byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 -.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 +.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 160 .align 64 SIGMA2: -.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 -.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 -.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 -.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 -.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 -.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 -.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 -.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 -.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 -.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 +.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.byte 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 +.byte 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 +.byte 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 +.byte 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 +.byte 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 +.byte 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 +.byte 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 +.byte 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 +.byte 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 .text SYM_FUNC_START(blake2s_compress_ssse3) @@ -193,9 +193,9 @@ SYM_FUNC_START(blake2s_compress_avx512) leaq SIGMA2(%rip),%rax movb $0xa,%cl .Lblake2s_compress_avx512_roundloop: - addq $0x40,%rax - vmovdqa -0x40(%rax),%ymm8 - vmovdqa -0x20(%rax),%ymm9 + vpmovzxbd (%rax),%ymm8 + vpmovzxbd 0x8(%rax),%ymm9 + addq $0x10,%rax vpermi2d %ymm7,%ymm6,%ymm8 vpermi2d %ymm7,%ymm6,%ymm9 vmovdqa %ymm8,%ymm6 -- cgit v1.2.3 From 126f5d90f6c855b39eebec17f93c2f9d2ce01ebb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:27 -0700 Subject: lib/crypto: blake2s: Remove obsolete self-test Remove the original BLAKE2s self-test, since it will be superseded by blake2s_kunit. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 1 - lib/crypto/blake2s-selftest.c | 651 ------------------------------------------ lib/crypto/blake2s.c | 10 - 3 files changed, 662 deletions(-) delete mode 100644 lib/crypto/blake2s-selftest.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index baafd58b5dfd..b2d2745879d1 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o obj-y += libblake2s.o libblake2s-y := blake2s.o libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o -libblake2s-$(CONFIG_CRYPTO_SELFTESTS) += blake2s-selftest.o ################################################################################ diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c deleted file mode 100644 index d0634ed6a937..000000000000 --- a/lib/crypto/blake2s-selftest.c +++ /dev/null @@ -1,651 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ - -#include -#include -#include -#include - -/* - * blake2s_testvecs[] generated with the program below (using libb2-dev and - * libssl-dev [OpenSSL]) - * - * #include - * #include - * #include - * - * #include - * - * #define BLAKE2S_TESTVEC_COUNT 256 - * - * static void print_vec(const uint8_t vec[], int len) - * { - * int i; - * - * printf(" { "); - * for (i = 0; i < len; i++) { - * if (i && (i % 12) == 0) - * printf("\n "); - * printf("0x%02x, ", vec[i]); - * } - * printf("},\n"); - * } - * - * int main(void) - * { - * uint8_t key[BLAKE2S_KEYBYTES]; - * uint8_t buf[BLAKE2S_TESTVEC_COUNT]; - * uint8_t hash[BLAKE2S_OUTBYTES]; - * int i, j; - * - * key[0] = key[1] = 1; - * for (i = 2; i < BLAKE2S_KEYBYTES; ++i) - * key[i] = key[i - 2] + key[i - 1]; - * - * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) - * buf[i] = (uint8_t)i; - * - * printf("static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); - * - * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) { - * int outlen = 1 + i % BLAKE2S_OUTBYTES; - * int keylen = (13 * i) % (BLAKE2S_KEYBYTES + 1); - * - * blake2s(hash, buf, key + BLAKE2S_KEYBYTES - keylen, outlen, i, - * keylen); - * print_vec(hash, outlen); - * } - * printf("};\n\n"); - * - * return 0; - *} - */ -static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { - { 0xa1, }, - { 0x7c, 0x89, }, - { 0x74, 0x0e, 0xd4, }, - { 0x47, 0x0c, 0x21, 0x15, }, - { 0x18, 0xd6, 0x9c, 0xa6, 0xc4, }, - { 0x13, 0x5d, 0x16, 0x63, 0x2e, 0xf9, }, - { 0x2c, 0xb5, 0x04, 0xb7, 0x99, 0xe2, 0x73, }, - { 0x9a, 0x0f, 0xd2, 0x39, 0xd6, 0x68, 0x1b, 0x92, }, - { 0xc8, 0xde, 0x7a, 0xea, 0x2f, 0xf4, 0xd2, 0xe3, 0x2b, }, - { 0x5b, 0xf9, 0x43, 0x52, 0x0c, 0x12, 0xba, 0xb5, 0x93, 0x9f, }, - { 0xc6, 0x2c, 0x4e, 0x80, 0xfc, 0x32, 0x5b, 0x33, 0xb8, 0xb8, 0x0a, }, - { 0xa7, 0x5c, 0xfd, 0x3a, 0xcc, 0xbf, 0x90, 0xca, 0xb7, 0x97, 0xde, 0xd8, }, - { 0x66, 0xca, 0x3c, 0xc4, 0x19, 0xef, 0x92, 0x66, 0x3f, 0x21, 0x8f, 0xda, - 0xb7, }, - { 0xba, 0xe5, 0xbb, 0x30, 0x25, 0x94, 0x6d, 0xc3, 0x89, 0x09, 0xc4, 0x25, - 0x52, 0x3e, }, - { 0xa2, 0xef, 0x0e, 0x52, 0x0b, 0x5f, 0xa2, 0x01, 0x6d, 0x0a, 0x25, 0xbc, - 0x57, 0xe2, 0x27, }, - { 0x4f, 0xe0, 0xf9, 0x52, 0x12, 0xda, 0x84, 0xb7, 0xab, 0xae, 0xb0, 0xa6, - 0x47, 0x2a, 0xc7, 0xf5, }, - { 0x56, 0xe7, 0xa8, 0x1c, 0x4c, 0xca, 0xed, 0x90, 0x31, 0xec, 0x87, 0x43, - 0xe7, 0x72, 0x08, 0xec, 0xbe, }, - { 0x7e, 0xdf, 0x80, 0x1c, 0x93, 0x33, 0xfd, 0x53, 0x44, 0xba, 0xfd, 0x96, - 0xe1, 0xbb, 0xb5, 0x65, 0xa5, 0x00, }, - { 0xec, 0x6b, 0xed, 0xf7, 0x7b, 0x62, 0x1d, 0x7d, 0xf4, 0x82, 0xf3, 0x1e, - 0x18, 0xff, 0x2b, 0xc4, 0x06, 0x20, 0x2a, }, - { 0x74, 0x98, 0xd7, 0x68, 0x63, 0xed, 0x87, 0xe4, 0x5d, 0x8d, 0x9e, 0x1d, - 0xfd, 0x2a, 0xbb, 0x86, 0xac, 0xe9, 0x2a, 0x89, }, - { 0x89, 0xc3, 0x88, 0xce, 0x2b, 0x33, 0x1e, 0x10, 0xd1, 0x37, 0x20, 0x86, - 0x28, 0x43, 0x70, 0xd9, 0xfb, 0x96, 0xd9, 0xb5, 0xd3, }, - { 0xcb, 0x56, 0x74, 0x41, 0x8d, 0x80, 0x01, 0x9a, 0x6b, 0x38, 0xe1, 0x41, - 0xad, 0x9c, 0x62, 0x74, 0xce, 0x35, 0xd5, 0x6c, 0x89, 0x6e, }, - { 0x79, 0xaf, 0x94, 0x59, 0x99, 0x26, 0xe1, 0xc9, 0x34, 0xfe, 0x7c, 0x22, - 0xf7, 0x43, 0xd7, 0x65, 0xd4, 0x48, 0x18, 0xac, 0x3d, 0xfd, 0x93, }, - { 0x85, 0x0d, 0xff, 0xb8, 0x3e, 0x87, 0x41, 0xb0, 0x95, 0xd3, 0x3d, 0x00, - 0x47, 0x55, 0x9e, 0xd2, 0x69, 0xea, 0xbf, 0xe9, 0x7a, 0x2d, 0x61, 0x45, }, - { 0x03, 0xe0, 0x85, 0xec, 0x54, 0xb5, 0x16, 0x53, 0xa8, 0xc4, 0x71, 0xe9, - 0x6a, 0xe7, 0xcb, 0xc4, 0x15, 0x02, 0xfc, 0x34, 0xa4, 0xa4, 0x28, 0x13, - 0xd1, }, - { 0xe3, 0x34, 0x4b, 0xe1, 0xd0, 0x4b, 0x55, 0x61, 0x8f, 0xc0, 0x24, 0x05, - 0xe6, 0xe0, 0x3d, 0x70, 0x24, 0x4d, 0xda, 0xb8, 0x91, 0x05, 0x29, 0x07, - 0x01, 0x3e, }, - { 0x61, 0xff, 0x01, 0x72, 0xb1, 0x4d, 0xf6, 0xfe, 0xd1, 0xd1, 0x08, 0x74, - 0xe6, 0x91, 0x44, 0xeb, 0x61, 0xda, 0x40, 0xaf, 0xfc, 0x8c, 0x91, 0x6b, - 0xec, 0x13, 0xed, }, - { 0xd4, 0x40, 0xd2, 0xa0, 0x7f, 0xc1, 0x58, 0x0c, 0x85, 0xa0, 0x86, 0xc7, - 0x86, 0xb9, 0x61, 0xc9, 0xea, 0x19, 0x86, 0x1f, 0xab, 0x07, 0xce, 0x37, - 0x72, 0x67, 0x09, 0xfc, }, - { 0x9e, 0xf8, 0x18, 0x67, 0x93, 0x10, 0x9b, 0x39, 0x75, 0xe8, 0x8b, 0x38, - 0x82, 0x7d, 0xb8, 0xb7, 0xa5, 0xaf, 0xe6, 0x6a, 0x22, 0x5e, 0x1f, 0x9c, - 0x95, 0x29, 0x19, 0xf2, 0x4b, }, - { 0xc8, 0x62, 0x25, 0xf5, 0x98, 0xc9, 0xea, 0xe5, 0x29, 0x3a, 0xd3, 0x22, - 0xeb, 0xeb, 0x07, 0x7c, 0x15, 0x07, 0xee, 0x15, 0x61, 0xbb, 0x05, 0x30, - 0x99, 0x7f, 0x11, 0xf6, 0x0a, 0x1d, }, - { 0x68, 0x70, 0xf7, 0x90, 0xa1, 0x8b, 0x1f, 0x0f, 0xbb, 0xce, 0xd2, 0x0e, - 0x33, 0x1f, 0x7f, 0xa9, 0x78, 0xa8, 0xa6, 0x81, 0x66, 0xab, 0x8d, 0xcd, - 0x58, 0x55, 0x3a, 0x0b, 0x7a, 0xdb, 0xb5, }, - { 0xdd, 0x35, 0xd2, 0xb4, 0xf6, 0xc7, 0xea, 0xab, 0x64, 0x24, 0x4e, 0xfe, - 0xe5, 0x3d, 0x4e, 0x95, 0x8b, 0x6d, 0x6c, 0xbc, 0xb0, 0xf8, 0x88, 0x61, - 0x09, 0xb7, 0x78, 0xa3, 0x31, 0xfe, 0xd9, 0x2f, }, - { 0x0a, }, - { 0x6e, 0xd4, }, - { 0x64, 0xe9, 0xd1, }, - { 0x30, 0xdd, 0x71, 0xef, }, - { 0x11, 0xb5, 0x0c, 0x87, 0xc9, }, - { 0x06, 0x1c, 0x6d, 0x04, 0x82, 0xd0, }, - { 0x5c, 0x42, 0x0b, 0xee, 0xc5, 0x9c, 0xb2, }, - { 0xe8, 0x29, 0xd6, 0xb4, 0x5d, 0xf7, 0x2b, 0x93, }, - { 0x18, 0xca, 0x27, 0x72, 0x43, 0x39, 0x16, 0xbc, 0x6a, }, - { 0x39, 0x8f, 0xfd, 0x64, 0xf5, 0x57, 0x23, 0xb0, 0x45, 0xf8, }, - { 0xbb, 0x3a, 0x78, 0x6b, 0x02, 0x1d, 0x0b, 0x16, 0xe3, 0xb2, 0x9a, }, - { 0xb8, 0xb4, 0x0b, 0xe5, 0xd4, 0x1d, 0x0d, 0x85, 0x49, 0x91, 0x35, 0xfa, }, - { 0x6d, 0x48, 0x2a, 0x0c, 0x42, 0x08, 0xbd, 0xa9, 0x78, 0x6f, 0x18, 0xaf, - 0xe2, }, - { 0x10, 0x45, 0xd4, 0x58, 0x88, 0xec, 0x4e, 0x1e, 0xf6, 0x14, 0x92, 0x64, - 0x7e, 0xb0, }, - { 0x8b, 0x0b, 0x95, 0xee, 0x92, 0xc6, 0x3b, 0x91, 0xf1, 0x1e, 0xeb, 0x51, - 0x98, 0x0a, 0x8d, }, - { 0xa3, 0x50, 0x4d, 0xa5, 0x1d, 0x03, 0x68, 0xe9, 0x57, 0x78, 0xd6, 0x04, - 0xf1, 0xc3, 0x94, 0xd8, }, - { 0xb8, 0x66, 0x6e, 0xdd, 0x46, 0x15, 0xae, 0x3d, 0x83, 0x7e, 0xcf, 0xe7, - 0x2c, 0xe8, 0x8f, 0xc7, 0x34, }, - { 0x2e, 0xc0, 0x1f, 0x29, 0xea, 0xf6, 0xb9, 0xe2, 0xc2, 0x93, 0xeb, 0x41, - 0x0d, 0xf0, 0x0a, 0x13, 0x0e, 0xa2, }, - { 0x71, 0xb8, 0x33, 0xa9, 0x1b, 0xac, 0xf1, 0xb5, 0x42, 0x8f, 0x5e, 0x81, - 0x34, 0x43, 0xb7, 0xa4, 0x18, 0x5c, 0x47, }, - { 0xda, 0x45, 0xb8, 0x2e, 0x82, 0x1e, 0xc0, 0x59, 0x77, 0x9d, 0xfa, 0xb4, - 0x1c, 0x5e, 0xa0, 0x2b, 0x33, 0x96, 0x5a, 0x58, }, - { 0xe3, 0x09, 0x05, 0xa9, 0xeb, 0x48, 0x13, 0xad, 0x71, 0x88, 0x81, 0x9a, - 0x3e, 0x2c, 0xe1, 0x23, 0x99, 0x13, 0x35, 0x9f, 0xb5, }, - { 0xb7, 0x86, 0x2d, 0x16, 0xe1, 0x04, 0x00, 0x47, 0x47, 0x61, 0x31, 0xfb, - 0x14, 0xac, 0xd8, 0xe9, 0xe3, 0x49, 0xbd, 0xf7, 0x9c, 0x3f, }, - { 0x7f, 0xd9, 0x95, 0xa8, 0xa7, 0xa0, 0xcc, 0xba, 0xef, 0xb1, 0x0a, 0xa9, - 0x21, 0x62, 0x08, 0x0f, 0x1b, 0xff, 0x7b, 0x9d, 0xae, 0xb2, 0x95, }, - { 0x85, 0x99, 0xea, 0x33, 0xe0, 0x56, 0xff, 0x13, 0xc6, 0x61, 0x8c, 0xf9, - 0x57, 0x05, 0x03, 0x11, 0xf9, 0xfb, 0x3a, 0xf7, 0xce, 0xbb, 0x52, 0x30, }, - { 0xb2, 0x72, 0x9c, 0xf8, 0x77, 0x4e, 0x8f, 0x6b, 0x01, 0x6c, 0xff, 0x4e, - 0x4f, 0x02, 0xd2, 0xbc, 0xeb, 0x51, 0x28, 0x99, 0x50, 0xab, 0xc4, 0x42, - 0xe3, }, - { 0x8b, 0x0a, 0xb5, 0x90, 0x8f, 0xf5, 0x7b, 0xdd, 0xba, 0x47, 0x37, 0xc9, - 0x2a, 0xd5, 0x4b, 0x25, 0x08, 0x8b, 0x02, 0x17, 0xa7, 0x9e, 0x6b, 0x6e, - 0xe3, 0x90, }, - { 0x90, 0xdd, 0xf7, 0x75, 0xa7, 0xa3, 0x99, 0x5e, 0x5b, 0x7d, 0x75, 0xc3, - 0x39, 0x6b, 0xa0, 0xe2, 0x44, 0x53, 0xb1, 0x9e, 0xc8, 0xf1, 0x77, 0x10, - 0x58, 0x06, 0x9a, }, - { 0x99, 0x52, 0xf0, 0x49, 0xa8, 0x8c, 0xec, 0xa6, 0x97, 0x32, 0x13, 0xb5, - 0xf7, 0xa3, 0x8e, 0xfb, 0x4b, 0x59, 0x31, 0x3d, 0x01, 0x59, 0x98, 0x5d, - 0x53, 0x03, 0x1a, 0x39, }, - { 0x9f, 0xe0, 0xc2, 0xe5, 0x5d, 0x93, 0xd6, 0x9b, 0x47, 0x8f, 0x9b, 0xe0, - 0x26, 0x35, 0x84, 0x20, 0x1d, 0xc5, 0x53, 0x10, 0x0f, 0x22, 0xb9, 0xb5, - 0xd4, 0x36, 0xb1, 0xac, 0x73, }, - { 0x30, 0x32, 0x20, 0x3b, 0x10, 0x28, 0xec, 0x1f, 0x4f, 0x9b, 0x47, 0x59, - 0xeb, 0x7b, 0xee, 0x45, 0xfb, 0x0c, 0x49, 0xd8, 0x3d, 0x69, 0xbd, 0x90, - 0x2c, 0xf0, 0x9e, 0x8d, 0xbf, 0xd5, }, - { 0x2a, 0x37, 0x73, 0x7f, 0xf9, 0x96, 0x19, 0xaa, 0x25, 0xd8, 0x13, 0x28, - 0x01, 0x29, 0x89, 0xdf, 0x6e, 0x0c, 0x9b, 0x43, 0x44, 0x51, 0xe9, 0x75, - 0x26, 0x0c, 0xb7, 0x87, 0x66, 0x0b, 0x5f, }, - { 0x23, 0xdf, 0x96, 0x68, 0x91, 0x86, 0xd0, 0x93, 0x55, 0x33, 0x24, 0xf6, - 0xba, 0x08, 0x75, 0x5b, 0x59, 0x11, 0x69, 0xb8, 0xb9, 0xe5, 0x2c, 0x77, - 0x02, 0xf6, 0x47, 0xee, 0x81, 0xdd, 0xb9, 0x06, }, - { 0x9d, }, - { 0x9d, 0x7d, }, - { 0xfd, 0xc3, 0xda, }, - { 0xe8, 0x82, 0xcd, 0x21, }, - { 0xc3, 0x1d, 0x42, 0x4c, 0x74, }, - { 0xe9, 0xda, 0xf1, 0xa2, 0xe5, 0x7c, }, - { 0x52, 0xb8, 0x6f, 0x81, 0x5c, 0x3a, 0x4c, }, - { 0x5b, 0x39, 0x26, 0xfc, 0x92, 0x5e, 0xe0, 0x49, }, - { 0x59, 0xe4, 0x7c, 0x93, 0x1c, 0xf9, 0x28, 0x93, 0xde, }, - { 0xde, 0xdf, 0xb2, 0x43, 0x61, 0x0b, 0x86, 0x16, 0x4c, 0x2e, }, - { 0x14, 0x8f, 0x75, 0x51, 0xaf, 0xb9, 0xee, 0x51, 0x5a, 0xae, 0x23, }, - { 0x43, 0x5f, 0x50, 0xd5, 0x70, 0xb0, 0x5b, 0x87, 0xf5, 0xd9, 0xb3, 0x6d, }, - { 0x66, 0x0a, 0x64, 0x93, 0x79, 0x71, 0x94, 0x40, 0xb7, 0x68, 0x2d, 0xd3, - 0x63, }, - { 0x15, 0x00, 0xc4, 0x0c, 0x7d, 0x1b, 0x10, 0xa9, 0x73, 0x1b, 0x90, 0x6f, - 0xe6, 0xa9, }, - { 0x34, 0x75, 0xf3, 0x86, 0x8f, 0x56, 0xcf, 0x2a, 0x0a, 0xf2, 0x62, 0x0a, - 0xf6, 0x0e, 0x20, }, - { 0xb1, 0xde, 0xc9, 0xf5, 0xdb, 0xf3, 0x2f, 0x4c, 0xd6, 0x41, 0x7d, 0x39, - 0x18, 0x3e, 0xc7, 0xc3, }, - { 0xc5, 0x89, 0xb2, 0xf8, 0xb8, 0xc0, 0xa3, 0xb9, 0x3b, 0x10, 0x6d, 0x7c, - 0x92, 0xfc, 0x7f, 0x34, 0x41, }, - { 0xc4, 0xd8, 0xef, 0xba, 0xef, 0xd2, 0xaa, 0xc5, 0x6c, 0x8e, 0x3e, 0xbb, - 0x12, 0xfc, 0x0f, 0x72, 0xbf, 0x0f, }, - { 0xdd, 0x91, 0xd1, 0x15, 0x9e, 0x7d, 0xf8, 0xc1, 0xb9, 0x14, 0x63, 0x96, - 0xb5, 0xcb, 0x83, 0x1d, 0x35, 0x1c, 0xec, }, - { 0xa9, 0xf8, 0x52, 0xc9, 0x67, 0x76, 0x2b, 0xad, 0xfb, 0xd8, 0x3a, 0xa6, - 0x74, 0x02, 0xae, 0xb8, 0x25, 0x2c, 0x63, 0x49, }, - { 0x77, 0x1f, 0x66, 0x70, 0xfd, 0x50, 0x29, 0xaa, 0xeb, 0xdc, 0xee, 0xba, - 0x75, 0x98, 0xdc, 0x93, 0x12, 0x3f, 0xdc, 0x7c, 0x38, }, - { 0xe2, 0xe1, 0x89, 0x5c, 0x37, 0x38, 0x6a, 0xa3, 0x40, 0xac, 0x3f, 0xb0, - 0xca, 0xfc, 0xa7, 0xf3, 0xea, 0xf9, 0x0f, 0x5d, 0x8e, 0x39, }, - { 0x0f, 0x67, 0xc8, 0x38, 0x01, 0xb1, 0xb7, 0xb8, 0xa2, 0xe7, 0x0a, 0x6d, - 0xd2, 0x63, 0x69, 0x9e, 0xcc, 0xf0, 0xf2, 0xbe, 0x9b, 0x98, 0xdd, }, - { 0x13, 0xe1, 0x36, 0x30, 0xfe, 0xc6, 0x01, 0x8a, 0xa1, 0x63, 0x96, 0x59, - 0xc2, 0xa9, 0x68, 0x3f, 0x58, 0xd4, 0x19, 0x0c, 0x40, 0xf3, 0xde, 0x02, }, - { 0xa3, 0x9e, 0xce, 0xda, 0x42, 0xee, 0x8c, 0x6c, 0x5a, 0x7d, 0xdc, 0x89, - 0x02, 0x77, 0xdd, 0xe7, 0x95, 0xbb, 0xff, 0x0d, 0xa4, 0xb5, 0x38, 0x1e, - 0xaf, }, - { 0x9a, 0xf6, 0xb5, 0x9a, 0x4f, 0xa9, 0x4f, 0x2c, 0x35, 0x3c, 0x24, 0xdc, - 0x97, 0x6f, 0xd9, 0xa1, 0x7d, 0x1a, 0x85, 0x0b, 0xf5, 0xda, 0x2e, 0xe7, - 0xb1, 0x1d, }, - { 0x84, 0x1e, 0x8e, 0x3d, 0x45, 0xa5, 0xf2, 0x27, 0xf3, 0x31, 0xfe, 0xb9, - 0xfb, 0xc5, 0x45, 0x99, 0x99, 0xdd, 0x93, 0x43, 0x02, 0xee, 0x58, 0xaf, - 0xee, 0x6a, 0xbe, }, - { 0x07, 0x2f, 0xc0, 0xa2, 0x04, 0xc4, 0xab, 0x7c, 0x26, 0xbb, 0xa8, 0xd8, - 0xe3, 0x1c, 0x75, 0x15, 0x64, 0x5d, 0x02, 0x6a, 0xf0, 0x86, 0xe9, 0xcd, - 0x5c, 0xef, 0xa3, 0x25, }, - { 0x2f, 0x3b, 0x1f, 0xb5, 0x91, 0x8f, 0x86, 0xe0, 0xdc, 0x31, 0x48, 0xb6, - 0xa1, 0x8c, 0xfd, 0x75, 0xbb, 0x7d, 0x3d, 0xc1, 0xf0, 0x10, 0x9a, 0xd8, - 0x4b, 0x0e, 0xe3, 0x94, 0x9f, }, - { 0x29, 0xbb, 0x8f, 0x6c, 0xd1, 0xf2, 0xb6, 0xaf, 0xe5, 0xe3, 0x2d, 0xdc, - 0x6f, 0xa4, 0x53, 0x88, 0xd8, 0xcf, 0x4d, 0x45, 0x42, 0x62, 0xdb, 0xdf, - 0xf8, 0x45, 0xc2, 0x13, 0xec, 0x35, }, - { 0x06, 0x3c, 0xe3, 0x2c, 0x15, 0xc6, 0x43, 0x03, 0x81, 0xfb, 0x08, 0x76, - 0x33, 0xcb, 0x02, 0xc1, 0xba, 0x33, 0xe5, 0xe0, 0xd1, 0x92, 0xa8, 0x46, - 0x28, 0x3f, 0x3e, 0x9d, 0x2c, 0x44, 0x54, }, - { 0xea, 0xbb, 0x96, 0xf8, 0xd1, 0x8b, 0x04, 0x11, 0x40, 0x78, 0x42, 0x02, - 0x19, 0xd1, 0xbc, 0x65, 0x92, 0xd3, 0xc3, 0xd6, 0xd9, 0x19, 0xe7, 0xc3, - 0x40, 0x97, 0xbd, 0xd4, 0xed, 0xfa, 0x5e, 0x28, }, - { 0x02, }, - { 0x52, 0xa8, }, - { 0x38, 0x25, 0x0d, }, - { 0xe3, 0x04, 0xd4, 0x92, }, - { 0x97, 0xdb, 0xf7, 0x81, 0xca, }, - { 0x8a, 0x56, 0x9d, 0x62, 0x56, 0xcc, }, - { 0xa1, 0x8e, 0x3c, 0x72, 0x8f, 0x63, 0x03, }, - { 0xf7, 0xf3, 0x39, 0x09, 0x0a, 0xa1, 0xbb, 0x23, }, - { 0x6b, 0x03, 0xc0, 0xe9, 0xd9, 0x83, 0x05, 0x22, 0x01, }, - { 0x1b, 0x4b, 0xf5, 0xd6, 0x4f, 0x05, 0x75, 0x91, 0x4c, 0x7f, }, - { 0x4c, 0x8c, 0x25, 0x20, 0x21, 0xcb, 0xc2, 0x4b, 0x3a, 0x5b, 0x8d, }, - { 0x56, 0xe2, 0x77, 0xa0, 0xb6, 0x9f, 0x81, 0xec, 0x83, 0x75, 0xc4, 0xf9, }, - { 0x71, 0x70, 0x0f, 0xad, 0x4d, 0x35, 0x81, 0x9d, 0x88, 0x69, 0xf9, 0xaa, - 0xd3, }, - { 0x50, 0x6e, 0x86, 0x6e, 0x43, 0xc0, 0xc2, 0x44, 0xc2, 0xe2, 0xa0, 0x1c, - 0xb7, 0x9a, }, - { 0xe4, 0x7e, 0x72, 0xc6, 0x12, 0x8e, 0x7c, 0xfc, 0xbd, 0xe2, 0x08, 0x31, - 0x3d, 0x47, 0x3d, }, - { 0x08, 0x97, 0x5b, 0x80, 0xae, 0xc4, 0x1d, 0x50, 0x77, 0xdf, 0x1f, 0xd0, - 0x24, 0xf0, 0x17, 0xc0, }, - { 0x01, 0xb6, 0x29, 0xf4, 0xaf, 0x78, 0x5f, 0xb6, 0x91, 0xdd, 0x76, 0x76, - 0xd2, 0xfd, 0x0c, 0x47, 0x40, }, - { 0xa1, 0xd8, 0x09, 0x97, 0x7a, 0xa6, 0xc8, 0x94, 0xf6, 0x91, 0x7b, 0xae, - 0x2b, 0x9f, 0x0d, 0x83, 0x48, 0xf7, }, - { 0x12, 0xd5, 0x53, 0x7d, 0x9a, 0xb0, 0xbe, 0xd9, 0xed, 0xe9, 0x9e, 0xee, - 0x61, 0x5b, 0x42, 0xf2, 0xc0, 0x73, 0xc0, }, - { 0xd5, 0x77, 0xd6, 0x5c, 0x6e, 0xa5, 0x69, 0x2b, 0x3b, 0x8c, 0xd6, 0x7d, - 0x1d, 0xbe, 0x2c, 0xa1, 0x02, 0x21, 0xcd, 0x29, }, - { 0xa4, 0x98, 0x80, 0xca, 0x22, 0xcf, 0x6a, 0xab, 0x5e, 0x40, 0x0d, 0x61, - 0x08, 0x21, 0xef, 0xc0, 0x6c, 0x52, 0xb4, 0xb0, 0x53, }, - { 0xbf, 0xaf, 0x8f, 0x3b, 0x7a, 0x97, 0x33, 0xe5, 0xca, 0x07, 0x37, 0xfd, - 0x15, 0xdf, 0xce, 0x26, 0x2a, 0xb1, 0xa7, 0x0b, 0xb3, 0xac, }, - { 0x16, 0x22, 0xe1, 0xbc, 0x99, 0x4e, 0x01, 0xf0, 0xfa, 0xff, 0x8f, 0xa5, - 0x0c, 0x61, 0xb0, 0xad, 0xcc, 0xb1, 0xe1, 0x21, 0x46, 0xfa, 0x2e, }, - { 0x11, 0x5b, 0x0b, 0x2b, 0xe6, 0x14, 0xc1, 0xd5, 0x4d, 0x71, 0x5e, 0x17, - 0xea, 0x23, 0xdd, 0x6c, 0xbd, 0x1d, 0xbe, 0x12, 0x1b, 0xee, 0x4c, 0x1a, }, - { 0x40, 0x88, 0x22, 0xf3, 0x20, 0x6c, 0xed, 0xe1, 0x36, 0x34, 0x62, 0x2c, - 0x98, 0x83, 0x52, 0xe2, 0x25, 0xee, 0xe9, 0xf5, 0xe1, 0x17, 0xf0, 0x5c, - 0xae, }, - { 0xc3, 0x76, 0x37, 0xde, 0x95, 0x8c, 0xca, 0x2b, 0x0c, 0x23, 0xe7, 0xb5, - 0x38, 0x70, 0x61, 0xcc, 0xff, 0xd3, 0x95, 0x7b, 0xf3, 0xff, 0x1f, 0x9d, - 0x59, 0x00, }, - { 0x0c, 0x19, 0x52, 0x05, 0x22, 0x53, 0xcb, 0x48, 0xd7, 0x10, 0x0e, 0x7e, - 0x14, 0x69, 0xb5, 0xa2, 0x92, 0x43, 0xa3, 0x9e, 0x4b, 0x8f, 0x51, 0x2c, - 0x5a, 0x2c, 0x3b, }, - { 0xe1, 0x9d, 0x70, 0x70, 0x28, 0xec, 0x86, 0x40, 0x55, 0x33, 0x56, 0xda, - 0x88, 0xca, 0xee, 0xc8, 0x6a, 0x20, 0xb1, 0xe5, 0x3d, 0x57, 0xf8, 0x3c, - 0x10, 0x07, 0x2a, 0xc4, }, - { 0x0b, 0xae, 0xf1, 0xc4, 0x79, 0xee, 0x1b, 0x3d, 0x27, 0x35, 0x8d, 0x14, - 0xd6, 0xae, 0x4e, 0x3c, 0xe9, 0x53, 0x50, 0xb5, 0xcc, 0x0c, 0xf7, 0xdf, - 0xee, 0xa1, 0x74, 0xd6, 0x71, }, - { 0xe6, 0xa4, 0xf4, 0x99, 0x98, 0xb9, 0x80, 0xea, 0x96, 0x7f, 0x4f, 0x33, - 0xcf, 0x74, 0x25, 0x6f, 0x17, 0x6c, 0xbf, 0xf5, 0x5c, 0x38, 0xd0, 0xff, - 0x96, 0xcb, 0x13, 0xf9, 0xdf, 0xfd, }, - { 0xbe, 0x92, 0xeb, 0xba, 0x44, 0x2c, 0x24, 0x74, 0xd4, 0x03, 0x27, 0x3c, - 0x5d, 0x5b, 0x03, 0x30, 0x87, 0x63, 0x69, 0xe0, 0xb8, 0x94, 0xf4, 0x44, - 0x7e, 0xad, 0xcd, 0x20, 0x12, 0x16, 0x79, }, - { 0x30, 0xf1, 0xc4, 0x8e, 0x05, 0x90, 0x2a, 0x97, 0x63, 0x94, 0x46, 0xff, - 0xce, 0xd8, 0x67, 0xa7, 0xac, 0x33, 0x8c, 0x95, 0xb7, 0xcd, 0xa3, 0x23, - 0x98, 0x9d, 0x76, 0x6c, 0x9d, 0xa8, 0xd6, 0x8a, }, - { 0xbe, }, - { 0x17, 0x6c, }, - { 0x1a, 0x42, 0x4f, }, - { 0xba, 0xaf, 0xb7, 0x65, }, - { 0xc2, 0x63, 0x43, 0x6a, 0xea, }, - { 0xe4, 0x4d, 0xad, 0xf2, 0x0b, 0x02, }, - { 0x04, 0xc7, 0xc4, 0x7f, 0xa9, 0x2b, 0xce, }, - { 0x66, 0xf6, 0x67, 0xcb, 0x03, 0x53, 0xc8, 0xf1, }, - { 0x56, 0xa3, 0x60, 0x78, 0xc9, 0x5f, 0x70, 0x1b, 0x5e, }, - { 0x99, 0xff, 0x81, 0x7c, 0x13, 0x3c, 0x29, 0x79, 0x4b, 0x65, }, - { 0x51, 0x10, 0x50, 0x93, 0x01, 0x93, 0xb7, 0x01, 0xc9, 0x18, 0xb7, }, - { 0x8e, 0x3c, 0x42, 0x1e, 0x5e, 0x7d, 0xc1, 0x50, 0x70, 0x1f, 0x00, 0x98, }, - { 0x5f, 0xd9, 0x9b, 0xc8, 0xd7, 0xb2, 0x72, 0x62, 0x1a, 0x1e, 0xba, 0x92, - 0xe9, }, - { 0x70, 0x2b, 0xba, 0xfe, 0xad, 0x5d, 0x96, 0x3f, 0x27, 0xc2, 0x41, 0x6d, - 0xc4, 0xb3, }, - { 0xae, 0xe0, 0xd5, 0xd4, 0xc7, 0xae, 0x15, 0x5e, 0xdc, 0xdd, 0x33, 0x60, - 0xd7, 0xd3, 0x5e, }, - { 0x79, 0x8e, 0xbc, 0x9e, 0x20, 0xb9, 0x19, 0x4b, 0x63, 0x80, 0xf3, 0x16, - 0xaf, 0x39, 0xbd, 0x92, }, - { 0xc2, 0x0e, 0x85, 0xa0, 0x0b, 0x9a, 0xb0, 0xec, 0xde, 0x38, 0xd3, 0x10, - 0xd9, 0xa7, 0x66, 0x27, 0xcf, }, - { 0x0e, 0x3b, 0x75, 0x80, 0x67, 0x14, 0x0c, 0x02, 0x90, 0xd6, 0xb3, 0x02, - 0x81, 0xf6, 0xa6, 0x87, 0xce, 0x58, }, - { 0x79, 0xb5, 0xe9, 0x5d, 0x52, 0x4d, 0xf7, 0x59, 0xf4, 0x2e, 0x27, 0xdd, - 0xb3, 0xed, 0x57, 0x5b, 0x82, 0xea, 0x6f, }, - { 0xa2, 0x97, 0xf5, 0x80, 0x02, 0x3d, 0xde, 0xa3, 0xf9, 0xf6, 0xab, 0xe3, - 0x57, 0x63, 0x7b, 0x9b, 0x10, 0x42, 0x6f, 0xf2, }, - { 0x12, 0x7a, 0xfc, 0xb7, 0x67, 0x06, 0x0c, 0x78, 0x1a, 0xfe, 0x88, 0x4f, - 0xc6, 0xac, 0x52, 0x96, 0x64, 0x28, 0x97, 0x84, 0x06, }, - { 0xc5, 0x04, 0x44, 0x6b, 0xb2, 0xa5, 0xa4, 0x66, 0xe1, 0x76, 0xa2, 0x51, - 0xf9, 0x59, 0x69, 0x97, 0x56, 0x0b, 0xbf, 0x50, 0xb3, 0x34, }, - { 0x21, 0x32, 0x6b, 0x42, 0xb5, 0xed, 0x71, 0x8d, 0xf7, 0x5a, 0x35, 0xe3, - 0x90, 0xe2, 0xee, 0xaa, 0x89, 0xf6, 0xc9, 0x9c, 0x4d, 0x73, 0xf4, }, - { 0x4c, 0xa6, 0x09, 0xf4, 0x48, 0xe7, 0x46, 0xbc, 0x49, 0xfc, 0xe5, 0xda, - 0xd1, 0x87, 0x13, 0x17, 0x4c, 0x59, 0x71, 0x26, 0x5b, 0x2c, 0x42, 0xb7, }, - { 0x13, 0x63, 0xf3, 0x40, 0x02, 0xe5, 0xa3, 0x3a, 0x5e, 0x8e, 0xf8, 0xb6, - 0x8a, 0x49, 0x60, 0x76, 0x34, 0x72, 0x94, 0x73, 0xf6, 0xd9, 0x21, 0x6a, - 0x26, }, - { 0xdf, 0x75, 0x16, 0x10, 0x1b, 0x5e, 0x81, 0xc3, 0xc8, 0xde, 0x34, 0x24, - 0xb0, 0x98, 0xeb, 0x1b, 0x8f, 0xa1, 0x9b, 0x05, 0xee, 0xa5, 0xe9, 0x35, - 0xf4, 0x1d, }, - { 0xcd, 0x21, 0x93, 0x6e, 0x5b, 0xa0, 0x26, 0x2b, 0x21, 0x0e, 0xa0, 0xb9, - 0x1c, 0xb5, 0xbb, 0xb8, 0xf8, 0x1e, 0xff, 0x5c, 0xa8, 0xf9, 0x39, 0x46, - 0x4e, 0x29, 0x26, }, - { 0x73, 0x7f, 0x0e, 0x3b, 0x0b, 0x5c, 0xf9, 0x60, 0xaa, 0x88, 0xa1, 0x09, - 0xb1, 0x5d, 0x38, 0x7b, 0x86, 0x8f, 0x13, 0x7a, 0x8d, 0x72, 0x7a, 0x98, - 0x1a, 0x5b, 0xff, 0xc9, }, - { 0xd3, 0x3c, 0x61, 0x71, 0x44, 0x7e, 0x31, 0x74, 0x98, 0x9d, 0x9a, 0xd2, - 0x27, 0xf3, 0x46, 0x43, 0x42, 0x51, 0xd0, 0x5f, 0xe9, 0x1c, 0x5c, 0x69, - 0xbf, 0xf6, 0xbe, 0x3c, 0x40, }, - { 0x31, 0x99, 0x31, 0x9f, 0xaa, 0x43, 0x2e, 0x77, 0x3e, 0x74, 0x26, 0x31, - 0x5e, 0x61, 0xf1, 0x87, 0xe2, 0xeb, 0x9b, 0xcd, 0xd0, 0x3a, 0xee, 0x20, - 0x7e, 0x10, 0x0a, 0x0b, 0x7e, 0xfa, }, - { 0xa4, 0x27, 0x80, 0x67, 0x81, 0x2a, 0xa7, 0x62, 0xf7, 0x6e, 0xda, 0xd4, - 0x5c, 0x39, 0x74, 0xad, 0x7e, 0xbe, 0xad, 0xa5, 0x84, 0x7f, 0xa9, 0x30, - 0x5d, 0xdb, 0xe2, 0x05, 0x43, 0xf7, 0x1b, }, - { 0x0b, 0x37, 0xd8, 0x02, 0xe1, 0x83, 0xd6, 0x80, 0xf2, 0x35, 0xc2, 0xb0, - 0x37, 0xef, 0xef, 0x5e, 0x43, 0x93, 0xf0, 0x49, 0x45, 0x0a, 0xef, 0xb5, - 0x76, 0x70, 0x12, 0x44, 0xc4, 0xdb, 0xf5, 0x7a, }, - { 0x1f, }, - { 0x82, 0x60, }, - { 0xcc, 0xe3, 0x08, }, - { 0x56, 0x17, 0xe4, 0x59, }, - { 0xe2, 0xd7, 0x9e, 0xc4, 0x4c, }, - { 0xb2, 0xad, 0xd3, 0x78, 0x58, 0x5a, }, - { 0xce, 0x43, 0xb4, 0x02, 0x96, 0xab, 0x3c, }, - { 0xe6, 0x05, 0x1a, 0x73, 0x22, 0x32, 0xbb, 0x77, }, - { 0x23, 0xe7, 0xda, 0xfe, 0x2c, 0xef, 0x8c, 0x22, 0xec, }, - { 0xe9, 0x8e, 0x55, 0x38, 0xd1, 0xd7, 0x35, 0x23, 0x98, 0xc7, }, - { 0xb5, 0x81, 0x1a, 0xe5, 0xb5, 0xa5, 0xd9, 0x4d, 0xca, 0x41, 0xe7, }, - { 0x41, 0x16, 0x16, 0x95, 0x8d, 0x9e, 0x0c, 0xea, 0x8c, 0x71, 0x9a, 0xc1, }, - { 0x7c, 0x33, 0xc0, 0xa4, 0x00, 0x62, 0xea, 0x60, 0x67, 0xe4, 0x20, 0xbc, - 0x5b, }, - { 0xdb, 0xb1, 0xdc, 0xfd, 0x08, 0xc0, 0xde, 0x82, 0xd1, 0xde, 0x38, 0xc0, - 0x90, 0x48, }, - { 0x37, 0x18, 0x2e, 0x0d, 0x61, 0xaa, 0x61, 0xd7, 0x86, 0x20, 0x16, 0x60, - 0x04, 0xd9, 0xd5, }, - { 0xb0, 0xcf, 0x2c, 0x4c, 0x5e, 0x5b, 0x4f, 0x2a, 0x23, 0x25, 0x58, 0x47, - 0xe5, 0x31, 0x06, 0x70, }, - { 0x91, 0xa0, 0xa3, 0x86, 0x4e, 0xe0, 0x72, 0x38, 0x06, 0x67, 0x59, 0x5c, - 0x70, 0x25, 0xdb, 0x33, 0x27, }, - { 0x44, 0x58, 0x66, 0xb8, 0x58, 0xc7, 0x13, 0xed, 0x4c, 0xc0, 0xf4, 0x9a, - 0x1e, 0x67, 0x75, 0x33, 0xb6, 0xb8, }, - { 0x7f, 0x98, 0x4a, 0x8e, 0x50, 0xa2, 0x5c, 0xcd, 0x59, 0xde, 0x72, 0xb3, - 0x9d, 0xc3, 0x09, 0x8a, 0xab, 0x56, 0xf1, }, - { 0x80, 0x96, 0x49, 0x1a, 0x59, 0xa2, 0xc5, 0xd5, 0xa7, 0x20, 0x8a, 0xb7, - 0x27, 0x62, 0x84, 0x43, 0xc6, 0xe1, 0x1b, 0x5d, }, - { 0x6b, 0xb7, 0x2b, 0x26, 0x62, 0x14, 0x70, 0x19, 0x3d, 0x4d, 0xac, 0xac, - 0x63, 0x58, 0x5e, 0x94, 0xb5, 0xb7, 0xe8, 0xe8, 0xa2, }, - { 0x20, 0xa8, 0xc0, 0xfd, 0x63, 0x3d, 0x6e, 0x98, 0xcf, 0x0c, 0x49, 0x98, - 0xe4, 0x5a, 0xfe, 0x8c, 0xaa, 0x70, 0x82, 0x1c, 0x7b, 0x74, }, - { 0xc8, 0xe8, 0xdd, 0xdf, 0x69, 0x30, 0x01, 0xc2, 0x0f, 0x7e, 0x2f, 0x11, - 0xcc, 0x3e, 0x17, 0xa5, 0x69, 0x40, 0x3f, 0x0e, 0x79, 0x7f, 0xcf, }, - { 0xdb, 0x61, 0xc0, 0xe2, 0x2e, 0x49, 0x07, 0x31, 0x1d, 0x91, 0x42, 0x8a, - 0xfc, 0x5e, 0xd3, 0xf8, 0x56, 0x1f, 0x2b, 0x73, 0xfd, 0x9f, 0xb2, 0x8e, }, - { 0x0c, 0x89, 0x55, 0x0c, 0x1f, 0x59, 0x2c, 0x9d, 0x1b, 0x29, 0x1d, 0x41, - 0x1d, 0xe6, 0x47, 0x8f, 0x8c, 0x2b, 0xea, 0x8f, 0xf0, 0xff, 0x21, 0x70, - 0x88, }, - { 0x12, 0x18, 0x95, 0xa6, 0x59, 0xb1, 0x31, 0x24, 0x45, 0x67, 0x55, 0xa4, - 0x1a, 0x2d, 0x48, 0x67, 0x1b, 0x43, 0x88, 0x2d, 0x8e, 0xa0, 0x70, 0xb3, - 0xc6, 0xbb, }, - { 0xe7, 0xb1, 0x1d, 0xb2, 0x76, 0x4d, 0x68, 0x68, 0x68, 0x23, 0x02, 0x55, - 0x3a, 0xe2, 0xe5, 0xd5, 0x4b, 0x43, 0xf9, 0x34, 0x77, 0x5c, 0xa1, 0xf5, - 0x55, 0xfd, 0x4f, }, - { 0x8c, 0x87, 0x5a, 0x08, 0x3a, 0x73, 0xad, 0x61, 0xe1, 0xe7, 0x99, 0x7e, - 0xf0, 0x5d, 0xe9, 0x5d, 0x16, 0x43, 0x80, 0x2f, 0xd0, 0x66, 0x34, 0xe2, - 0x42, 0x64, 0x3b, 0x1a, }, - { 0x39, 0xc1, 0x99, 0xcf, 0x22, 0xbf, 0x16, 0x8f, 0x9f, 0x80, 0x7f, 0x95, - 0x0a, 0x05, 0x67, 0x27, 0xe7, 0x15, 0xdf, 0x9d, 0xb2, 0xfe, 0x1c, 0xb5, - 0x1d, 0x60, 0x8f, 0x8a, 0x1d, }, - { 0x9b, 0x6e, 0x08, 0x09, 0x06, 0x73, 0xab, 0x68, 0x02, 0x62, 0x1a, 0xe4, - 0xd4, 0xdf, 0xc7, 0x02, 0x4c, 0x6a, 0x5f, 0xfd, 0x23, 0xac, 0xae, 0x6d, - 0x43, 0xa4, 0x7a, 0x50, 0x60, 0x3c, }, - { 0x1d, 0xb4, 0xc6, 0xe1, 0xb1, 0x4b, 0xe3, 0xf2, 0xe2, 0x1a, 0x73, 0x1b, - 0xa0, 0x92, 0xa7, 0xf5, 0xff, 0x8f, 0x8b, 0x5d, 0xdf, 0xa8, 0x04, 0xb3, - 0xb0, 0xf7, 0xcc, 0x12, 0xfa, 0x35, 0x46, }, - { 0x49, 0x45, 0x97, 0x11, 0x0f, 0x1c, 0x60, 0x8e, 0xe8, 0x47, 0x30, 0xcf, - 0x60, 0xa8, 0x71, 0xc5, 0x1b, 0xe9, 0x39, 0x4d, 0x49, 0xb6, 0x12, 0x1f, - 0x24, 0xab, 0x37, 0xff, 0x83, 0xc2, 0xe1, 0x3a, }, - { 0x60, }, - { 0x24, 0x26, }, - { 0x47, 0xeb, 0xc9, }, - { 0x4a, 0xd0, 0xbc, 0xf0, }, - { 0x8e, 0x2b, 0xc9, 0x85, 0x3c, }, - { 0xa2, 0x07, 0x15, 0xb8, 0x12, 0x74, }, - { 0x0f, 0xdb, 0x5b, 0x33, 0x69, 0xfe, 0x4b, }, - { 0xa2, 0x86, 0x54, 0xf4, 0xfd, 0xb2, 0xd4, 0xe6, }, - { 0xbb, 0x84, 0x78, 0x49, 0x27, 0x8e, 0x61, 0xda, 0x60, }, - { 0x04, 0xc3, 0xcd, 0xaa, 0x8f, 0xa7, 0x03, 0xc9, 0xf9, 0xb6, }, - { 0xf8, 0x27, 0x1d, 0x61, 0xdc, 0x21, 0x42, 0xdd, 0xad, 0x92, 0x40, }, - { 0x12, 0x87, 0xdf, 0xc2, 0x41, 0x45, 0x5a, 0x36, 0x48, 0x5b, 0x51, 0x2b, }, - { 0xbb, 0x37, 0x5d, 0x1f, 0xf1, 0x68, 0x7a, 0xc4, 0xa5, 0xd2, 0xa4, 0x91, - 0x8d, }, - { 0x5b, 0x27, 0xd1, 0x04, 0x54, 0x52, 0x9f, 0xa3, 0x47, 0x86, 0x33, 0x33, - 0xbf, 0xa0, }, - { 0xcf, 0x04, 0xea, 0xf8, 0x03, 0x2a, 0x43, 0xff, 0xa6, 0x68, 0x21, 0x4c, - 0xd5, 0x4b, 0xed, }, - { 0xaf, 0xb8, 0xbc, 0x63, 0x0f, 0x18, 0x4d, 0xe2, 0x7a, 0xdd, 0x46, 0x44, - 0xc8, 0x24, 0x0a, 0xb7, }, - { 0x3e, 0xdc, 0x36, 0xe4, 0x89, 0xb1, 0xfa, 0xc6, 0x40, 0x93, 0x2e, 0x75, - 0xb2, 0x15, 0xd1, 0xb1, 0x10, }, - { 0x6c, 0xd8, 0x20, 0x3b, 0x82, 0x79, 0xf9, 0xc8, 0xbc, 0x9d, 0xe0, 0x35, - 0xbe, 0x1b, 0x49, 0x1a, 0xbc, 0x3a, }, - { 0x78, 0x65, 0x2c, 0xbe, 0x35, 0x67, 0xdc, 0x78, 0xd4, 0x41, 0xf6, 0xc9, - 0xde, 0xde, 0x1f, 0x18, 0x13, 0x31, 0x11, }, - { 0x8a, 0x7f, 0xb1, 0x33, 0x8f, 0x0c, 0x3c, 0x0a, 0x06, 0x61, 0xf0, 0x47, - 0x29, 0x1b, 0x29, 0xbc, 0x1c, 0x47, 0xef, 0x7a, }, - { 0x65, 0x91, 0xf1, 0xe6, 0xb3, 0x96, 0xd3, 0x8c, 0xc2, 0x4a, 0x59, 0x35, - 0x72, 0x8e, 0x0b, 0x9a, 0x87, 0xca, 0x34, 0x7b, 0x63, }, - { 0x5f, 0x08, 0x87, 0x80, 0x56, 0x25, 0x89, 0x77, 0x61, 0x8c, 0x64, 0xa1, - 0x59, 0x6d, 0x59, 0x62, 0xe8, 0x4a, 0xc8, 0x58, 0x99, 0xd1, }, - { 0x23, 0x87, 0x1d, 0xed, 0x6f, 0xf2, 0x91, 0x90, 0xe2, 0xfe, 0x43, 0x21, - 0xaf, 0x97, 0xc6, 0xbc, 0xd7, 0x15, 0xc7, 0x2d, 0x08, 0x77, 0x91, }, - { 0x90, 0x47, 0x9a, 0x9e, 0x3a, 0xdf, 0xf3, 0xc9, 0x4c, 0x1e, 0xa7, 0xd4, - 0x6a, 0x32, 0x90, 0xfe, 0xb7, 0xb6, 0x7b, 0xfa, 0x96, 0x61, 0xfb, 0xa4, }, - { 0xb1, 0x67, 0x60, 0x45, 0xb0, 0x96, 0xc5, 0x15, 0x9f, 0x4d, 0x26, 0xd7, - 0x9d, 0xf1, 0xf5, 0x6d, 0x21, 0x00, 0x94, 0x31, 0x64, 0x94, 0xd3, 0xa7, - 0xd3, }, - { 0x02, 0x3e, 0xaf, 0xf3, 0x79, 0x73, 0xa5, 0xf5, 0xcc, 0x7a, 0x7f, 0xfb, - 0x79, 0x2b, 0x85, 0x8c, 0x88, 0x72, 0x06, 0xbe, 0xfe, 0xaf, 0xc1, 0x16, - 0xa6, 0xd6, }, - { 0x2a, 0xb0, 0x1a, 0xe5, 0xaa, 0x6e, 0xb3, 0xae, 0x53, 0x85, 0x33, 0x80, - 0x75, 0xae, 0x30, 0xe6, 0xb8, 0x72, 0x42, 0xf6, 0x25, 0x4f, 0x38, 0x88, - 0x55, 0xd1, 0xa9, }, - { 0x90, 0xd8, 0x0c, 0xc0, 0x93, 0x4b, 0x4f, 0x9e, 0x65, 0x6c, 0xa1, 0x54, - 0xa6, 0xf6, 0x6e, 0xca, 0xd2, 0xbb, 0x7e, 0x6a, 0x1c, 0xd3, 0xce, 0x46, - 0xef, 0xb0, 0x00, 0x8d, }, - { 0xed, 0x9c, 0x49, 0xcd, 0xc2, 0xde, 0x38, 0x0e, 0xe9, 0x98, 0x6c, 0xc8, - 0x90, 0x9e, 0x3c, 0xd4, 0xd3, 0xeb, 0x88, 0x32, 0xc7, 0x28, 0xe3, 0x94, - 0x1c, 0x9f, 0x8b, 0xf3, 0xcb, }, - { 0xac, 0xe7, 0x92, 0x16, 0xb4, 0x14, 0xa0, 0xe4, 0x04, 0x79, 0xa2, 0xf4, - 0x31, 0xe6, 0x0c, 0x26, 0xdc, 0xbf, 0x2f, 0x69, 0x1b, 0x55, 0x94, 0x67, - 0xda, 0x0c, 0xd7, 0x32, 0x1f, 0xef, }, - { 0x68, 0x63, 0x85, 0x57, 0x95, 0x9e, 0x42, 0x27, 0x41, 0x43, 0x42, 0x02, - 0xa5, 0x78, 0xa7, 0xc6, 0x43, 0xc1, 0x6a, 0xba, 0x70, 0x80, 0xcd, 0x04, - 0xb6, 0x78, 0x76, 0x29, 0xf3, 0xe8, 0xa0, }, - { 0xe6, 0xac, 0x8d, 0x9d, 0xf0, 0xc0, 0xf7, 0xf7, 0xe3, 0x3e, 0x4e, 0x28, - 0x0f, 0x59, 0xb2, 0x67, 0x9e, 0x84, 0x34, 0x42, 0x96, 0x30, 0x2b, 0xca, - 0x49, 0xb6, 0xc5, 0x9a, 0x84, 0x59, 0xa7, 0x81, }, - { 0x7e, }, - { 0x1e, 0x21, }, - { 0x26, 0xd3, 0xdd, }, - { 0x2c, 0xd4, 0xb3, 0x3d, }, - { 0x86, 0x7b, 0x76, 0x3c, 0xf0, }, - { 0x12, 0xc3, 0x70, 0x1d, 0x55, 0x18, }, - { 0x96, 0xc2, 0xbd, 0x61, 0x55, 0xf4, 0x24, }, - { 0x20, 0x51, 0xf7, 0x86, 0x58, 0x8f, 0x07, 0x2a, }, - { 0x93, 0x15, 0xa8, 0x1d, 0xda, 0x97, 0xee, 0x0e, 0x6c, }, - { 0x39, 0x93, 0xdf, 0xd5, 0x0e, 0xca, 0xdc, 0x7a, 0x92, 0xce, }, - { 0x60, 0xd5, 0xfd, 0xf5, 0x1b, 0x26, 0x82, 0x26, 0x73, 0x02, 0xbc, }, - { 0x98, 0xf2, 0x34, 0xe1, 0xf5, 0xfb, 0x00, 0xac, 0x10, 0x4a, 0x38, 0x9f, }, - { 0xda, 0x3a, 0x92, 0x8a, 0xd0, 0xcd, 0x12, 0xcd, 0x15, 0xbb, 0xab, 0x77, - 0x66, }, - { 0xa2, 0x92, 0x1a, 0xe5, 0xca, 0x0c, 0x30, 0x75, 0xeb, 0xaf, 0x00, 0x31, - 0x55, 0x66, }, - { 0x06, 0xea, 0xfd, 0x3e, 0x86, 0x38, 0x62, 0x4e, 0xa9, 0x12, 0xa4, 0x12, - 0x43, 0xbf, 0xa1, }, - { 0xe4, 0x71, 0x7b, 0x94, 0xdb, 0xa0, 0xd2, 0xff, 0x9b, 0xeb, 0xad, 0x8e, - 0x95, 0x8a, 0xc5, 0xed, }, - { 0x25, 0x5a, 0x77, 0x71, 0x41, 0x0e, 0x7a, 0xe9, 0xed, 0x0c, 0x10, 0xef, - 0xf6, 0x2b, 0x3a, 0xba, 0x60, }, - { 0xee, 0xe2, 0xa3, 0x67, 0x64, 0x1d, 0xc6, 0x04, 0xc4, 0xe1, 0x68, 0xd2, - 0x6e, 0xd2, 0x91, 0x75, 0x53, 0x07, }, - { 0xe0, 0xf6, 0x4d, 0x8f, 0x68, 0xfc, 0x06, 0x7e, 0x18, 0x79, 0x7f, 0x2b, - 0x6d, 0xef, 0x46, 0x7f, 0xab, 0xb2, 0xad, }, - { 0x3d, 0x35, 0x88, 0x9f, 0x2e, 0xcf, 0x96, 0x45, 0x07, 0x60, 0x71, 0x94, - 0x00, 0x8d, 0xbf, 0xf4, 0xef, 0x46, 0x2e, 0x3c, }, - { 0x43, 0xcf, 0x98, 0xf7, 0x2d, 0xf4, 0x17, 0xe7, 0x8c, 0x05, 0x2d, 0x9b, - 0x24, 0xfb, 0x4d, 0xea, 0x4a, 0xec, 0x01, 0x25, 0x29, }, - { 0x8e, 0x73, 0x9a, 0x78, 0x11, 0xfe, 0x48, 0xa0, 0x3b, 0x1a, 0x26, 0xdf, - 0x25, 0xe9, 0x59, 0x1c, 0x70, 0x07, 0x9f, 0xdc, 0xa0, 0xa6, }, - { 0xe8, 0x47, 0x71, 0xc7, 0x3e, 0xdf, 0xb5, 0x13, 0xb9, 0x85, 0x13, 0xa8, - 0x54, 0x47, 0x6e, 0x59, 0x96, 0x09, 0x13, 0x5f, 0x82, 0x16, 0x0b, }, - { 0xfb, 0xc0, 0x8c, 0x03, 0x21, 0xb3, 0xc4, 0xb5, 0x43, 0x32, 0x6c, 0xea, - 0x7f, 0xa8, 0x43, 0x91, 0xe8, 0x4e, 0x3f, 0xbf, 0x45, 0x58, 0x6a, 0xa3, }, - { 0x55, 0xf8, 0xf3, 0x00, 0x76, 0x09, 0xef, 0x69, 0x5d, 0xd2, 0x8a, 0xf2, - 0x65, 0xc3, 0xcb, 0x9b, 0x43, 0xfd, 0xb1, 0x7e, 0x7f, 0xa1, 0x94, 0xb0, - 0xd7, }, - { 0xaa, 0x13, 0xc1, 0x51, 0x40, 0x6d, 0x8d, 0x4c, 0x0a, 0x95, 0x64, 0x7b, - 0xd1, 0x96, 0xb6, 0x56, 0xb4, 0x5b, 0xcf, 0xd6, 0xd9, 0x15, 0x97, 0xdd, - 0xb6, 0xef, }, - { 0xaf, 0xb7, 0x36, 0xb0, 0x04, 0xdb, 0xd7, 0x9c, 0x9a, 0x44, 0xc4, 0xf6, - 0x1f, 0x12, 0x21, 0x2d, 0x59, 0x30, 0x54, 0xab, 0x27, 0x61, 0xa3, 0x57, - 0xef, 0xf8, 0x53, }, - { 0x97, 0x34, 0x45, 0x3e, 0xce, 0x7c, 0x35, 0xa2, 0xda, 0x9f, 0x4b, 0x46, - 0x6c, 0x11, 0x67, 0xff, 0x2f, 0x76, 0x58, 0x15, 0x71, 0xfa, 0x44, 0x89, - 0x89, 0xfd, 0xf7, 0x99, }, - { 0x1f, 0xb1, 0x62, 0xeb, 0x83, 0xc5, 0x9c, 0x89, 0xf9, 0x2c, 0xd2, 0x03, - 0x61, 0xbc, 0xbb, 0xa5, 0x74, 0x0e, 0x9b, 0x7e, 0x82, 0x3e, 0x70, 0x0a, - 0xa9, 0x8f, 0x2b, 0x59, 0xfb, }, - { 0xf8, 0xca, 0x5e, 0x3a, 0x4f, 0x9e, 0x10, 0x69, 0x10, 0xd5, 0x4c, 0xeb, - 0x1a, 0x0f, 0x3c, 0x6a, 0x98, 0xf5, 0xb0, 0x97, 0x5b, 0x37, 0x2f, 0x0d, - 0xbd, 0x42, 0x4b, 0x69, 0xa1, 0x82, }, - { 0x12, 0x8c, 0x6d, 0x52, 0x08, 0xef, 0x74, 0xb2, 0xe6, 0xaa, 0xd3, 0xb0, - 0x26, 0xb0, 0xd9, 0x94, 0xb6, 0x11, 0x45, 0x0e, 0x36, 0x71, 0x14, 0x2d, - 0x41, 0x8c, 0x21, 0x53, 0x31, 0xe9, 0x68, }, - { 0xee, 0xea, 0x0d, 0x89, 0x47, 0x7e, 0x72, 0xd1, 0xd8, 0xce, 0x58, 0x4c, - 0x94, 0x1f, 0x0d, 0x51, 0x08, 0xa3, 0xb6, 0x3d, 0xe7, 0x82, 0x46, 0x92, - 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, -}; - -static bool __init noinline_for_stack blake2s_digest_test(void) -{ - u8 key[BLAKE2S_KEY_SIZE]; - u8 buf[ARRAY_SIZE(blake2s_testvecs)]; - u8 hash[BLAKE2S_HASH_SIZE]; - struct blake2s_state state; - bool success = true; - int i, l; - - key[0] = key[1] = 1; - for (i = 2; i < sizeof(key); ++i) - key[i] = key[i - 2] + key[i - 1]; - - for (i = 0; i < sizeof(buf); ++i) - buf[i] = (u8)i; - - for (i = l = 0; i < ARRAY_SIZE(blake2s_testvecs); l = (l + 37) % ++i) { - int outlen = 1 + i % BLAKE2S_HASH_SIZE; - int keylen = (13 * i) % (BLAKE2S_KEY_SIZE + 1); - - blake2s(hash, buf, key + BLAKE2S_KEY_SIZE - keylen, outlen, i, - keylen); - if (memcmp(hash, blake2s_testvecs[i], outlen)) { - pr_err("blake2s self-test %d: FAIL\n", i + 1); - success = false; - } - - if (!keylen) - blake2s_init(&state, outlen); - else - blake2s_init_key(&state, outlen, - key + BLAKE2S_KEY_SIZE - keylen, - keylen); - - blake2s_update(&state, buf, l); - blake2s_update(&state, buf + l, i - l); - blake2s_final(&state, hash); - if (memcmp(hash, blake2s_testvecs[i], outlen)) { - pr_err("blake2s init/update/final self-test %d: FAIL\n", - i + 1); - success = false; - } - } - - return success; -} - -static bool __init noinline_for_stack blake2s_random_test(void) -{ - struct blake2s_state state; - bool success = true; - int i, l; - - for (i = 0; i < 32; ++i) { - enum { TEST_ALIGNMENT = 16 }; - u8 blocks[BLAKE2S_BLOCK_SIZE * 2 + TEST_ALIGNMENT - 1] - __aligned(TEST_ALIGNMENT); - u8 *unaligned_block = blocks + BLAKE2S_BLOCK_SIZE; - struct blake2s_state state1, state2; - - get_random_bytes(blocks, sizeof(blocks)); - get_random_bytes(&state, sizeof(state)); - -#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \ - defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) - memcpy(&state1, &state, sizeof(state1)); - memcpy(&state2, &state, sizeof(state2)); - blake2s_compress(&state1, blocks, 2, BLAKE2S_BLOCK_SIZE); - blake2s_compress_generic(&state2, blocks, 2, BLAKE2S_BLOCK_SIZE); - if (memcmp(&state1, &state2, sizeof(state1))) { - pr_err("blake2s random compress self-test %d: FAIL\n", - i + 1); - success = false; - } -#endif - - memcpy(&state1, &state, sizeof(state1)); - blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE); - for (l = 1; l < TEST_ALIGNMENT; ++l) { - memcpy(unaligned_block + l, blocks, - BLAKE2S_BLOCK_SIZE); - memcpy(&state2, &state, sizeof(state2)); - blake2s_compress(&state2, unaligned_block + l, 1, - BLAKE2S_BLOCK_SIZE); - if (memcmp(&state1, &state2, sizeof(state1))) { - pr_err("blake2s random compress align %d self-test %d: FAIL\n", - l, i + 1); - success = false; - } - } - } - - return success; -} - -bool __init blake2s_selftest(void) -{ - bool success; - - success = blake2s_digest_test(); - success &= blake2s_random_test(); - - return success; -} diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index f6ec68c3dcda..51f2dd7a38a4 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -59,14 +58,5 @@ void blake2s_final(struct blake2s_state *state, u8 *out) } EXPORT_SYMBOL(blake2s_final); -static int __init blake2s_mod_init(void) -{ - if (IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) && - WARN_ON(!blake2s_selftest())) - return -ENODEV; - return 0; -} - -module_init(blake2s_mod_init); MODULE_DESCRIPTION("BLAKE2s hash function"); MODULE_AUTHOR("Jason A. Donenfeld "); -- cgit v1.2.3 From 56e48d4e138cb105a17e0f8c257f3bc41b1bd69d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:28 -0700 Subject: lib/crypto: blake2s: Always enable arch-optimized BLAKE2s code When support for a crypto algorithm is enabled, the arch-optimized implementation of that algorithm should be enabled too. We've learned this the hard way many times over the years: people regularly forget to enable the arch-optimized implementations of the crypto algorithms, resulting in significant performance being left on the table. Currently, BLAKE2s support is always enabled ('obj-y'), since random.c uses it. Therefore, the arch-optimized BLAKE2s code, which exists for ARM and x86_64, should be always enabled too. Let's do that. Note that the effect on kernel image size is very small and should not be a concern. On ARM, enabling CRYPTO_BLAKE2S_ARM actually *shrinks* the kernel size by about 1200 bytes, since the ARM-optimized blake2s_compress() completely replaces the generic blake2s_compress(). On x86_64, enabling CRYPTO_BLAKE2S_X86 increases the kernel size by about 1400 bytes, as the generic blake2s_compress() is still included as a fallback; however, for context, that is only about a quarter the size of the generic blake2s_compress(). The x86_64 optimized BLAKE2s code uses much less icache at runtime than the generic code. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-10-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/Kconfig | 2 +- lib/crypto/x86/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig index 740341aa35d2..a5607ad079c4 100644 --- a/lib/crypto/arm/Kconfig +++ b/lib/crypto/arm/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config CRYPTO_BLAKE2S_ARM - bool "Hash functions: BLAKE2s" + def_bool y select CRYPTO_ARCH_HAVE_LIB_BLAKE2S help BLAKE2s cryptographic hash function (RFC 7693) diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig index eb47da71aa6b..ffa718321369 100644 --- a/lib/crypto/x86/Kconfig +++ b/lib/crypto/x86/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config CRYPTO_BLAKE2S_X86 - bool "Hash functions: BLAKE2s (SSSE3/AVX-512)" + def_bool y depends on 64BIT select CRYPTO_LIB_BLAKE2S_GENERIC select CRYPTO_ARCH_HAVE_LIB_BLAKE2S -- cgit v1.2.3 From 5d313a7625fabe9b92eaca7c5ce0e6d1019d279a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:29 -0700 Subject: lib/crypto: blake2s: Move generic code into blake2s.c Move blake2s_compress_generic() from blake2s-generic.c to blake2s.c. For now it's still guarded by CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC, but this prepares for changing it to a 'static __maybe_unused' function and just using the compiler to automatically decide its inclusion. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-11-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 1 - lib/crypto/blake2s-generic.c | 111 ------------------------------------------- lib/crypto/blake2s.c | 94 ++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 112 deletions(-) delete mode 100644 lib/crypto/blake2s-generic.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index b2d2745879d1..c9193f0604d9 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -32,7 +32,6 @@ obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o # blake2s is used by the /dev/random driver which is always builtin obj-y += libblake2s.o libblake2s-y := blake2s.o -libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o ################################################################################ diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c deleted file mode 100644 index 9828176a2efe..000000000000 --- a/lib/crypto/blake2s-generic.c +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - * - * This is an implementation of the BLAKE2s hash and PRF functions. - * - * Information: https://blake2.net/ - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static const u8 blake2s_sigma[10][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, -}; - -static inline void blake2s_increment_counter(struct blake2s_state *state, - const u32 inc) -{ - state->t[0] += inc; - state->t[1] += (state->t[0] < inc); -} - -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) - __weak __alias(blake2s_compress_generic); - -void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) -{ - u32 m[16]; - u32 v[16]; - int i; - - WARN_ON(IS_ENABLED(DEBUG) && - (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE)); - - while (nblocks > 0) { - blake2s_increment_counter(state, inc); - memcpy(m, block, BLAKE2S_BLOCK_SIZE); - le32_to_cpu_array(m, ARRAY_SIZE(m)); - memcpy(v, state->h, 32); - v[ 8] = BLAKE2S_IV0; - v[ 9] = BLAKE2S_IV1; - v[10] = BLAKE2S_IV2; - v[11] = BLAKE2S_IV3; - v[12] = BLAKE2S_IV4 ^ state->t[0]; - v[13] = BLAKE2S_IV5 ^ state->t[1]; - v[14] = BLAKE2S_IV6 ^ state->f[0]; - v[15] = BLAKE2S_IV7 ^ state->f[1]; - -#define G(r, i, a, b, c, d) do { \ - a += b + m[blake2s_sigma[r][2 * i + 0]]; \ - d = ror32(d ^ a, 16); \ - c += d; \ - b = ror32(b ^ c, 12); \ - a += b + m[blake2s_sigma[r][2 * i + 1]]; \ - d = ror32(d ^ a, 8); \ - c += d; \ - b = ror32(b ^ c, 7); \ -} while (0) - -#define ROUND(r) do { \ - G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ - G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ - G(r, 2, v[2], v[ 6], v[10], v[14]); \ - G(r, 3, v[3], v[ 7], v[11], v[15]); \ - G(r, 4, v[0], v[ 5], v[10], v[15]); \ - G(r, 5, v[1], v[ 6], v[11], v[12]); \ - G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ - G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ -} while (0) - ROUND(0); - ROUND(1); - ROUND(2); - ROUND(3); - ROUND(4); - ROUND(5); - ROUND(6); - ROUND(7); - ROUND(8); - ROUND(9); - -#undef G -#undef ROUND - - for (i = 0; i < 8; ++i) - state->h[i] ^= v[i] ^ v[i + 8]; - - block += BLAKE2S_BLOCK_SIZE; - --nblocks; - } -} - -EXPORT_SYMBOL(blake2s_compress_generic); diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 51f2dd7a38a4..b5b75ade4658 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -16,6 +16,100 @@ #include #include +#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC +static const u8 blake2s_sigma[10][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, +}; + +static inline void blake2s_increment_counter(struct blake2s_state *state, + const u32 inc) +{ + state->t[0] += inc; + state->t[1] += (state->t[0] < inc); +} + +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) + __weak __alias(blake2s_compress_generic); + +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) +{ + u32 m[16]; + u32 v[16]; + int i; + + WARN_ON(IS_ENABLED(DEBUG) && + (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE)); + + while (nblocks > 0) { + blake2s_increment_counter(state, inc); + memcpy(m, block, BLAKE2S_BLOCK_SIZE); + le32_to_cpu_array(m, ARRAY_SIZE(m)); + memcpy(v, state->h, 32); + v[ 8] = BLAKE2S_IV0; + v[ 9] = BLAKE2S_IV1; + v[10] = BLAKE2S_IV2; + v[11] = BLAKE2S_IV3; + v[12] = BLAKE2S_IV4 ^ state->t[0]; + v[13] = BLAKE2S_IV5 ^ state->t[1]; + v[14] = BLAKE2S_IV6 ^ state->f[0]; + v[15] = BLAKE2S_IV7 ^ state->f[1]; + +#define G(r, i, a, b, c, d) do { \ + a += b + m[blake2s_sigma[r][2 * i + 0]]; \ + d = ror32(d ^ a, 16); \ + c += d; \ + b = ror32(b ^ c, 12); \ + a += b + m[blake2s_sigma[r][2 * i + 1]]; \ + d = ror32(d ^ a, 8); \ + c += d; \ + b = ror32(b ^ c, 7); \ +} while (0) + +#define ROUND(r) do { \ + G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ + G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ + G(r, 2, v[2], v[ 6], v[10], v[14]); \ + G(r, 3, v[3], v[ 7], v[11], v[15]); \ + G(r, 4, v[0], v[ 5], v[10], v[15]); \ + G(r, 5, v[1], v[ 6], v[11], v[12]); \ + G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ + G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ +} while (0) + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + +#undef G +#undef ROUND + + for (i = 0; i < 8; ++i) + state->h[i] ^= v[i] ^ v[i + 8]; + + block += BLAKE2S_BLOCK_SIZE; + --nblocks; + } +} +EXPORT_SYMBOL(blake2s_compress_generic); +#endif /* CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC */ + static inline void blake2s_set_lastblock(struct blake2s_state *state) { state->f[0] = -1; -- cgit v1.2.3 From 39ee3970f26d55b57343da392d45117d7f893205 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:30 -0700 Subject: lib/crypto: blake2s: Consolidate into single C translation unit As was done with the other algorithms, reorganize the BLAKE2s code so that the generic implementation and the arch-specific "glue" code is consolidated into a single translation unit, so that the compiler will inline the functions and automatically decide whether to include the generic code in the resulting binary or not. Similarly, also consolidate the build rules into lib/crypto/{Makefile,Kconfig}. This removes the last uses of lib/crypto/{arm,x86}/{Makefile,Kconfig}, so remove those too. Don't keep the !KMSAN dependency. It was needed only for other algorithms such as ChaCha that initialize memory from assembly code. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-12-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 29 ++++-------------- lib/crypto/Makefile | 13 ++++---- lib/crypto/arm/Kconfig | 14 --------- lib/crypto/arm/Makefile | 4 --- lib/crypto/arm/blake2s-core.S | 5 +++- lib/crypto/arm/blake2s-glue.c | 7 ----- lib/crypto/arm/blake2s.h | 5 ++++ lib/crypto/blake2s.c | 29 +++++++++++------- lib/crypto/x86/Kconfig | 13 -------- lib/crypto/x86/Makefile | 4 --- lib/crypto/x86/blake2s-glue.c | 70 ------------------------------------------- lib/crypto/x86/blake2s.h | 64 +++++++++++++++++++++++++++++++++++++++ 12 files changed, 106 insertions(+), 151 deletions(-) delete mode 100644 lib/crypto/arm/Kconfig delete mode 100644 lib/crypto/arm/Makefile delete mode 100644 lib/crypto/arm/blake2s-glue.c create mode 100644 lib/crypto/arm/blake2s.h delete mode 100644 lib/crypto/x86/Kconfig delete mode 100644 lib/crypto/x86/Makefile delete mode 100644 lib/crypto/x86/blake2s-glue.c create mode 100644 lib/crypto/x86/blake2s.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index c1db483bc230..37d85e0c9b97 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -28,21 +28,13 @@ config CRYPTO_LIB_ARC4 config CRYPTO_LIB_GF128MUL tristate -config CRYPTO_ARCH_HAVE_LIB_BLAKE2S - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the Blake2s library interface, - either builtin or as a module. +# BLAKE2s support is always built-in, so there's no CRYPTO_LIB_BLAKE2S option. -config CRYPTO_LIB_BLAKE2S_GENERIC - def_bool !CRYPTO_ARCH_HAVE_LIB_BLAKE2S - help - This symbol can be depended upon by arch implementations of the - Blake2s library interface that require the generic code as a - fallback, e.g., for SIMD implementations. If no arch specific - implementation is enabled, this implementation serves the users - of CRYPTO_LIB_BLAKE2S. +config CRYPTO_LIB_BLAKE2S_ARCH + bool + depends on !UML + default y if ARM + default y if X86_64 config CRYPTO_LIB_CHACHA tristate @@ -208,13 +200,4 @@ config CRYPTO_LIB_SM3 source "lib/crypto/tests/Kconfig" -if !KMSAN # avoid false positives from assembly -if ARM -source "lib/crypto/arm/Kconfig" -endif -if X86 -source "lib/crypto/x86/Kconfig" -endif -endif - endmenu diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index c9193f0604d9..ad27c5bf99e1 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -29,9 +29,15 @@ libarc4-y := arc4.o obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o +################################################################################ + # blake2s is used by the /dev/random driver which is always builtin -obj-y += libblake2s.o -libblake2s-y := blake2s.o +obj-y += blake2s.o +ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2S_ARCH),y) +CFLAGS_blake2s.o += -I$(src)/$(SRCARCH) +obj-$(CONFIG_ARM) += arm/blake2s-core.o +obj-$(CONFIG_X86) += x86/blake2s-core.o +endif ################################################################################ @@ -256,9 +262,6 @@ obj-$(CONFIG_CRYPTO_SELFTESTS_FULL) += simd.o obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o libsm3-y := sm3.o -obj-$(CONFIG_ARM) += arm/ -obj-$(CONFIG_X86) += x86/ - # clean-files must be defined unconditionally clean-files += arm/sha256-core.S arm/sha512-core.S clean-files += arm64/sha256-core.S arm64/sha512-core.S diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig deleted file mode 100644 index a5607ad079c4..000000000000 --- a/lib/crypto/arm/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_BLAKE2S_ARM - def_bool y - select CRYPTO_ARCH_HAVE_LIB_BLAKE2S - help - BLAKE2s cryptographic hash function (RFC 7693) - - Architecture: arm - - This is faster than the generic implementations of BLAKE2s and - BLAKE2b, but slower than the NEON implementation of BLAKE2b. - There is no NEON implementation of BLAKE2s, since NEON doesn't - really help with it. diff --git a/lib/crypto/arm/Makefile b/lib/crypto/arm/Makefile deleted file mode 100644 index 0574b0e9739e..000000000000 --- a/lib/crypto/arm/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o -libblake2s-arm-y := blake2s-core.o blake2s-glue.o diff --git a/lib/crypto/arm/blake2s-core.S b/lib/crypto/arm/blake2s-core.S index df40e46601f1..293f44fa8f31 100644 --- a/lib/crypto/arm/blake2s-core.S +++ b/lib/crypto/arm/blake2s-core.S @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * BLAKE2s digest algorithm, ARM scalar implementation + * BLAKE2s digest algorithm, ARM scalar implementation. This is faster + * than the generic implementations of BLAKE2s and BLAKE2b, but slower + * than the NEON implementation of BLAKE2b. There is no NEON + * implementation of BLAKE2s, since NEON doesn't really help with it. * * Copyright 2020 Google LLC * diff --git a/lib/crypto/arm/blake2s-glue.c b/lib/crypto/arm/blake2s-glue.c deleted file mode 100644 index 0238a70d9581..000000000000 --- a/lib/crypto/arm/blake2s-glue.c +++ /dev/null @@ -1,7 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include - -/* defined in blake2s-core.S */ -EXPORT_SYMBOL(blake2s_compress); diff --git a/lib/crypto/arm/blake2s.h b/lib/crypto/arm/blake2s.h new file mode 100644 index 000000000000..aa7a97139ea7 --- /dev/null +++ b/lib/crypto/arm/blake2s.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* defined in blake2s-core.S */ +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, u32 inc); diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index b5b75ade4658..5638ed9d882d 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -8,7 +8,7 @@ * */ -#include +#include #include #include #include @@ -16,7 +16,6 @@ #include #include -#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC static const u8 blake2s_sigma[10][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, @@ -37,12 +36,9 @@ static inline void blake2s_increment_counter(struct blake2s_state *state, state->t[1] += (state->t[0] < inc); } -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) - __weak __alias(blake2s_compress_generic); - -void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) +static void __maybe_unused +blake2s_compress_generic(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) { u32 m[16]; u32 v[16]; @@ -107,8 +103,12 @@ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, --nblocks; } } -EXPORT_SYMBOL(blake2s_compress_generic); -#endif /* CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC */ + +#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_ARCH +#include "blake2s.h" /* $(SRCARCH)/blake2s.h */ +#else +#define blake2s_compress blake2s_compress_generic +#endif static inline void blake2s_set_lastblock(struct blake2s_state *state) { @@ -152,5 +152,14 @@ void blake2s_final(struct blake2s_state *state, u8 *out) } EXPORT_SYMBOL(blake2s_final); +#ifdef blake2s_mod_init_arch +static int __init blake2s_mod_init(void) +{ + blake2s_mod_init_arch(); + return 0; +} +subsys_initcall(blake2s_mod_init); +#endif + MODULE_DESCRIPTION("BLAKE2s hash function"); MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig deleted file mode 100644 index ffa718321369..000000000000 --- a/lib/crypto/x86/Kconfig +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_BLAKE2S_X86 - def_bool y - depends on 64BIT - select CRYPTO_LIB_BLAKE2S_GENERIC - select CRYPTO_ARCH_HAVE_LIB_BLAKE2S - help - BLAKE2s cryptographic hash function (RFC 7693) - - Architecture: x86_64 using: - - SSSE3 (Supplemental SSE3) - - AVX-512 (Advanced Vector Extensions-512) diff --git a/lib/crypto/x86/Makefile b/lib/crypto/x86/Makefile deleted file mode 100644 index 4454556b243e..000000000000 --- a/lib/crypto/x86/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o -libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o diff --git a/lib/crypto/x86/blake2s-glue.c b/lib/crypto/x86/blake2s-glue.c deleted file mode 100644 index adc296cd17c9..000000000000 --- a/lib/crypto/x86/blake2s-glue.c +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, - const u8 *block, const size_t nblocks, - const u32 inc); -asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, - const u8 *block, const size_t nblocks, - const u32 inc); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); - -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) -{ - /* SIMD disables preemption, so relax after processing each page. */ - BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); - - if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) { - blake2s_compress_generic(state, block, nblocks, inc); - return; - } - - do { - const size_t blocks = min_t(size_t, nblocks, - SZ_4K / BLAKE2S_BLOCK_SIZE); - - kernel_fpu_begin(); - if (static_branch_likely(&blake2s_use_avx512)) - blake2s_compress_avx512(state, block, blocks, inc); - else - blake2s_compress_ssse3(state, block, blocks, inc); - kernel_fpu_end(); - - nblocks -= blocks; - block += blocks * BLAKE2S_BLOCK_SIZE; - } while (nblocks); -} -EXPORT_SYMBOL(blake2s_compress); - -static int __init blake2s_mod_init(void) -{ - if (boot_cpu_has(X86_FEATURE_SSSE3)) - static_branch_enable(&blake2s_use_ssse3); - - if (boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - boot_cpu_has(X86_FEATURE_AVX512F) && - boot_cpu_has(X86_FEATURE_AVX512VL) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | - XFEATURE_MASK_AVX512, NULL)) - static_branch_enable(&blake2s_use_avx512); - - return 0; -} - -subsys_initcall(blake2s_mod_init); diff --git a/lib/crypto/x86/blake2s.h b/lib/crypto/x86/blake2s.h new file mode 100644 index 000000000000..b6d30d2fa045 --- /dev/null +++ b/lib/crypto/x86/blake2s.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); +asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); + +static void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); + + if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) { + blake2s_compress_generic(state, block, nblocks, inc); + return; + } + + do { + const size_t blocks = min_t(size_t, nblocks, + SZ_4K / BLAKE2S_BLOCK_SIZE); + + kernel_fpu_begin(); + if (static_branch_likely(&blake2s_use_avx512)) + blake2s_compress_avx512(state, block, blocks, inc); + else + blake2s_compress_ssse3(state, block, blocks, inc); + kernel_fpu_end(); + + nblocks -= blocks; + block += blocks * BLAKE2S_BLOCK_SIZE; + } while (nblocks); +} + +#define blake2s_mod_init_arch blake2s_mod_init_arch +static void blake2s_mod_init_arch(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + static_branch_enable(&blake2s_use_ssse3); + + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + static_branch_enable(&blake2s_use_avx512); +} -- cgit v1.2.3 From 362f92286065d9f8282da5def89e173a12191568 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:31 -0700 Subject: lib/crypto: tests: Add KUnit tests for BLAKE2s Add a KUnit test suite for BLAKE2s. Most of the core test logic is in the previously-added hash-test-template.h. This commit just adds the actual KUnit suite, commits the generated test vectors to the tree so that gen-hash-testvecs.py won't have to be run at build time, and adds a few BLAKE2s-specific test cases. This is the replacement for blake2s-selftest, which an earlier commit removed. Improvements over blake2s-selftest include integration with KUnit, more comprehensive test cases, and support for benchmarking. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-13-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/Kconfig | 10 ++ lib/crypto/tests/Makefile | 1 + lib/crypto/tests/blake2s-testvecs.h | 238 ++++++++++++++++++++++++++++++++++++ lib/crypto/tests/blake2s_kunit.c | 134 ++++++++++++++++++++ 4 files changed, 383 insertions(+) create mode 100644 lib/crypto/tests/blake2s-testvecs.h create mode 100644 lib/crypto/tests/blake2s_kunit.c (limited to 'lib') diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index c21d53fd4b0c..fd341aa12f15 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -1,5 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later +config CRYPTO_LIB_BLAKE2S_KUNIT_TEST + tristate "KUnit tests for BLAKE2s" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS + select CRYPTO_LIB_BENCHMARK_VISIBLE + # No need to select CRYPTO_LIB_BLAKE2S here, as that option doesn't + # exist; the BLAKE2s code is always built-in for the /dev/random driver. + help + KUnit tests for the BLAKE2s cryptographic hash function. + config CRYPTO_LIB_MD5_KUNIT_TEST tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index f6f82c6f9cb5..be7de929af2c 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST) += blake2s_kunit.o obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o diff --git a/lib/crypto/tests/blake2s-testvecs.h b/lib/crypto/tests/blake2s-testvecs.h new file mode 100644 index 000000000000..6f978b79a59b --- /dev/null +++ b/lib/crypto/tests/blake2s-testvecs.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py blake2s */ + +static const struct { + size_t data_len; + u8 digest[BLAKE2S_HASH_SIZE]; +} hash_testvecs[] = { + { + .data_len = 0, + .digest = { + 0x69, 0x21, 0x7a, 0x30, 0x79, 0x90, 0x80, 0x94, + 0xe1, 0x11, 0x21, 0xd0, 0x42, 0x35, 0x4a, 0x7c, + 0x1f, 0x55, 0xb6, 0x48, 0x2c, 0xa1, 0xa5, 0x1e, + 0x1b, 0x25, 0x0d, 0xfd, 0x1e, 0xd0, 0xee, 0xf9, + }, + }, + { + .data_len = 1, + .digest = { + 0x7c, 0xab, 0x53, 0xe2, 0x48, 0x87, 0xdf, 0x64, + 0x98, 0x6a, 0xc1, 0x7e, 0xf0, 0x01, 0x4d, 0xc9, + 0x07, 0x4f, 0xb8, 0x2f, 0x46, 0xd7, 0xee, 0xa9, + 0xad, 0xe5, 0xf8, 0x21, 0xac, 0xfe, 0x17, 0x58, + }, + }, + { + .data_len = 2, + .digest = { + 0x5e, 0x63, 0x2c, 0xd0, 0xf8, 0x7b, 0xf5, 0xae, + 0x61, 0x97, 0x94, 0x57, 0xc8, 0x76, 0x22, 0xd9, + 0x8b, 0x04, 0x5e, 0xf1, 0x5d, 0xd0, 0xfc, 0xd9, + 0x0c, 0x19, 0x2e, 0xe2, 0xc5, 0xd9, 0x73, 0x51, + }, + }, + { + .data_len = 3, + .digest = { + 0x33, 0x65, 0xa6, 0x37, 0xbf, 0xf8, 0x4f, 0x15, + 0x4c, 0xac, 0x9e, 0xa4, 0x3b, 0x02, 0x07, 0x0c, + 0x80, 0x86, 0x0d, 0x6c, 0xe4, 0xaf, 0x1c, 0xbc, + 0x0b, 0x9c, 0x0a, 0x98, 0xc2, 0x99, 0x71, 0xcd, + }, + }, + { + .data_len = 16, + .digest = { + 0x59, 0xd2, 0x10, 0xd3, 0x75, 0xac, 0x48, 0x32, + 0xb1, 0xea, 0xee, 0xcf, 0x0a, 0xd2, 0x8b, 0x15, + 0x5d, 0x72, 0x71, 0x4c, 0xa7, 0x29, 0xb0, 0x7a, + 0x44, 0x48, 0x8a, 0x54, 0x54, 0x54, 0x41, 0xf5, + }, + }, + { + .data_len = 32, + .digest = { + 0xdc, 0xfc, 0x46, 0x81, 0xc6, 0x1b, 0x2b, 0x47, + 0x8b, 0xed, 0xe0, 0x73, 0x34, 0x38, 0x53, 0x92, + 0x97, 0x2f, 0xfb, 0x51, 0xab, 0x4f, 0x2d, 0x9d, + 0x69, 0x04, 0xa9, 0x5d, 0x33, 0xef, 0xcb, 0x1c, + }, + }, + { + .data_len = 48, + .digest = { + 0xd6, 0x2a, 0x7f, 0x96, 0x04, 0x4d, 0x16, 0xc8, + 0x49, 0xe0, 0x37, 0x33, 0xe3, 0x7b, 0x34, 0x56, + 0x99, 0xc5, 0x78, 0x57, 0x06, 0x02, 0xb4, 0xea, + 0x80, 0xc4, 0xf8, 0x8f, 0x8d, 0x2b, 0xe4, 0x05, + }, + }, + { + .data_len = 49, + .digest = { + 0x8b, 0x58, 0x62, 0xb5, 0x85, 0xf6, 0x83, 0x36, + 0xf5, 0x34, 0xb8, 0xd4, 0xbc, 0x5c, 0x8b, 0x38, + 0xfd, 0x15, 0xcd, 0x44, 0x83, 0x25, 0x71, 0xe1, + 0xd5, 0xe8, 0xa1, 0xa4, 0x36, 0x98, 0x7e, 0x68, + }, + }, + { + .data_len = 63, + .digest = { + 0x7e, 0xeb, 0x06, 0x87, 0xdf, 0x1a, 0xdc, 0xe5, + 0xfb, 0x64, 0xd4, 0xd1, 0x5d, 0x9e, 0x75, 0xc0, + 0xb9, 0xad, 0x55, 0x6c, 0xe6, 0xba, 0x4d, 0x98, + 0x2f, 0xbf, 0x72, 0xad, 0x61, 0x37, 0xf6, 0x11, + }, + }, + { + .data_len = 64, + .digest = { + 0x72, 0xdb, 0x43, 0x16, 0x57, 0x8e, 0x3a, 0x96, + 0xf3, 0x98, 0x19, 0x24, 0x17, 0x3b, 0xe8, 0xad, + 0xa1, 0x9b, 0xa4, 0x1b, 0x74, 0x85, 0x2e, 0x24, + 0x70, 0xea, 0x31, 0x5a, 0x1c, 0xbe, 0x43, 0xb5, + }, + }, + { + .data_len = 65, + .digest = { + 0x32, 0x48, 0xb0, 0xf0, 0x3f, 0xbb, 0xd2, 0xa3, + 0xfd, 0xf6, 0x28, 0x4a, 0x2a, 0xc5, 0xbe, 0x4b, + 0x73, 0x50, 0x63, 0xd6, 0x16, 0x00, 0xef, 0xed, + 0xfe, 0x97, 0x41, 0x29, 0xb2, 0x84, 0xc4, 0xa3, + }, + }, + { + .data_len = 127, + .digest = { + 0x17, 0xda, 0x6b, 0x96, 0x6a, 0xa6, 0xa4, 0xa6, + 0xa6, 0xf3, 0x9d, 0x18, 0x19, 0x8d, 0x98, 0x7c, + 0x66, 0x38, 0xe8, 0x99, 0xe7, 0x0a, 0x50, 0x92, + 0xaf, 0x11, 0x80, 0x05, 0x66, 0xed, 0xab, 0x74, + }, + }, + { + .data_len = 128, + .digest = { + 0x13, 0xd5, 0x8b, 0x22, 0xae, 0x90, 0x7b, 0x67, + 0x87, 0x4e, 0x3c, 0x35, 0x4e, 0x01, 0xf0, 0xb1, + 0xd3, 0xd1, 0x67, 0xbb, 0x43, 0xdb, 0x7c, 0x75, + 0xa4, 0xc7, 0x64, 0x83, 0x1e, 0x9b, 0x98, 0xad, + }, + }, + { + .data_len = 129, + .digest = { + 0x6f, 0xe0, 0x5d, 0x9d, 0xd5, 0x78, 0x29, 0xfb, + 0xd0, 0x77, 0xd1, 0x8a, 0xf0, 0x80, 0xcb, 0x81, + 0x71, 0x9e, 0x4d, 0x49, 0xde, 0x74, 0x2a, 0x37, + 0xc0, 0xd5, 0xf0, 0xfa, 0x50, 0xe6, 0x23, 0xfe, + }, + }, + { + .data_len = 256, + .digest = { + 0x89, 0xac, 0xf6, 0xe7, 0x5e, 0xba, 0x53, 0xf4, + 0x92, 0x32, 0xd5, 0x64, 0xfb, 0xc4, 0x08, 0xac, + 0x2c, 0x19, 0x6e, 0x63, 0x13, 0x75, 0xd0, 0x60, + 0x54, 0x35, 0x82, 0xc4, 0x6d, 0x03, 0x1a, 0x05, + }, + }, + { + .data_len = 511, + .digest = { + 0x1c, 0xaf, 0x94, 0x7d, 0x9c, 0xce, 0x57, 0x64, + 0xf8, 0xa8, 0x25, 0x45, 0x32, 0x86, 0x2b, 0x04, + 0xb3, 0x2e, 0x67, 0xca, 0x73, 0x04, 0x2f, 0xab, + 0xcc, 0xda, 0x9e, 0x42, 0xa1, 0xaf, 0x83, 0x5a, + }, + }, + { + .data_len = 513, + .digest = { + 0x21, 0xdf, 0xdc, 0x29, 0xd9, 0xfc, 0x7b, 0xe7, + 0x3a, 0xc4, 0xe1, 0x61, 0xc5, 0xb5, 0xe1, 0xee, + 0x7a, 0x9d, 0x0c, 0x66, 0x36, 0x63, 0xe4, 0x12, + 0x62, 0xe2, 0xf5, 0x68, 0x72, 0xfc, 0x1e, 0x18, + }, + }, + { + .data_len = 1000, + .digest = { + 0x6e, 0xc7, 0x2e, 0xac, 0xd0, 0xbb, 0x22, 0xe0, + 0xc2, 0x40, 0xb2, 0xfe, 0x8c, 0xaf, 0x9e, 0xcf, + 0x32, 0x06, 0xc6, 0x45, 0x29, 0xbd, 0xe0, 0x7f, + 0x53, 0x32, 0xc3, 0x2b, 0x2f, 0x68, 0x12, 0xcd, + }, + }, + { + .data_len = 3333, + .digest = { + 0x76, 0xba, 0x52, 0xb5, 0x09, 0xf5, 0x19, 0x09, + 0x70, 0x1c, 0x09, 0x28, 0xb4, 0xaa, 0x98, 0x6a, + 0x79, 0xe7, 0x5e, 0xcd, 0xe8, 0xa4, 0x73, 0x69, + 0x1f, 0xf8, 0x05, 0x0a, 0xb4, 0xfe, 0xf9, 0x63, + }, + }, + { + .data_len = 4096, + .digest = { + 0xf7, 0xad, 0xf9, 0xc8, 0x0e, 0x04, 0x2f, 0xdf, + 0xbe, 0x39, 0x79, 0x07, 0x0d, 0xd8, 0x1b, 0x06, + 0x42, 0x3a, 0x43, 0x93, 0xf6, 0x7c, 0xc4, 0xe5, + 0xc2, 0xd5, 0xd0, 0xa6, 0x35, 0x6c, 0xbd, 0x17, + }, + }, + { + .data_len = 4128, + .digest = { + 0x38, 0xd7, 0xab, 0x7e, 0x08, 0xdc, 0x1e, 0xab, + 0x55, 0xbb, 0x3b, 0x7b, 0x6a, 0x17, 0xcc, 0x79, + 0xa7, 0x02, 0x62, 0x66, 0x9b, 0xca, 0xee, 0xc0, + 0x3d, 0x75, 0x34, 0x2e, 0x55, 0x82, 0x26, 0x3c, + }, + }, + { + .data_len = 4160, + .digest = { + 0xf7, 0xeb, 0x2f, 0x24, 0x98, 0x54, 0x04, 0x5a, + 0x19, 0xe4, 0x12, 0x9d, 0x97, 0xbc, 0x87, 0xa5, + 0x0b, 0x85, 0x29, 0xa1, 0x36, 0x89, 0xc9, 0xba, + 0xa0, 0xe0, 0xac, 0x99, 0x7d, 0xa4, 0x51, 0x9f, + }, + }, + { + .data_len = 4224, + .digest = { + 0x8f, 0xe8, 0xa7, 0x79, 0x02, 0xbb, 0x4a, 0x56, + 0x66, 0x91, 0xef, 0x22, 0xd1, 0x09, 0x26, 0x6c, + 0xa9, 0x13, 0xd7, 0x44, 0xc7, 0x19, 0x9c, 0x0b, + 0xfb, 0x4f, 0xca, 0x72, 0x8f, 0x34, 0xf7, 0x82, + }, + }, + { + .data_len = 16384, + .digest = { + 0xaa, 0x21, 0xbb, 0x25, 0x4b, 0x66, 0x6e, 0x29, + 0x71, 0xc1, 0x44, 0x67, 0x19, 0xed, 0xe6, 0xe6, + 0x61, 0x13, 0xf4, 0xb7, 0x02, 0x94, 0x81, 0x0f, + 0xa7, 0x4d, 0xbb, 0x2c, 0xb8, 0xeb, 0x41, 0x0e, + }, + }, +}; + +static const u8 hash_testvec_consolidated[BLAKE2S_HASH_SIZE] = { + 0x84, 0x21, 0xbb, 0x73, 0x64, 0x47, 0x45, 0xe0, + 0xc1, 0x83, 0x78, 0xf1, 0xea, 0xe5, 0xfd, 0xdb, + 0x01, 0xda, 0xb7, 0x86, 0x70, 0x3b, 0x83, 0xb3, + 0xbc, 0xd9, 0xfd, 0x96, 0xbd, 0x50, 0x06, 0x67, +}; + +static const u8 blake2s_keyed_testvec_consolidated[BLAKE2S_HASH_SIZE] = { + 0xa6, 0xad, 0xcd, 0xb8, 0xd9, 0xdd, 0xc7, 0x70, + 0x07, 0x09, 0x7f, 0x9f, 0x41, 0xa9, 0x70, 0xa4, + 0x1c, 0xca, 0x61, 0xbb, 0x58, 0xb5, 0xb2, 0x1d, + 0xd1, 0x71, 0x16, 0xb0, 0x49, 0x4f, 0x9e, 0x1b, +}; diff --git a/lib/crypto/tests/blake2s_kunit.c b/lib/crypto/tests/blake2s_kunit.c new file mode 100644 index 000000000000..057c40132246 --- /dev/null +++ b/lib/crypto/tests/blake2s_kunit.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2025 Google LLC + */ +#include +#include "blake2s-testvecs.h" + +/* + * The following are compatibility functions that present BLAKE2s as an unkeyed + * hash function that produces hashes of fixed length BLAKE2S_HASH_SIZE, so that + * hash-test-template.h can be reused to test it. + */ + +static void blake2s_default(const u8 *data, size_t len, + u8 out[BLAKE2S_HASH_SIZE]) +{ + blake2s(out, data, NULL, BLAKE2S_HASH_SIZE, len, 0); +} + +static void blake2s_init_default(struct blake2s_state *state) +{ + blake2s_init(state, BLAKE2S_HASH_SIZE); +} + +/* + * Generate the HASH_KUNIT_CASES using hash-test-template.h. These test BLAKE2s + * with a key length of 0 and a hash length of BLAKE2S_HASH_SIZE. + */ +#define HASH blake2s_default +#define HASH_CTX blake2s_state +#define HASH_SIZE BLAKE2S_HASH_SIZE +#define HASH_INIT blake2s_init_default +#define HASH_UPDATE blake2s_update +#define HASH_FINAL blake2s_final +#include "hash-test-template.h" + +/* + * BLAKE2s specific test case which tests all possible combinations of key + * length and hash length. + */ +static void test_blake2s_all_key_and_hash_lens(struct kunit *test) +{ + const size_t data_len = 100; + u8 *data = &test_buf[0]; + u8 *key = data + data_len; + u8 *hash = key + BLAKE2S_KEY_SIZE; + struct blake2s_state main_state; + u8 main_hash[BLAKE2S_HASH_SIZE]; + + rand_bytes_seeded_from_len(data, data_len); + blake2s_init(&main_state, BLAKE2S_HASH_SIZE); + for (int key_len = 0; key_len <= BLAKE2S_KEY_SIZE; key_len++) { + rand_bytes_seeded_from_len(key, key_len); + for (int out_len = 1; out_len <= BLAKE2S_HASH_SIZE; out_len++) { + blake2s(hash, data, key, out_len, data_len, key_len); + blake2s_update(&main_state, hash, out_len); + } + } + blake2s_final(&main_state, main_hash); + KUNIT_ASSERT_MEMEQ(test, main_hash, blake2s_keyed_testvec_consolidated, + BLAKE2S_HASH_SIZE); +} + +/* + * BLAKE2s specific test case which tests using a guarded buffer for all allowed + * key lengths. Also tests both blake2s() and blake2s_init_key(). + */ +static void test_blake2s_with_guarded_key_buf(struct kunit *test) +{ + const size_t data_len = 100; + + rand_bytes(test_buf, data_len); + for (int key_len = 0; key_len <= BLAKE2S_KEY_SIZE; key_len++) { + u8 key[BLAKE2S_KEY_SIZE]; + u8 *guarded_key = &test_buf[TEST_BUF_LEN - key_len]; + u8 hash1[BLAKE2S_HASH_SIZE]; + u8 hash2[BLAKE2S_HASH_SIZE]; + struct blake2s_state state; + + rand_bytes(key, key_len); + memcpy(guarded_key, key, key_len); + + blake2s(hash1, test_buf, key, + BLAKE2S_HASH_SIZE, data_len, key_len); + blake2s(hash2, test_buf, guarded_key, + BLAKE2S_HASH_SIZE, data_len, key_len); + KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2S_HASH_SIZE); + + blake2s_init_key(&state, BLAKE2S_HASH_SIZE, + guarded_key, key_len); + blake2s_update(&state, test_buf, data_len); + blake2s_final(&state, hash2); + KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2S_HASH_SIZE); + } +} + +/* + * BLAKE2s specific test case which tests using a guarded output buffer for all + * allowed output lengths. + */ +static void test_blake2s_with_guarded_out_buf(struct kunit *test) +{ + const size_t data_len = 100; + + rand_bytes(test_buf, data_len); + for (int out_len = 1; out_len <= BLAKE2S_HASH_SIZE; out_len++) { + u8 hash[BLAKE2S_HASH_SIZE]; + u8 *guarded_hash = &test_buf[TEST_BUF_LEN - out_len]; + + blake2s(hash, test_buf, NULL, out_len, data_len, 0); + blake2s(guarded_hash, test_buf, NULL, out_len, data_len, 0); + KUNIT_ASSERT_MEMEQ(test, hash, guarded_hash, out_len); + } +} + +static struct kunit_case blake2s_test_cases[] = { + HASH_KUNIT_CASES, + KUNIT_CASE(test_blake2s_all_key_and_hash_lens), + KUNIT_CASE(test_blake2s_with_guarded_key_buf), + KUNIT_CASE(test_blake2s_with_guarded_out_buf), + KUNIT_CASE(benchmark_hash), + {}, +}; + +static struct kunit_suite blake2s_test_suite = { + .name = "blake2s", + .test_cases = blake2s_test_cases, + .suite_init = hash_suite_init, + .suite_exit = hash_suite_exit, +}; +kunit_test_suite(blake2s_test_suite); + +MODULE_DESCRIPTION("KUnit tests and benchmark for BLAKE2s"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 7c0c01a216e6d9e1d169c0f6f3b5522e6da708ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:04 +0200 Subject: vdso/datastore: Gate time data behind CONFIG_GENERIC_GETTIMEOFDAY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the generic vDSO does not provide time functions, as for example on riscv32, then the time data store is not necessary. Avoid allocating these time data pages when not used. Fixes: df7fcbefa710 ("vdso: Add generic time data storage") Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-1-d9b65750e49f@linutronix.de --- lib/vdso/datastore.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/vdso/datastore.c b/lib/vdso/datastore.c index 3693c6caf2c4..a565c30c71a0 100644 --- a/lib/vdso/datastore.c +++ b/lib/vdso/datastore.c @@ -11,14 +11,14 @@ /* * The vDSO data page. */ -#ifdef CONFIG_HAVE_GENERIC_VDSO +#ifdef CONFIG_GENERIC_GETTIMEOFDAY static union { struct vdso_time_data data; u8 page[PAGE_SIZE]; } vdso_time_data_store __page_aligned_data; struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data; static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); -#endif /* CONFIG_HAVE_GENERIC_VDSO */ +#endif /* CONFIG_GENERIC_GETTIMEOFDAY */ #ifdef CONFIG_VDSO_GETRANDOM static union { @@ -46,7 +46,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, switch (vmf->pgoff) { case VDSO_TIME_PAGE_OFFSET: - if (!IS_ENABLED(CONFIG_HAVE_GENERIC_VDSO)) + if (!IS_ENABLED(CONFIG_GENERIC_GETTIMEOFDAY)) return VM_FAULT_SIGBUS; pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); if (timens_page) { -- cgit v1.2.3 From f145d6bf8d590954672a4d0581c92e1799e9c8da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:07 +0200 Subject: vdso/gettimeofday: Remove !CONFIG_TIME_NS stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All calls of these functions are already gated behind CONFIG_TIME_NS. The compiler will already optimize them away if time namespaces are disabled. Drop the unnecessary stubs. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-4-d9b65750e49f@linutronix.de --- lib/vdso/gettimeofday.c | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'lib') diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 02ea19f67164..1e2a40b8d2c6 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -108,8 +108,6 @@ bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock return true; } -#ifdef CONFIG_TIME_NS - #ifdef CONFIG_GENERIC_VDSO_DATA_STORE static __always_inline const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd) @@ -149,20 +147,6 @@ bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock * return true; } -#else -static __always_inline -const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd) -{ - return NULL; -} - -static __always_inline -bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) -{ - return false; -} -#endif static __always_inline bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, @@ -204,7 +188,6 @@ bool do_hres(const struct vdso_time_data *vd, const struct vdso_clock *vc, return true; } -#ifdef CONFIG_TIME_NS static __always_inline bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, clockid_t clk, struct __kernel_timespec *ts) @@ -233,14 +216,6 @@ bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock return true; } -#else -static __always_inline -bool do_coarse_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, - clockid_t clk, struct __kernel_timespec *ts) -{ - return false; -} -#endif static __always_inline bool do_coarse(const struct vdso_time_data *vd, const struct vdso_clock *vc, -- cgit v1.2.3 From 278f1c933c3fab6f249b995a1a13608246b76181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:10 +0200 Subject: vdso: Drop kconfig GENERIC_VDSO_32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This configuration is never used. Remove it. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-7-d9b65750e49f@linutronix.de --- lib/vdso/Kconfig | 7 ------- 1 file changed, 7 deletions(-) (limited to 'lib') diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index 45df764b49ad..76157c26931d 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -12,13 +12,6 @@ config GENERIC_GETTIMEOFDAY Each architecture that enables this feature has to provide the fallback implementation. -config GENERIC_VDSO_32 - bool - depends on GENERIC_GETTIMEOFDAY && !64BIT - help - This config option helps to avoid possible performance issues - in 32 bit only architectures. - config GENERIC_COMPAT_VDSO bool help -- cgit v1.2.3 From bb5bc7bfab06b9a6a2ccecba6dc40783fe9b4231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:11 +0200 Subject: vdso: Drop kconfig GENERIC_COMPAT_VDSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This configuration is never used. Remove it. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-8-d9b65750e49f@linutronix.de --- lib/vdso/Kconfig | 5 ----- 1 file changed, 5 deletions(-) (limited to 'lib') diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index 76157c26931d..2594dd7185be 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -12,11 +12,6 @@ config GENERIC_GETTIMEOFDAY Each architecture that enables this feature has to provide the fallback implementation. -config GENERIC_COMPAT_VDSO - bool - help - This config option enables the compat VDSO layer. - config GENERIC_VDSO_TIME_NS bool help -- cgit v1.2.3 From 7b338f6d4e3d6baa057e3505592a86f6410d68ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:12 +0200 Subject: vdso: Drop Kconfig GENERIC_VDSO_DATA_STORE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All users of the generic vDSO library also use the generic vDSO datastore. Remove the now unnecessary Kconfig symbol. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-9-d9b65750e49f@linutronix.de --- lib/vdso/Kconfig | 5 ----- lib/vdso/Makefile | 2 +- lib/vdso/gettimeofday.c | 2 -- 3 files changed, 1 insertion(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index 2594dd7185be..48ffb0f6fa41 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -31,8 +31,3 @@ config VDSO_GETRANDOM bool help Selected by architectures that support vDSO getrandom(). - -config GENERIC_VDSO_DATA_STORE - bool - help - Selected by architectures that use the generic vDSO data store. diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index aedd40aaa950..405f743253d7 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -1,3 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_GENERIC_VDSO_DATA_STORE) += datastore.o +obj-$(CONFIG_HAVE_GENERIC_VDSO) += datastore.o diff --git a/lib/vdso/gettimeofday.c b/lib/vdso/gettimeofday.c index 1e2a40b8d2c6..95df0153f05a 100644 --- a/lib/vdso/gettimeofday.c +++ b/lib/vdso/gettimeofday.c @@ -108,13 +108,11 @@ bool vdso_get_timestamp(const struct vdso_time_data *vd, const struct vdso_clock return true; } -#ifdef CONFIG_GENERIC_VDSO_DATA_STORE static __always_inline const struct vdso_time_data *__arch_get_vdso_u_timens_data(const struct vdso_time_data *vd) { return (void *)vd + PAGE_SIZE; } -#endif /* CONFIG_GENERIC_VDSO_DATA_STORE */ static __always_inline bool do_hres_timens(const struct vdso_time_data *vdns, const struct vdso_clock *vcns, -- cgit v1.2.3 From bad53ae2dc4296acb8cbcee385e0238cea484100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:13 +0200 Subject: vdso: Drop Kconfig GENERIC_VDSO_TIME_NS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All architectures implementing time-related functionality in the vDSO are using the generic vDSO library which handles time namespaces properly. Remove the now unnecessary Kconfig symbol. Enables the use of time namespaces on architectures, which use the generic vDSO but did not enable GENERIC_VDSO_TIME_NS, namely MIPS and arm. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Catalin Marinas Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-10-d9b65750e49f@linutronix.de --- lib/vdso/Kconfig | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index 48ffb0f6fa41..3d2c2b90d193 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -12,12 +12,6 @@ config GENERIC_GETTIMEOFDAY Each architecture that enables this feature has to provide the fallback implementation. -config GENERIC_VDSO_TIME_NS - bool - help - Selected by architectures which support time namespaces in the - VDSO - config GENERIC_VDSO_OVERFLOW_PROTECT bool help -- cgit v1.2.3 From 258b37c6e626625fe441e16ab90e6a542a66eaee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 26 Aug 2025 08:17:14 +0200 Subject: vdso: Gate VDSO_GETRANDOM behind HAVE_GENERIC_VDSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All architectures which want to implement getrandom() in the vDSO need to use the generic vDSO library. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20250826-vdso-cleanups-v1-11-d9b65750e49f@linutronix.de --- lib/vdso/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/vdso/Kconfig b/lib/vdso/Kconfig index 3d2c2b90d193..db87ba34ef19 100644 --- a/lib/vdso/Kconfig +++ b/lib/vdso/Kconfig @@ -19,9 +19,9 @@ config GENERIC_VDSO_OVERFLOW_PROTECT time getter functions for the price of an extra conditional in the hotpath. -endif - config VDSO_GETRANDOM bool help Selected by architectures that support vDSO getrandom(). + +endif -- cgit v1.2.3 From afc4e4a5f122183b38095daba2264123cc86d8ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:18 -0700 Subject: lib/crypto: tests: Migrate Curve25519 self-test to KUnit Move the Curve25519 test from an ad-hoc self-test to a KUnit test. Generally keep the same test logic for now, just translated to KUnit. There's one exception, which is that I dropped the incomplete test of curve25519_generic(). The approach I'm taking to cover the different implementations with the KUnit tests is to just rely on booting kernels in QEMU with different '-cpu' options, rather than try to make the tests (incompletely) test multiple implementations on one CPU. This way, both the test and the library API are simpler. This commit makes the file lib/crypto/curve25519.c no longer needed, as its only purpose was to call the self-test. However, keep it for now, since a later commit will add code to it again. Temporarily omit the default value of CRYPTO_SELFTESTS that the other lib/crypto/ KUnit tests have. It would cause a recursive kconfig dependency, since the Curve25519 code is still entangled with CRYPTO. A later commit will fix that. Link: https://lore.kernel.org/r/20250906213523.84915-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 1 - lib/crypto/curve25519-selftest.c | 1321 ---------------------------------- lib/crypto/curve25519.c | 3 - lib/crypto/tests/Kconfig | 9 + lib/crypto/tests/Makefile | 1 + lib/crypto/tests/curve25519_kunit.c | 1332 +++++++++++++++++++++++++++++++++++ 6 files changed, 1342 insertions(+), 1325 deletions(-) delete mode 100644 lib/crypto/curve25519-selftest.c create mode 100644 lib/crypto/tests/curve25519_kunit.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index ad27c5bf99e1..6c3be971ace0 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -87,7 +87,6 @@ endif obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o libcurve25519-y += curve25519.o -libcurve25519-$(CONFIG_CRYPTO_SELFTESTS) += curve25519-selftest.o obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o libdes-y := des.o diff --git a/lib/crypto/curve25519-selftest.c b/lib/crypto/curve25519-selftest.c deleted file mode 100644 index c85e85381e78..000000000000 --- a/lib/crypto/curve25519-selftest.c +++ /dev/null @@ -1,1321 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ - -#include - -struct curve25519_test_vector { - u8 private[CURVE25519_KEY_SIZE]; - u8 public[CURVE25519_KEY_SIZE]; - u8 result[CURVE25519_KEY_SIZE]; - bool valid; -}; -static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = { - { - .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, - 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, - 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, - 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, - .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, - 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, - 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, - 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, - .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, - 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, - 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, - 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, - .valid = true - }, - { - .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, - 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, - 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, - 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, - .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, - 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, - 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, - 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, - .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, - 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, - 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, - 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, - .valid = true - }, - { - .private = { 1 }, - .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, - 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, - 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, - 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, - .valid = true - }, - { - .private = { 1 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, - 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, - 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, - 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, - .valid = true - }, - { - .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, - 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, - 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, - 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, - .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, - 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, - 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, - 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, - .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, - 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, - 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, - 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, - .valid = true - }, - { - .private = { 1, 2, 3, 4 }, - .public = { 0 }, - .result = { 0 }, - .valid = false - }, - { - .private = { 2, 4, 6, 8 }, - .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, - 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, - 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, - 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 }, - .result = { 0 }, - .valid = false - }, - { - .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, - .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, - 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, - 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, - 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, - .valid = true - }, - { - .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, - .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, - 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, - 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, - 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, - .valid = true - }, - /* wycheproof - normal case */ - { - .private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, - 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, - 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, - 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, - .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, - 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, - 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, - 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, - .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, - 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, - 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, - 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, - 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, - 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, - 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, - .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, - 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, - 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, - 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, - .result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff, - 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, - 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, - 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, - 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, - 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, - 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, - .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, - 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, - 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, - 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, - .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, - 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, - 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0, - 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, - 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, - 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, - 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, - .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, - 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, - 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, - 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, - .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, - 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, - 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, - 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, - 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, - 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, - 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, - .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, - 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, - 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, - 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, - .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, - 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, - 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, - 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, - 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, - 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, - 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, - .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, - 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, - 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, - 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, - .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, - 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, - 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, - 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, - .valid = true - }, - /* wycheproof - public key = 0 */ - { - .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11, - 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac, - 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b, - 0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc }, - .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key = 1 */ - { - .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61, - 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea, - 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f, - 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab }, - .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, - 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, - 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, - 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 }, - .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, - 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, - 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, - 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, - 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, - 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, - 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, - .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, - 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b, - 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, - 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, - 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, - 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, - 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, - .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, - 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, - 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, - .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, - 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, - 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, - 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3, - 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, - 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, - 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 }, - .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, - 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, - 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, - .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, - 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, - 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, - 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, - 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, - 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, - 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, - .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, - 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, - 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, - 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, - 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, - 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, - 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, - .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, - 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, - 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65, - 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, - 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, - 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, - 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, - .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7, - 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, - 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, - 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, - 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, - 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, - 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, - 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, - 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, - 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, - 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, - 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, - 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, - .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8, - 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, - 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, - 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c, - 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, - 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, - 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, - .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, - 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, - 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, - 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, - .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, - 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, - 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, - 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, - 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, - 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, - 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, - 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, - 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, - 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, - 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, - 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, - 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, - 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, - 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, - 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, - .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, - 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, - 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, - 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, - 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, - 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, - 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, - .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, - 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, - 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, - 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, - .valid = true - }, - /* wycheproof - public key with low order */ - { - .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30, - 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69, - 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14, - 0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 }, - .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, - 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, - 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, - 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3, - 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b, - 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef, - 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 }, - .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, - 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, - 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, - 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20, - 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf, - 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43, - 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 }, - .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f, - 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65, - 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06, - 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 }, - .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe, - 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9, - 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f, - 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f }, - .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8, - 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85, - 0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c, - 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c }, - .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8, - 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d, - 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0, - 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 }, - .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a, - 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b, - 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67, - 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 }, - .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, - 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, - 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, - 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46, - 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02, - 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3, - 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 }, - .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, - 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, - 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, - 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30, - 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1, - 0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6, - 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe }, - .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f, - 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77, - 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0, - 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c }, - .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e, - 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f, - 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77, - 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b }, - .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key >= p */ - { - .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, - 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, - 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, - 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, - .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, - 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, - 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, - 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, - 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, - 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, - 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, - .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, - 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, - 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, - 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, - 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, - 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, - 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, - .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, - 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, - 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, - 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, - 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, - 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, - 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, - 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, - 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, - 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, - 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, - 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, - 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, - .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, - 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, - 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, - 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, - 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, - 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, - 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, - .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, - 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, - 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, - 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, - 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, - 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, - 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, - .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, - 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, - 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, - 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, - 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, - 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, - 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, - .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, - 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, - 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, - 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, - 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, - 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b, - 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, - .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b, - 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, - 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, - 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, - 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, - 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, - 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, - .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, - 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, - 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, - 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, - 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, - 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, - 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, - .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, - 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, - 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, - 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, - 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, - 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, - 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, - .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, - 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, - 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, - 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, - 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, - 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, - 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, - .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, - 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, - 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, - 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, - 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, - 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, - 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d }, - .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, - 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, - 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95, - 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, - 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, - 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, - 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, - .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, - 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, - 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, - 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, - 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, - 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, - 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, - 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, - 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, - 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, - .valid = true - }, - /* wycheproof - RFC 7748 */ - { - .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, - 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, - 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, - 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, - .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, - 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, - 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, - 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, - .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, - 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, - 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, - 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, - .valid = true - }, - /* wycheproof - RFC 7748 */ - { - .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, - 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, - 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, - 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, - .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, - 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, - 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, - 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, - .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, - 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, - 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, - 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, - 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, - 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, - 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, - .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, - 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, - 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, - 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, - .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, - 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, - 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, - 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, - .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, - 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, - 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, - 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, - .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, - 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, - 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, - 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, - .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, - 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, - 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, - 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, - .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, - 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, - 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, - 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, - .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, - 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, - 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, - 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, - .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, - 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, - 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, - 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, - .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, - 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, - 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb, - 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, - .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, - 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, - 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, - 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, - .result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, - 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, - 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, - 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, - .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, - 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, - 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a, - 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, - 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, - 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, - 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, - 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, - 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, - 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, - .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, - 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, - 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, - 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, - 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, - 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, - 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, - .result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, - 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, - 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, - 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, - 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, - 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, - 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, - .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, - 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, - 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, - 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee, - 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, - 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, - 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, - .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, - 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, - 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, - 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, - 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, - 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, - 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, - .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, - 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, - 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, - 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, - .valid = true - }, - /* wycheproof - private key == -1 (mod order) */ - { - .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, - 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, - .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, - 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, - 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, - 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, - .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, - 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, - 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, - 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, - .valid = true - }, - /* wycheproof - private key == 1 (mod order) on twist */ - { - .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, - 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f }, - .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, - 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, - 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, - 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, - .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, - 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, - 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, - 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, - .valid = true - } -}; - -bool __init curve25519_selftest(void) -{ - bool success = true, ret, ret2; - size_t i = 0, j; - u8 in[CURVE25519_KEY_SIZE]; - u8 out[CURVE25519_KEY_SIZE], out2[CURVE25519_KEY_SIZE], - out3[CURVE25519_KEY_SIZE]; - - for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { - memset(out, 0, CURVE25519_KEY_SIZE); - ret = curve25519(out, curve25519_test_vectors[i].private, - curve25519_test_vectors[i].public); - if (ret != curve25519_test_vectors[i].valid || - memcmp(out, curve25519_test_vectors[i].result, - CURVE25519_KEY_SIZE)) { - pr_err("curve25519 self-test %zu: FAIL\n", i + 1); - success = false; - } - } - - for (i = 0; i < 5; ++i) { - get_random_bytes(in, sizeof(in)); - ret = curve25519_generate_public(out, in); - ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); - curve25519_generic(out3, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); - if (ret != ret2 || - memcmp(out, out2, CURVE25519_KEY_SIZE) || - memcmp(out, out3, CURVE25519_KEY_SIZE)) { - pr_err("curve25519 basepoint self-test %zu: FAIL: input - 0x", - i + 1); - for (j = CURVE25519_KEY_SIZE; j-- > 0;) - printk(KERN_CONT "%02x", in[j]); - printk(KERN_CONT "\n"); - success = false; - } - } - - return success; -} diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c index 6850b76a80c9..25f16777865b 100644 --- a/lib/crypto/curve25519.c +++ b/lib/crypto/curve25519.c @@ -15,9 +15,6 @@ static int __init curve25519_init(void) { - if (IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) && - WARN_ON(!curve25519_selftest())) - return -ENODEV; return 0; } diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index fd341aa12f15..eaca60d3e0a3 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -10,6 +10,15 @@ config CRYPTO_LIB_BLAKE2S_KUNIT_TEST help KUnit tests for the BLAKE2s cryptographic hash function. +config CRYPTO_LIB_CURVE25519_KUNIT_TEST + tristate "KUnit tests for Curve25519" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + select CRYPTO_LIB_BENCHMARK_VISIBLE + select CRYPTO_LIB_CURVE25519 + help + KUnit tests for the Curve25519 Diffie-Hellman function. + config CRYPTO_LIB_MD5_KUNIT_TEST tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index be7de929af2c..a71fad19922b 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-or-later obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST) += blake2s_kunit.o +obj-$(CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST) += curve25519_kunit.o obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o diff --git a/lib/crypto/tests/curve25519_kunit.c b/lib/crypto/tests/curve25519_kunit.c new file mode 100644 index 000000000000..68eab75d40dc --- /dev/null +++ b/lib/crypto/tests/curve25519_kunit.c @@ -0,0 +1,1332 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include + +struct curve25519_test_vector { + u8 private[CURVE25519_KEY_SIZE]; + u8 public[CURVE25519_KEY_SIZE]; + u8 result[CURVE25519_KEY_SIZE]; + bool valid; +}; +static const struct curve25519_test_vector curve25519_test_vectors[] = { + { + .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, + 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, + 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, + 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, + .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, + 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, + 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, + 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, + .valid = true + }, + { + .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, + 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, + 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, + 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, + .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, + 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, + 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, + 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, + .valid = true + }, + { + .private = { 1 }, + .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, + 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, + 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, + 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, + .valid = true + }, + { + .private = { 1 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, + 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, + 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, + 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, + .valid = true + }, + { + .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, + .valid = true + }, + { + .private = { 1, 2, 3, 4 }, + .public = { 0 }, + .result = { 0 }, + .valid = false + }, + { + .private = { 2, 4, 6, 8 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 }, + .result = { 0 }, + .valid = false + }, + { + .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, + .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, + 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, + 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, + 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, + .valid = true + }, + { + .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, + .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, + 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, + 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, + 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, + .valid = true + }, + /* wycheproof - normal case */ + { + .private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, + 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, + 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, + 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, + .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, + 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, + 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, + 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, + .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, + 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, + 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, + 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, + 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, + 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, + 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, + .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, + 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, + 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, + 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, + .result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff, + 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, + 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, + 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, + 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, + 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, + 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, + .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, + 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, + 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, + 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, + .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, + 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, + 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0, + 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, + 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, + 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, + 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, + .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, + 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, + 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, + 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, + .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, + 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, + 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, + 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, + 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, + 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, + 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, + .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, + 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, + 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, + 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, + .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, + 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, + 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, + 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, + 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, + 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, + 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, + .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, + 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, + 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, + 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, + .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, + 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, + 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, + 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, + .valid = true + }, + /* wycheproof - public key = 0 */ + { + .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11, + 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac, + 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b, + 0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key = 1 */ + { + .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61, + 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea, + 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f, + 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab }, + .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, + 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, + 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, + 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 }, + .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, + 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, + 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, + 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, + 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, + 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, + 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, + .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, + 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b, + 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, + 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, + 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, + 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, + 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, + .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, + 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, + 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, + .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, + 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, + 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, + 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3, + 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, + 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, + 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 }, + .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, + 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, + 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, + .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, + 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, + 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, + 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, + 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, + 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, + 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, + .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, + 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, + 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, + 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, + 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, + 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, + 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, + .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, + 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, + 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65, + 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, + 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, + 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, + 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, + .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7, + 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, + 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, + 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, + 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, + 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, + 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, + 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, + 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, + 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, + 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, + 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, + 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, + .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8, + 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, + 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, + 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c, + 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, + 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, + 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, + .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, + 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, + 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, + .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, + 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, + 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, + 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, + 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, + 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, + 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, + 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, + 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, + 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, + 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, + 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, + 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, + .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, + 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, + 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, + 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, + 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, + 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, + 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, + .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, + 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, + 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, + 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, + .valid = true + }, + /* wycheproof - public key with low order */ + { + .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30, + 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69, + 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14, + 0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3, + 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b, + 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef, + 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 }, + .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20, + 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf, + 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43, + 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 }, + .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f, + 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65, + 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06, + 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 }, + .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe, + 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9, + 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f, + 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f }, + .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8, + 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85, + 0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c, + 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8, + 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d, + 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0, + 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 }, + .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a, + 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b, + 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67, + 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46, + 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02, + 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3, + 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 }, + .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30, + 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1, + 0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6, + 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe }, + .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f, + 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77, + 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0, + 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c }, + .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e, + 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f, + 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77, + 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b }, + .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, + 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, + 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, + 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, + .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, + 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, + 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, + 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, + 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, + 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, + 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, + .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, + 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, + 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, + 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, + 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, + 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, + 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, + .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, + 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, + 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, + 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, + 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, + 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, + 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, + 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, + 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, + 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, + 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, + 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, + 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, + .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, + 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, + 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, + 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, + 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, + 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, + 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, + .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, + 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, + 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, + 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, + 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, + 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, + 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, + .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, + 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, + 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, + 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, + 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, + 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, + 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, + .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, + 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, + 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, + 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, + 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, + 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b, + 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, + .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b, + 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, + 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, + 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, + 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, + 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, + 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, + .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, + 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, + 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, + 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, + 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, + 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, + 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, + .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, + 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, + 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, + 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, + 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, + 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, + 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, + .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, + 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, + 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, + 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, + 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, + 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, + 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, + .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, + 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, + 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, + 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, + 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, + 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, + 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d }, + .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, + 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, + 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95, + 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, + 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, + 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, + 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, + .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, + 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, + 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, + 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, + 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, + 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, + 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, + 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, + 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, + 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, + .valid = true + }, + /* wycheproof - RFC 7748 */ + { + .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, + .valid = true + }, + /* wycheproof - RFC 7748 */ + { + .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, + 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, + 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, + 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, + .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, + 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, + 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, + 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, + .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, + 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, + 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, + 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, + 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, + 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, + 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, + .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, + 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, + 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, + 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, + .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, + 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, + 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, + 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, + .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, + 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, + 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, + 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, + .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, + 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, + 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, + 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, + .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, + 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, + 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, + 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, + .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, + 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, + 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, + 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, + .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, + 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, + 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, + 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, + .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, + 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, + 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, + 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, + .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, + 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, + 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb, + 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, + .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, + 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, + 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, + 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, + .result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, + 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, + 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, + 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, + .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, + 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, + 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a, + 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, + 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, + 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, + 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, + 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, + 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, + 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, + .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, + 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, + 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, + 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, + 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, + 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, + 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, + .result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, + 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, + 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, + 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, + 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, + 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, + 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, + .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, + 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, + 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, + 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee, + 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, + 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, + 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, + .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, + 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, + 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, + 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, + 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, + 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, + 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, + .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, + 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, + 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, + 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, + .valid = true + }, + /* wycheproof - private key == -1 (mod order) */ + { + .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, + 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, + .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, + .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, + .valid = true + }, + /* wycheproof - private key == 1 (mod order) on twist */ + { + .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, + 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f }, + .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, + .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, + .valid = true + } +}; + +static void test_curve25519(struct kunit *test) +{ + for (size_t i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { + const struct curve25519_test_vector *vec = + &curve25519_test_vectors[i]; + u8 out[CURVE25519_KEY_SIZE] = {}; + bool ret; + + ret = curve25519(out, vec->private, vec->public); + KUNIT_EXPECT_EQ_MSG(test, ret, vec->valid, + "Wrong return value with test vector %zu", + i); + KUNIT_EXPECT_MEMEQ_MSG(test, out, vec->result, sizeof(out), + "Wrong output with test vector %zu", i); + } +} + +static void test_curve25519_basepoint(struct kunit *test) +{ + for (size_t i = 0; i < 5; ++i) { + u8 in[CURVE25519_KEY_SIZE]; + u8 out[CURVE25519_KEY_SIZE]; + u8 out2[CURVE25519_KEY_SIZE]; + bool ret, ret2; + + get_random_bytes(in, sizeof(in)); + ret = curve25519_generate_public(out, in); + ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); + KUNIT_EXPECT_EQ_MSG(test, ret, ret2, + "in=%*phN", CURVE25519_KEY_SIZE, in); + KUNIT_EXPECT_MEMEQ_MSG(test, out, out2, CURVE25519_KEY_SIZE, + "in=%*phN", CURVE25519_KEY_SIZE, in); + } +} + +static struct kunit_case curve25519_test_cases[] = { + KUNIT_CASE(test_curve25519), + KUNIT_CASE(test_curve25519_basepoint), + {}, +}; + +static struct kunit_suite curve25519_test_suite = { + .name = "curve25519", + .test_cases = curve25519_test_cases, +}; +kunit_test_suite(curve25519_test_suite); + +MODULE_DESCRIPTION("KUnit tests for Curve25519"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 643d79e531cc99da0dc5502dfccb6b3b305c65f3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:19 -0700 Subject: lib/crypto: tests: Add Curve25519 benchmark Add a benchmark to curve25519_kunit. This brings it in line with the other crypto KUnit tests and provides an easy way to measure performance. Link: https://lore.kernel.org/r/20250906213523.84915-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/curve25519_kunit.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crypto/tests/curve25519_kunit.c b/lib/crypto/tests/curve25519_kunit.c index 68eab75d40dc..248d05f66b35 100644 --- a/lib/crypto/tests/curve25519_kunit.c +++ b/lib/crypto/tests/curve25519_kunit.c @@ -5,6 +5,7 @@ #include #include +#include struct curve25519_test_vector { u8 private[CURVE25519_KEY_SIZE]; @@ -1316,9 +1317,39 @@ static void test_curve25519_basepoint(struct kunit *test) } } +static void benchmark_curve25519(struct kunit *test) +{ + const u8 *private = curve25519_test_vectors[0].private; + const u8 *public = curve25519_test_vectors[0].public; + const size_t warmup_niter = 5000; + const size_t benchmark_niter = 1024; + u8 out[CURVE25519_KEY_SIZE]; + bool ok = true; + u64 t; + + if (!IS_ENABLED(CONFIG_CRYPTO_LIB_BENCHMARK)) + kunit_skip(test, "not enabled"); + + /* Warm-up */ + for (size_t i = 0; i < warmup_niter; i++) + ok &= curve25519(out, private, public); + + /* Benchmark */ + preempt_disable(); + t = ktime_get_ns(); + for (size_t i = 0; i < benchmark_niter; i++) + ok &= curve25519(out, private, public); + t = ktime_get_ns() - t; + preempt_enable(); + KUNIT_EXPECT_TRUE(test, ok); + kunit_info(test, "%llu ops/s", + div64_u64((u64)benchmark_niter * NSEC_PER_SEC, t ?: 1)); +} + static struct kunit_case curve25519_test_cases[] = { KUNIT_CASE(test_curve25519), KUNIT_CASE(test_curve25519_basepoint), + KUNIT_CASE(benchmark_curve25519), {}, }; @@ -1328,5 +1359,5 @@ static struct kunit_suite curve25519_test_suite = { }; kunit_test_suite(curve25519_test_suite); -MODULE_DESCRIPTION("KUnit tests for Curve25519"); +MODULE_DESCRIPTION("KUnit tests and benchmark for Curve25519"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 8c06b330e8f79834924305362227e38e4e2469ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:20 -0700 Subject: lib/crypto: curve25519: Move a couple functions out-of-line Move curve25519() and curve25519_generate_public() from curve25519.h to curve25519.c. There's no good reason for them to be inline. Link: https://lore.kernel.org/r/20250906213523.84915-10-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/curve25519.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c index 25f16777865b..1b786389d714 100644 --- a/lib/crypto/curve25519.c +++ b/lib/crypto/curve25519.c @@ -10,8 +10,40 @@ */ #include -#include +#include +#include #include +#include + +bool __must_check +curve25519(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) + curve25519_arch(mypublic, secret, basepoint); + else + curve25519_generic(mypublic, secret, basepoint); + return crypto_memneq(mypublic, curve25519_null_point, + CURVE25519_KEY_SIZE); +} +EXPORT_SYMBOL(curve25519); + +bool __must_check +curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + if (unlikely(!crypto_memneq(secret, curve25519_null_point, + CURVE25519_KEY_SIZE))) + return false; + + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) + curve25519_base_arch(pub, secret); + else + curve25519_generic(pub, secret, curve25519_base_point); + return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); +} +EXPORT_SYMBOL(curve25519_generate_public); static int __init curve25519_init(void) { -- cgit v1.2.3 From 68546e5632c0b982663af575ae12cc5d81facc91 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:21 -0700 Subject: lib/crypto: curve25519: Consolidate into single module Reorganize the Curve25519 library code: - Build a single libcurve25519 module, instead of up to three modules: libcurve25519, libcurve25519-generic, and an arch-specific module. - Move the arch-specific Curve25519 code from arch/$(SRCARCH)/crypto/ to lib/crypto/$(SRCARCH)/. Centralize the build rules into lib/crypto/Makefile and lib/crypto/Kconfig. - Include the arch-specific code directly in lib/crypto/curve25519.c via a header, rather than using a separate .c file. - Eliminate the entanglement with CRYPTO. CRYPTO_LIB_CURVE25519 no longer selects CRYPTO, and the arch-specific Curve25519 code no longer depends on CRYPTO. This brings Curve25519 in line with the latest conventions for lib/crypto/, used by other algorithms. The exception is that I kept the generic code in separate translation units for now. (Some of the function names collide between the x86 and generic Curve25519 code. And the Curve25519 functions are very long anyway, so inlining doesn't matter as much for Curve25519 as it does for some other algorithms.) Link: https://lore.kernel.org/r/20250906213523.84915-11-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 37 +- lib/crypto/Makefile | 26 +- lib/crypto/arm/curve25519-core.S | 2062 +++++++++++++++++++++++++++ lib/crypto/arm/curve25519.h | 47 + lib/crypto/curve25519-generic.c | 25 - lib/crypto/curve25519.c | 50 +- lib/crypto/powerpc/curve25519-ppc64le_asm.S | 671 +++++++++ lib/crypto/powerpc/curve25519.h | 186 +++ lib/crypto/x86/curve25519.h | 1613 +++++++++++++++++++++ 9 files changed, 4645 insertions(+), 72 deletions(-) create mode 100644 lib/crypto/arm/curve25519-core.S create mode 100644 lib/crypto/arm/curve25519.h delete mode 100644 lib/crypto/curve25519-generic.c create mode 100644 lib/crypto/powerpc/curve25519-ppc64le_asm.S create mode 100644 lib/crypto/powerpc/curve25519.h create mode 100644 lib/crypto/x86/curve25519.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 37d85e0c9b97..eea17e36a22b 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -54,35 +54,24 @@ config CRYPTO_LIB_CHACHA_ARCH default y if S390 default y if X86_64 -config CRYPTO_ARCH_HAVE_LIB_CURVE25519 - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the Curve25519 library interface, - either builtin or as a module. - -config CRYPTO_LIB_CURVE25519_GENERIC +config CRYPTO_LIB_CURVE25519 tristate select CRYPTO_LIB_UTILS help - This symbol can be depended upon by arch implementations of the - Curve25519 library interface that require the generic code as a - fallback, e.g., for SIMD implementations. If no arch specific - implementation is enabled, this implementation serves the users - of CRYPTO_LIB_CURVE25519. + The Curve25519 library functions. Select this if your module uses any + of the functions from . -config CRYPTO_LIB_CURVE25519_INTERNAL - tristate - select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n +config CRYPTO_LIB_CURVE25519_ARCH + bool + depends on CRYPTO_LIB_CURVE25519 && !UML && !KMSAN + default y if ARM && KERNEL_MODE_NEON + default y if PPC64 && CPU_LITTLE_ENDIAN + default y if X86_64 -config CRYPTO_LIB_CURVE25519 - tristate - select CRYPTO - select CRYPTO_LIB_CURVE25519_INTERNAL - help - Enable the Curve25519 library interface. This interface may be - fulfilled by either the generic implementation or an arch-specific - one, if one is available and enabled. +config CRYPTO_LIB_CURVE25519_GENERIC + bool + depends on CRYPTO_LIB_CURVE25519 + default y if !CRYPTO_LIB_CURVE25519_ARCH || ARM || X86_64 config CRYPTO_LIB_DES tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 6c3be971ace0..bded351aeace 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -76,17 +76,31 @@ obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o libchacha20poly1305-y += chacha20poly1305.o libchacha20poly1305-$(CONFIG_CRYPTO_SELFTESTS) += chacha20poly1305-selftest.o -obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o -libcurve25519-generic-y := curve25519-fiat32.o -libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o -libcurve25519-generic-y += curve25519-generic.o +################################################################################ + +obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o +libcurve25519-y := curve25519.o + +# Disable GCOV in odd or sensitive code +GCOV_PROFILE_curve25519.o := n + +ifeq ($(CONFIG_ARCH_SUPPORTS_INT128),y) +libcurve25519-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += curve25519-hacl64.o +else +libcurve25519-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += curve25519-fiat32.o +endif # clang versions prior to 18 may blow out the stack with KASAN ifeq ($(call clang-min-version, 180000),) KASAN_SANITIZE_curve25519-hacl64.o := n endif -obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o -libcurve25519-y += curve25519.o +ifeq ($(CONFIG_CRYPTO_LIB_CURVE25519_ARCH),y) +CFLAGS_curve25519.o += -I$(src)/$(SRCARCH) +libcurve25519-$(CONFIG_ARM) += arm/curve25519-core.o +libcurve25519-$(CONFIG_PPC) += powerpc/curve25519-ppc64le_asm.o +endif + +################################################################################ obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o libdes-y := des.o diff --git a/lib/crypto/arm/curve25519-core.S b/lib/crypto/arm/curve25519-core.S new file mode 100644 index 000000000000..b697fa5d059a --- /dev/null +++ b/lib/crypto/arm/curve25519-core.S @@ -0,0 +1,2062 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. + */ + +#include + +.text +.arch armv7-a +.fpu neon +.align 4 + +ENTRY(curve25519_neon) + push {r4-r11, lr} + mov ip, sp + sub r3, sp, #704 + and r3, r3, #0xfffffff0 + mov sp, r3 + movw r4, #0 + movw r5, #254 + vmov.i32 q0, #1 + vshr.u64 q1, q0, #7 + vshr.u64 q0, q0, #8 + vmov.i32 d4, #19 + vmov.i32 d5, #38 + add r6, sp, #480 + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128] + add r6, r3, #0 + vmov.i32 q2, #0 + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 d4, [r6, : 64] + add r6, r3, #0 + movw r7, #960 + sub r7, r7, #2 + neg r7, r7 + sub r7, r7, r7, LSL #7 + str r7, [r6] + add r6, sp, #672 + vld1.8 {d4-d5}, [r1]! + vld1.8 {d6-d7}, [r1] + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d6-d7}, [r6, : 128] + sub r1, r6, #16 + ldrb r6, [r1] + and r6, r6, #248 + strb r6, [r1] + ldrb r6, [r1, #31] + and r6, r6, #127 + orr r6, r6, #64 + strb r6, [r1, #31] + vmov.i64 q2, #0xffffffff + vshr.u64 q3, q2, #7 + vshr.u64 q2, q2, #6 + vld1.8 {d8}, [r2] + vld1.8 {d10}, [r2] + add r2, r2, #6 + vld1.8 {d12}, [r2] + vld1.8 {d14}, [r2] + add r2, r2, #6 + vld1.8 {d16}, [r2] + add r2, r2, #4 + vld1.8 {d18}, [r2] + vld1.8 {d20}, [r2] + add r2, r2, #6 + vld1.8 {d22}, [r2] + add r2, r2, #2 + vld1.8 {d24}, [r2] + vld1.8 {d26}, [r2] + vshr.u64 q5, q5, #26 + vshr.u64 q6, q6, #3 + vshr.u64 q7, q7, #29 + vshr.u64 q8, q8, #6 + vshr.u64 q10, q10, #25 + vshr.u64 q11, q11, #3 + vshr.u64 q12, q12, #12 + vshr.u64 q13, q13, #38 + vand q4, q4, q2 + vand q6, q6, q2 + vand q8, q8, q2 + vand q10, q10, q2 + vand q2, q12, q2 + vand q5, q5, q3 + vand q7, q7, q3 + vand q9, q9, q3 + vand q11, q11, q3 + vand q3, q13, q3 + add r2, r3, #48 + vadd.i64 q12, q4, q1 + vadd.i64 q13, q10, q1 + vshr.s64 q12, q12, #26 + vshr.s64 q13, q13, #26 + vadd.i64 q5, q5, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q14, q5, q0 + vadd.i64 q11, q11, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q11, q0 + vsub.i64 q4, q4, q12 + vshr.s64 q12, q14, #25 + vsub.i64 q10, q10, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q14, q6, q1 + vadd.i64 q2, q2, q13 + vsub.i64 q5, q5, q12 + vshr.s64 q12, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q1 + vadd.i64 q7, q7, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q15, q7, q0 + vsub.i64 q11, q11, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q12 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q3, q0 + vadd.i64 q8, q8, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q15, q8, q1 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q7, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q9, q9, q12 + vtrn.32 d12, d14 + vshl.i64 q12, q12, #26 + vtrn.32 d13, d15 + vadd.i64 q0, q9, q0 + vadd.i64 q4, q4, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q6, q13, #4 + vsub.i64 q7, q8, q12 + vshr.s64 q0, q0, #25 + vadd.i64 q4, q4, q6 + vadd.i64 q6, q10, q0 + vshl.i64 q0, q0, #25 + vadd.i64 q8, q6, q1 + vadd.i64 q4, q4, q13 + vshl.i64 q10, q13, #25 + vadd.i64 q1, q4, q1 + vsub.i64 q0, q9, q0 + vshr.s64 q8, q8, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d14, d0 + vshr.s64 q1, q1, #26 + vtrn.32 d15, d1 + vadd.i64 q0, q11, q8 + vst1.8 d14, [r2, : 64] + vshl.i64 q7, q8, #26 + vadd.i64 q5, q5, q1 + vtrn.32 d4, d6 + vshl.i64 q1, q1, #26 + vtrn.32 d5, d7 + vsub.i64 q3, q6, q7 + add r2, r2, #16 + vsub.i64 q1, q4, q1 + vst1.8 d4, [r2, : 64] + vtrn.32 d6, d0 + vtrn.32 d7, d1 + sub r2, r2, #8 + vtrn.32 d2, d10 + vtrn.32 d3, d11 + vst1.8 d6, [r2, : 64] + sub r2, r2, #24 + vst1.8 d2, [r2, : 64] + add r2, r3, #96 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #144 + vmov.i32 q0, #0 + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #240 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #48 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d4, [r6, : 64] +.Lmainloop: + mov r2, r5, LSR #3 + and r6, r5, #7 + ldrb r2, [r1, r2] + mov r2, r2, LSR r6 + and r2, r2, #1 + str r5, [sp, #456] + eor r4, r4, r2 + str r2, [sp, #460] + neg r2, r4 + add r4, r3, #96 + add r5, r3, #192 + add r6, r3, #144 + vld1.8 {d8-d9}, [r4, : 128]! + add r7, r3, #240 + vld1.8 {d10-d11}, [r5, : 128]! + veor q6, q4, q5 + vld1.8 {d14-d15}, [r6, : 128]! + vdup.i32 q8, r2 + vld1.8 {d18-d19}, [r7, : 128]! + veor q10, q7, q9 + vld1.8 {d22-d23}, [r4, : 128]! + vand q6, q6, q8 + vld1.8 {d24-d25}, [r5, : 128]! + vand q10, q10, q8 + vld1.8 {d26-d27}, [r6, : 128]! + veor q4, q4, q6 + vld1.8 {d28-d29}, [r7, : 128]! + veor q5, q5, q6 + vld1.8 {d0}, [r4, : 64] + veor q6, q7, q10 + vld1.8 {d2}, [r5, : 64] + veor q7, q9, q10 + vld1.8 {d4}, [r6, : 64] + veor q9, q11, q12 + vld1.8 {d6}, [r7, : 64] + veor q10, q0, q1 + sub r2, r4, #32 + vand q9, q9, q8 + sub r4, r5, #32 + vand q10, q10, q8 + sub r5, r6, #32 + veor q11, q11, q9 + sub r6, r7, #32 + veor q0, q0, q10 + veor q9, q12, q9 + veor q1, q1, q10 + veor q10, q13, q14 + veor q12, q2, q3 + vand q10, q10, q8 + vand q8, q12, q8 + veor q12, q13, q10 + veor q2, q2, q8 + veor q10, q14, q10 + veor q3, q3, q8 + vadd.i32 q8, q4, q6 + vsub.i32 q4, q4, q6 + vst1.8 {d16-d17}, [r2, : 128]! + vadd.i32 q6, q11, q12 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q4, q11, q12 + vst1.8 {d12-d13}, [r2, : 128]! + vadd.i32 q6, q0, q2 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q0, q0, q2 + vst1.8 d12, [r2, : 64] + vadd.i32 q2, q5, q7 + vst1.8 d0, [r5, : 64] + vsub.i32 q0, q5, q7 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q9, q10 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q9, q10 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q1, q3 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q1, q3 + vst1.8 d4, [r4, : 64] + vst1.8 d0, [r6, : 64] + add r2, sp, #512 + add r4, r3, #96 + add r5, r3, #144 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! + vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #288 + vshl.i64 q12, q12, #25 + add r4, r3, #336 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! + vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #240 + add r4, r3, #96 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #144 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #192 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #144 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, r3, #288 + add r4, r3, #336 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vsub.i32 q1, q1, q2 + add r5, r3, #240 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vsub.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #144 + add r4, r3, #96 + add r5, r3, #144 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q2, q0, q1 + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d6-d7}, [r4, : 128]! + vsub.i32 q4, q1, q3 + vadd.i32 q1, q1, q3 + vld1.8 {d6}, [r2, : 64] + vld1.8 {d10}, [r4, : 64] + vsub.i32 q6, q3, q5 + vadd.i32 q3, q3, q5 + vst1.8 {d4-d5}, [r5, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d8-d9}, [r5, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d12, [r5, : 64] + vst1.8 d6, [r6, : 64] + add r2, r3, #0 + add r4, r3, #240 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #336 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #288 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #288 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, sp, #512 + add r4, r3, #144 + add r5, r3, #192 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! + vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #144 + vshl.i64 q12, q12, #25 + add r4, r3, #192 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! + vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #336 + add r4, r3, #288 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q1, q1, q2 + add r5, r3, #288 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vadd.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #48 + add r4, r3, #144 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #288 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #240 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #240 + vshl.i64 q7, q7, #25 + add r4, r3, #144 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + ldr r2, [sp, #456] + ldr r4, [sp, #460] + subs r5, r2, #1 + bge .Lmainloop + add r1, r3, #144 + add r2, r3, #336 + vld1.8 {d0-d1}, [r1, : 128]! + vld1.8 {d2-d3}, [r1, : 128]! + vld1.8 {d4}, [r1, : 64] + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 d4, [r2, : 64] + movw r1, #0 +.Linvertloop: + add r2, r3, #144 + movw r4, #0 + movw r5, #2 + cmp r1, #1 + moveq r5, #1 + addeq r2, r3, #336 + addeq r4, r3, #48 + cmp r1, #2 + moveq r5, #1 + addeq r2, r3, #48 + cmp r1, #3 + moveq r5, #5 + addeq r4, r3, #336 + cmp r1, #4 + moveq r5, #10 + cmp r1, #5 + moveq r5, #20 + cmp r1, #6 + moveq r5, #10 + addeq r2, r3, #336 + addeq r4, r3, #336 + cmp r1, #7 + moveq r5, #50 + cmp r1, #8 + moveq r5, #100 + cmp r1, #9 + moveq r5, #50 + addeq r2, r3, #336 + cmp r1, #10 + moveq r5, #5 + addeq r2, r3, #48 + cmp r1, #11 + moveq r5, #0 + addeq r2, r3, #96 + add r6, r3, #144 + add r7, r3, #288 + vld1.8 {d0-d1}, [r6, : 128]! + vld1.8 {d2-d3}, [r6, : 128]! + vld1.8 {d4}, [r6, : 64] + vst1.8 {d0-d1}, [r7, : 128]! + vst1.8 {d2-d3}, [r7, : 128]! + vst1.8 d4, [r7, : 64] + cmp r5, #0 + beq .Lskipsquaringloop +.Lsquaringloop: + add r6, r3, #288 + add r7, r3, #288 + add r8, r3, #288 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r7, : 128]! + vld1.8 {d6-d7}, [r7, : 128]! + vld1.8 {d9}, [r7, : 64] + vld1.8 {d10-d11}, [r6, : 128]! + add r7, sp, #384 + vld1.8 {d12-d13}, [r6, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r6, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r7, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r7, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r7, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r7, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r7, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r6, r7, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r6, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r6, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r6, r6, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r6, : 64] + sub r6, r6, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r7, sp, #480 + vld1.8 {d14-d15}, [r7, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r6, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r6, : 64]! + add r6, sp, #496 + vld1.8 {d20-d21}, [r6, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r6, r8, #8 + vst1.8 d0, [r6, : 64] + vsub.i64 d19, d19, d9 + add r6, r6, #16 + vst1.8 d16, [r6, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r6, r6, #8 + vst1.8 d6, [r6, : 64] + add r6, r6, #16 + vst1.8 d18, [r6, : 64] + vzip.i32 q2, q5 + sub r6, r6, #32 + vst1.8 d4, [r6, : 64] + subs r5, r5, #1 + bhi .Lsquaringloop +.Lskipsquaringloop: + mov r2, r2 + add r5, r3, #288 + add r6, r3, #144 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r5, : 128]! + vld1.8 {d6-d7}, [r5, : 128]! + vld1.8 {d9}, [r5, : 64] + vld1.8 {d10-d11}, [r2, : 128]! + add r5, sp, #384 + vld1.8 {d12-d13}, [r2, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r2, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r5, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r5, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r5, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r5, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r5, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r2, r5, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r2, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r2, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r2, r2, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r2, : 64] + sub r2, r2, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r5, sp, #480 + vld1.8 {d14-d15}, [r5, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r2, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r2, : 64]! + add r2, sp, #496 + vld1.8 {d20-d21}, [r2, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r2, r6, #8 + vst1.8 d0, [r2, : 64] + vsub.i64 d19, d19, d9 + add r2, r2, #16 + vst1.8 d16, [r2, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r2, r2, #8 + vst1.8 d6, [r2, : 64] + add r2, r2, #16 + vst1.8 d18, [r2, : 64] + vzip.i32 q2, q5 + sub r2, r2, #32 + vst1.8 d4, [r2, : 64] + cmp r4, #0 + beq .Lskippostcopy + add r2, r3, #144 + mov r4, r4 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +.Lskippostcopy: + cmp r1, #1 + bne .Lskipfinalcopy + add r2, r3, #288 + add r4, r3, #144 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +.Lskipfinalcopy: + add r1, r1, #1 + cmp r1, #12 + blo .Linvertloop + add r1, r3, #144 + ldr r2, [r1], #4 + ldr r3, [r1], #4 + ldr r4, [r1], #4 + ldr r5, [r1], #4 + ldr r6, [r1], #4 + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + ldr r1, [r1] + add r11, r1, r1, LSL #4 + add r11, r11, r1, LSL #1 + add r11, r11, #16777216 + mov r11, r11, ASR #25 + add r11, r11, r2 + mov r11, r11, ASR #26 + add r11, r11, r3 + mov r11, r11, ASR #25 + add r11, r11, r4 + mov r11, r11, ASR #26 + add r11, r11, r5 + mov r11, r11, ASR #25 + add r11, r11, r6 + mov r11, r11, ASR #26 + add r11, r11, r7 + mov r11, r11, ASR #25 + add r11, r11, r8 + mov r11, r11, ASR #26 + add r11, r11, r9 + mov r11, r11, ASR #25 + add r11, r11, r10 + mov r11, r11, ASR #26 + add r11, r11, r1 + mov r11, r11, ASR #25 + add r2, r2, r11 + add r2, r2, r11, LSL #1 + add r2, r2, r11, LSL #4 + mov r11, r2, ASR #26 + add r3, r3, r11 + sub r2, r2, r11, LSL #26 + mov r11, r3, ASR #25 + add r4, r4, r11 + sub r3, r3, r11, LSL #25 + mov r11, r4, ASR #26 + add r5, r5, r11 + sub r4, r4, r11, LSL #26 + mov r11, r5, ASR #25 + add r6, r6, r11 + sub r5, r5, r11, LSL #25 + mov r11, r6, ASR #26 + add r7, r7, r11 + sub r6, r6, r11, LSL #26 + mov r11, r7, ASR #25 + add r8, r8, r11 + sub r7, r7, r11, LSL #25 + mov r11, r8, ASR #26 + add r9, r9, r11 + sub r8, r8, r11, LSL #26 + mov r11, r9, ASR #25 + add r10, r10, r11 + sub r9, r9, r11, LSL #25 + mov r11, r10, ASR #26 + add r1, r1, r11 + sub r10, r10, r11, LSL #26 + mov r11, r1, ASR #25 + sub r1, r1, r11, LSL #25 + add r2, r2, r3, LSL #26 + mov r3, r3, LSR #6 + add r3, r3, r4, LSL #19 + mov r4, r4, LSR #13 + add r4, r4, r5, LSL #13 + mov r5, r5, LSR #19 + add r5, r5, r6, LSL #6 + add r6, r7, r8, LSL #25 + mov r7, r8, LSR #7 + add r7, r7, r9, LSL #19 + mov r8, r9, LSR #13 + add r8, r8, r10, LSL #12 + mov r9, r10, LSR #20 + add r1, r9, r1, LSL #6 + str r2, [r0] + str r3, [r0, #4] + str r4, [r0, #8] + str r5, [r0, #12] + str r6, [r0, #16] + str r7, [r0, #20] + str r8, [r0, #24] + str r1, [r0, #28] + movw r0, #0 + mov sp, ip + pop {r4-r11, pc} +ENDPROC(curve25519_neon) diff --git a/lib/crypto/arm/curve25519.h b/lib/crypto/arm/curve25519.h new file mode 100644 index 000000000000..f6d66494eb8f --- /dev/null +++ b/lib/crypto/arm/curve25519.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], + const u8 scalar[CURVE25519_KEY_SIZE], + const u8 point[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + curve25519_neon(out, scalar, point); + kernel_neon_end(); + } else { + curve25519_generic(out, scalar, point); + } +} + +static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + curve25519_arch(pub, secret, curve25519_base_point); +} + +#define curve25519_mod_init_arch curve25519_mod_init_arch +static void curve25519_mod_init_arch(void) +{ + if (elf_hwcap & HWCAP_NEON) + static_branch_enable(&have_neon); +} diff --git a/lib/crypto/curve25519-generic.c b/lib/crypto/curve25519-generic.c deleted file mode 100644 index f8aa70c9f559..000000000000 --- a/lib/crypto/curve25519-generic.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - * - * This is an implementation of the Curve25519 ECDH algorithm, using either - * a 32-bit implementation or a 64-bit implementation with 128-bit integers, - * depending on what is supported by the target compiler. - * - * Information: https://cr.yp.to/ecdh.html - */ - -#include -#include -#include - -const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; -const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; - -EXPORT_SYMBOL(curve25519_null_point); -EXPORT_SYMBOL(curve25519_base_point); -EXPORT_SYMBOL(curve25519_generic); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Curve25519 scalar multiplication"); -MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c index 1b786389d714..01e265dfbcd9 100644 --- a/lib/crypto/curve25519.c +++ b/lib/crypto/curve25519.c @@ -2,8 +2,9 @@ /* * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. * - * This is an implementation of the Curve25519 ECDH algorithm, using either - * a 32-bit implementation or a 64-bit implementation with 128-bit integers, + * This is an implementation of the Curve25519 ECDH algorithm, using either an + * architecture-optimized implementation or a generic implementation. The + * generic implementation is either 32-bit, or 64-bit with 128-bit integers, * depending on what is supported by the target compiler. * * Information: https://cr.yp.to/ecdh.html @@ -15,15 +16,32 @@ #include #include +static const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; +static const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; + +#ifdef CONFIG_CRYPTO_LIB_CURVE25519_ARCH +#include "curve25519.h" /* $(SRCARCH)/curve25519.h */ +#else +static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + curve25519_generic(mypublic, secret, basepoint); +} + +static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + curve25519_generic(pub, secret, curve25519_base_point); +} +#endif + bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]) { - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) - curve25519_arch(mypublic, secret, basepoint); - else - curve25519_generic(mypublic, secret, basepoint); + curve25519_arch(mypublic, secret, basepoint); return crypto_memneq(mypublic, curve25519_null_point, CURVE25519_KEY_SIZE); } @@ -36,27 +54,25 @@ curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], if (unlikely(!crypto_memneq(secret, curve25519_null_point, CURVE25519_KEY_SIZE))) return false; - - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) - curve25519_base_arch(pub, secret); - else - curve25519_generic(pub, secret, curve25519_base_point); + curve25519_base_arch(pub, secret); return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); } EXPORT_SYMBOL(curve25519_generate_public); -static int __init curve25519_init(void) +#ifdef curve25519_mod_init_arch +static int __init curve25519_mod_init(void) { + curve25519_mod_init_arch(); return 0; } +subsys_initcall(curve25519_mod_init); -static void __exit curve25519_exit(void) +static void __exit curve25519_mod_exit(void) { } - -module_init(curve25519_init); -module_exit(curve25519_exit); +module_exit(curve25519_mod_exit); +#endif MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Curve25519 scalar multiplication"); +MODULE_DESCRIPTION("Curve25519 algorithm"); MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/powerpc/curve25519-ppc64le_asm.S b/lib/crypto/powerpc/curve25519-ppc64le_asm.S new file mode 100644 index 000000000000..06c1febe24b9 --- /dev/null +++ b/lib/crypto/powerpc/curve25519-ppc64le_asm.S @@ -0,0 +1,671 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +# +# This code is taken from CRYPTOGAMs[1] and is included here using the option +# in the license to distribute the code under the GPL. Therefore this program +# is free software; you can redistribute it and/or modify it under the terms of +# the GNU General Public License version 2 as published by the Free Software +# Foundation. +# +# [1] https://github.com/dot-asm/cryptogams/ + +# Copyright (c) 2006-2017, CRYPTOGAMS by +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. +# +# * Neither the name of the CRYPTOGAMS nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see https://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# +# ==================================================================== +# Written and Modified by Danny Tsen +# - Added x25519_fe51_sqr_times, x25519_fe51_frombytes, x25519_fe51_tobytes +# and x25519_cswap +# +# Copyright 2024- IBM Corp. +# +# X25519 lower-level primitives for PPC64. +# + +#include + +.text + +.align 5 +SYM_FUNC_START(x25519_fe51_mul) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 6,0(5) + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mulld 22,7,6 + mulhdu 23,7,6 + + mulld 24,8,6 + mulhdu 25,8,6 + + mulld 30,11,6 + mulhdu 31,11,6 + ld 4,8(5) + mulli 11,11,19 + + mulld 26,9,6 + mulhdu 27,9,6 + + mulld 28,10,6 + mulhdu 29,10,6 + mulld 12,11,4 + mulhdu 21,11,4 + addc 22,22,12 + adde 23,23,21 + + mulld 12,7,4 + mulhdu 21,7,4 + addc 24,24,12 + adde 25,25,21 + + mulld 12,10,4 + mulhdu 21,10,4 + ld 6,16(5) + mulli 10,10,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,8,4 + mulhdu 21,8,4 + addc 26,26,12 + adde 27,27,21 + + mulld 12,9,4 + mulhdu 21,9,4 + addc 28,28,12 + adde 29,29,21 + mulld 12,10,6 + mulhdu 21,10,6 + addc 22,22,12 + adde 23,23,21 + + mulld 12,11,6 + mulhdu 21,11,6 + addc 24,24,12 + adde 25,25,21 + + mulld 12,9,6 + mulhdu 21,9,6 + ld 4,24(5) + mulli 9,9,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,7,6 + mulhdu 21,7,6 + addc 26,26,12 + adde 27,27,21 + + mulld 12,8,6 + mulhdu 21,8,6 + addc 28,28,12 + adde 29,29,21 + mulld 12,9,4 + mulhdu 21,9,4 + addc 22,22,12 + adde 23,23,21 + + mulld 12,10,4 + mulhdu 21,10,4 + addc 24,24,12 + adde 25,25,21 + + mulld 12,8,4 + mulhdu 21,8,4 + ld 6,32(5) + mulli 8,8,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,11,4 + mulhdu 21,11,4 + addc 26,26,12 + adde 27,27,21 + + mulld 12,7,4 + mulhdu 21,7,4 + addc 28,28,12 + adde 29,29,21 + mulld 12,8,6 + mulhdu 21,8,6 + addc 22,22,12 + adde 23,23,21 + + mulld 12,9,6 + mulhdu 21,9,6 + addc 24,24,12 + adde 25,25,21 + + mulld 12,10,6 + mulhdu 21,10,6 + addc 26,26,12 + adde 27,27,21 + + mulld 12,11,6 + mulhdu 21,11,6 + addc 28,28,12 + adde 29,29,21 + + mulld 12,7,6 + mulhdu 21,7,6 + addc 30,30,12 + adde 31,31,21 + +.Lfe51_reduce: + li 0,-1 + srdi 0,0,13 + + srdi 12,26,51 + and 9,26,0 + insrdi 12,27,51,0 + srdi 21,22,51 + and 7,22,0 + insrdi 21,23,51,0 + addc 28,28,12 + addze 29,29 + addc 24,24,21 + addze 25,25 + + srdi 12,28,51 + and 10,28,0 + insrdi 12,29,51,0 + srdi 21,24,51 + and 8,24,0 + insrdi 21,25,51,0 + addc 30,30,12 + addze 31,31 + add 9,9,21 + + srdi 12,30,51 + and 11,30,0 + insrdi 12,31,51,0 + mulli 12,12,19 + + add 7,7,12 + + srdi 21,9,51 + and 9,9,0 + add 10,10,21 + + srdi 12,7,51 + and 7,7,0 + add 8,8,12 + + std 9,16(3) + std 10,24(3) + std 11,32(3) + std 7,0(3) + std 8,8(3) + + ld 21,56(1) + ld 22,64(1) + ld 23,72(1) + ld 24,80(1) + ld 25,88(1) + ld 26,96(1) + ld 27,104(1) + ld 28,112(1) + ld 29,120(1) + ld 30,128(1) + ld 31,136(1) + addi 1,1,144 + blr +SYM_FUNC_END(x25519_fe51_mul) + +.align 5 +SYM_FUNC_START(x25519_fe51_sqr) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + add 6,7,7 + mulli 21,11,19 + + mulld 22,7,7 + mulhdu 23,7,7 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + add 6,8,8 + mulld 12,11,21 + mulhdu 11,11,21 + addc 28,28,12 + adde 29,29,11 + + mulli 5,10,19 + + mulld 12,8,8 + mulhdu 11,8,8 + addc 26,26,12 + adde 27,27,11 + mulld 12,9,6 + mulhdu 11,9,6 + addc 28,28,12 + adde 29,29,11 + mulld 12,10,6 + mulhdu 11,10,6 + addc 30,30,12 + adde 31,31,11 + mulld 12,21,6 + mulhdu 11,21,6 + add 6,10,10 + addc 22,22,12 + adde 23,23,11 + mulld 12,10,5 + mulhdu 10,10,5 + addc 24,24,12 + adde 25,25,10 + mulld 12,6,21 + mulhdu 10,6,21 + add 6,9,9 + addc 26,26,12 + adde 27,27,10 + + mulld 12,9,9 + mulhdu 10,9,9 + addc 30,30,12 + adde 31,31,10 + mulld 12,5,6 + mulhdu 10,5,6 + addc 22,22,12 + adde 23,23,10 + mulld 12,21,6 + mulhdu 10,21,6 + addc 24,24,12 + adde 25,25,10 + + b .Lfe51_reduce +SYM_FUNC_END(x25519_fe51_sqr) + +.align 5 +SYM_FUNC_START(x25519_fe51_mul121666) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + lis 6,1 + ori 6,6,56130 + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mulld 22,7,6 + mulhdu 23,7,6 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + + b .Lfe51_reduce +SYM_FUNC_END(x25519_fe51_mul121666) + +.align 5 +SYM_FUNC_START(x25519_fe51_sqr_times) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mtctr 5 + +.Lsqr_times_loop: + add 6,7,7 + mulli 21,11,19 + + mulld 22,7,7 + mulhdu 23,7,7 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + add 6,8,8 + mulld 12,11,21 + mulhdu 11,11,21 + addc 28,28,12 + adde 29,29,11 + + mulli 5,10,19 + + mulld 12,8,8 + mulhdu 11,8,8 + addc 26,26,12 + adde 27,27,11 + mulld 12,9,6 + mulhdu 11,9,6 + addc 28,28,12 + adde 29,29,11 + mulld 12,10,6 + mulhdu 11,10,6 + addc 30,30,12 + adde 31,31,11 + mulld 12,21,6 + mulhdu 11,21,6 + add 6,10,10 + addc 22,22,12 + adde 23,23,11 + mulld 12,10,5 + mulhdu 10,10,5 + addc 24,24,12 + adde 25,25,10 + mulld 12,6,21 + mulhdu 10,6,21 + add 6,9,9 + addc 26,26,12 + adde 27,27,10 + + mulld 12,9,9 + mulhdu 10,9,9 + addc 30,30,12 + adde 31,31,10 + mulld 12,5,6 + mulhdu 10,5,6 + addc 22,22,12 + adde 23,23,10 + mulld 12,21,6 + mulhdu 10,21,6 + addc 24,24,12 + adde 25,25,10 + + # fe51_reduce + li 0,-1 + srdi 0,0,13 + + srdi 12,26,51 + and 9,26,0 + insrdi 12,27,51,0 + srdi 21,22,51 + and 7,22,0 + insrdi 21,23,51,0 + addc 28,28,12 + addze 29,29 + addc 24,24,21 + addze 25,25 + + srdi 12,28,51 + and 10,28,0 + insrdi 12,29,51,0 + srdi 21,24,51 + and 8,24,0 + insrdi 21,25,51,0 + addc 30,30,12 + addze 31,31 + add 9,9,21 + + srdi 12,30,51 + and 11,30,0 + insrdi 12,31,51,0 + mulli 12,12,19 + + add 7,7,12 + + srdi 21,9,51 + and 9,9,0 + add 10,10,21 + + srdi 12,7,51 + and 7,7,0 + add 8,8,12 + + bdnz .Lsqr_times_loop + + std 9,16(3) + std 10,24(3) + std 11,32(3) + std 7,0(3) + std 8,8(3) + + ld 21,56(1) + ld 22,64(1) + ld 23,72(1) + ld 24,80(1) + ld 25,88(1) + ld 26,96(1) + ld 27,104(1) + ld 28,112(1) + ld 29,120(1) + ld 30,128(1) + ld 31,136(1) + addi 1,1,144 + blr +SYM_FUNC_END(x25519_fe51_sqr_times) + +.align 5 +SYM_FUNC_START(x25519_fe51_frombytes) + + li 12, -1 + srdi 12, 12, 13 # 0x7ffffffffffff + + ld 5, 0(4) + ld 6, 8(4) + ld 7, 16(4) + ld 8, 24(4) + + srdi 10, 5, 51 + and 5, 5, 12 # h0 + + sldi 11, 6, 13 + or 11, 10, 11 # h1t + srdi 10, 6, 38 + and 6, 11, 12 # h1 + + sldi 11, 7, 26 + or 10, 10, 11 # h2t + + srdi 11, 7, 25 + and 7, 10, 12 # h2 + sldi 10, 8, 39 + or 11, 11, 10 # h3t + + srdi 9, 8, 12 + and 8, 11, 12 # h3 + and 9, 9, 12 # h4 + + std 5, 0(3) + std 6, 8(3) + std 7, 16(3) + std 8, 24(3) + std 9, 32(3) + + blr +SYM_FUNC_END(x25519_fe51_frombytes) + +.align 5 +SYM_FUNC_START(x25519_fe51_tobytes) + + ld 5, 0(4) + ld 6, 8(4) + ld 7, 16(4) + ld 8, 24(4) + ld 9, 32(4) + + li 12, -1 + srdi 12, 12, 13 # 0x7ffffffffffff + + # Full reducuction + addi 10, 5, 19 + srdi 10, 10, 51 + add 10, 10, 6 + srdi 10, 10, 51 + add 10, 10, 7 + srdi 10, 10, 51 + add 10, 10, 8 + srdi 10, 10, 51 + add 10, 10, 9 + srdi 10, 10, 51 + + mulli 10, 10, 19 + add 5, 5, 10 + srdi 11, 5, 51 + add 6, 6, 11 + srdi 11, 6, 51 + add 7, 7, 11 + srdi 11, 7, 51 + add 8, 8, 11 + srdi 11, 8, 51 + add 9, 9, 11 + + and 5, 5, 12 + and 6, 6, 12 + and 7, 7, 12 + and 8, 8, 12 + and 9, 9, 12 + + sldi 10, 6, 51 + or 5, 5, 10 # s0 + + srdi 11, 6, 13 + sldi 10, 7, 38 + or 6, 11, 10 # s1 + + srdi 11, 7, 26 + sldi 10, 8, 25 + or 7, 11, 10 # s2 + + srdi 11, 8, 39 + sldi 10, 9, 12 + or 8, 11, 10 # s4 + + std 5, 0(3) + std 6, 8(3) + std 7, 16(3) + std 8, 24(3) + + blr +SYM_FUNC_END(x25519_fe51_tobytes) + +.align 5 +SYM_FUNC_START(x25519_cswap) + + li 7, 5 + neg 6, 5 + mtctr 7 + +.Lswap_loop: + ld 8, 0(3) + ld 9, 0(4) + xor 10, 8, 9 + and 10, 10, 6 + xor 11, 8, 10 + xor 12, 9, 10 + std 11, 0(3) + addi 3, 3, 8 + std 12, 0(4) + addi 4, 4, 8 + bdnz .Lswap_loop + + blr +SYM_FUNC_END(x25519_cswap) diff --git a/lib/crypto/powerpc/curve25519.h b/lib/crypto/powerpc/curve25519.h new file mode 100644 index 000000000000..dee6234c48e9 --- /dev/null +++ b/lib/crypto/powerpc/curve25519.h @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2024- IBM Corp. + * + * X25519 scalar multiplication with 51 bits limbs for PPC64le. + * Based on RFC7748 and AArch64 optimized implementation for X25519 + * - Algorithm 1 Scalar multiplication of a variable point + */ + +#include +#include +#include + +#include +#include + +typedef uint64_t fe51[5]; + +asmlinkage void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g); +asmlinkage void x25519_fe51_sqr(fe51 h, const fe51 f); +asmlinkage void x25519_fe51_mul121666(fe51 h, fe51 f); +asmlinkage void x25519_fe51_sqr_times(fe51 h, const fe51 f, int n); +asmlinkage void x25519_fe51_frombytes(fe51 h, const uint8_t *s); +asmlinkage void x25519_fe51_tobytes(uint8_t *s, const fe51 h); +asmlinkage void x25519_cswap(fe51 p, fe51 q, unsigned int bit); + +#define fmul x25519_fe51_mul +#define fsqr x25519_fe51_sqr +#define fmul121666 x25519_fe51_mul121666 +#define fe51_tobytes x25519_fe51_tobytes + +static void fadd(fe51 h, const fe51 f, const fe51 g) +{ + h[0] = f[0] + g[0]; + h[1] = f[1] + g[1]; + h[2] = f[2] + g[2]; + h[3] = f[3] + g[3]; + h[4] = f[4] + g[4]; +} + +/* + * Prime = 2 ** 255 - 19, 255 bits + * (0x7fffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffed) + * + * Prime in 5 51-bit limbs + */ +static fe51 prime51 = { 0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff}; + +static void fsub(fe51 h, const fe51 f, const fe51 g) +{ + h[0] = (f[0] + ((prime51[0] * 2))) - g[0]; + h[1] = (f[1] + ((prime51[1] * 2))) - g[1]; + h[2] = (f[2] + ((prime51[2] * 2))) - g[2]; + h[3] = (f[3] + ((prime51[3] * 2))) - g[3]; + h[4] = (f[4] + ((prime51[4] * 2))) - g[4]; +} + +static void fe51_frombytes(fe51 h, const uint8_t *s) +{ + /* + * Make sure 64-bit aligned. + */ + unsigned char sbuf[32+8]; + unsigned char *sb = PTR_ALIGN((void *)sbuf, 8); + + memcpy(sb, s, 32); + x25519_fe51_frombytes(h, sb); +} + +static void finv(fe51 o, const fe51 i) +{ + fe51 a0, b, c, t00; + + fsqr(a0, i); + x25519_fe51_sqr_times(t00, a0, 2); + + fmul(b, t00, i); + fmul(a0, b, a0); + + fsqr(t00, a0); + + fmul(b, t00, b); + x25519_fe51_sqr_times(t00, b, 5); + + fmul(b, t00, b); + x25519_fe51_sqr_times(t00, b, 10); + + fmul(c, t00, b); + x25519_fe51_sqr_times(t00, c, 20); + + fmul(t00, t00, c); + x25519_fe51_sqr_times(t00, t00, 10); + + fmul(b, t00, b); + x25519_fe51_sqr_times(t00, b, 50); + + fmul(c, t00, b); + x25519_fe51_sqr_times(t00, c, 100); + + fmul(t00, t00, c); + x25519_fe51_sqr_times(t00, t00, 50); + + fmul(t00, t00, b); + x25519_fe51_sqr_times(t00, t00, 5); + + fmul(o, t00, a0); +} + +static void curve25519_fe51(uint8_t out[32], const uint8_t scalar[32], + const uint8_t point[32]) +{ + fe51 x1, x2, z2, x3, z3; + uint8_t s[32]; + unsigned int swap = 0; + int i; + + memcpy(s, scalar, 32); + s[0] &= 0xf8; + s[31] &= 0x7f; + s[31] |= 0x40; + fe51_frombytes(x1, point); + + z2[0] = z2[1] = z2[2] = z2[3] = z2[4] = 0; + x3[0] = x1[0]; + x3[1] = x1[1]; + x3[2] = x1[2]; + x3[3] = x1[3]; + x3[4] = x1[4]; + + x2[0] = z3[0] = 1; + x2[1] = z3[1] = 0; + x2[2] = z3[2] = 0; + x2[3] = z3[3] = 0; + x2[4] = z3[4] = 0; + + for (i = 254; i >= 0; --i) { + unsigned int k_t = 1 & (s[i / 8] >> (i & 7)); + fe51 a, b, c, d, e; + fe51 da, cb, aa, bb; + fe51 dacb_p, dacb_m; + + swap ^= k_t; + x25519_cswap(x2, x3, swap); + x25519_cswap(z2, z3, swap); + swap = k_t; + + fsub(b, x2, z2); // B = x_2 - z_2 + fadd(a, x2, z2); // A = x_2 + z_2 + fsub(d, x3, z3); // D = x_3 - z_3 + fadd(c, x3, z3); // C = x_3 + z_3 + + fsqr(bb, b); // BB = B^2 + fsqr(aa, a); // AA = A^2 + fmul(da, d, a); // DA = D * A + fmul(cb, c, b); // CB = C * B + + fsub(e, aa, bb); // E = AA - BB + fmul(x2, aa, bb); // x2 = AA * BB + fadd(dacb_p, da, cb); // DA + CB + fsub(dacb_m, da, cb); // DA - CB + + fmul121666(z3, e); // 121666 * E + fsqr(z2, dacb_m); // (DA - CB)^2 + fsqr(x3, dacb_p); // x3 = (DA + CB)^2 + fadd(b, bb, z3); // BB + 121666 * E + fmul(z3, x1, z2); // z3 = x1 * (DA - CB)^2 + fmul(z2, e, b); // z2 = e * (BB + (DA + CB)^2) + } + + finv(z2, z2); + fmul(x2, x2, z2); + fe51_tobytes(out, x2); +} + +static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + curve25519_fe51(mypublic, secret, basepoint); +} + +static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + curve25519_fe51(pub, secret, curve25519_base_point); +} diff --git a/lib/crypto/x86/curve25519.h b/lib/crypto/x86/curve25519.h new file mode 100644 index 000000000000..5c0b8408852d --- /dev/null +++ b/lib/crypto/x86/curve25519.h @@ -0,0 +1,1613 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2020 Jason A. Donenfeld . All Rights Reserved. + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + */ + +#include +#include +#include + +#include +#include + +static __always_inline u64 eq_mask(u64 a, u64 b) +{ + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + return xnx - (u64)1U; +} + +static __always_inline u64 gte_mask(u64 a, u64 b) +{ + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + return x_xor_q_ - (u64)1U; +} + +/* Computes the addition of four-element f1 with value in f2 + * and returns the carry (if any) */ +static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) +{ + u64 carry_r; + + asm volatile( + /* Clear registers to propagate the carry bit */ + " xor %%r8d, %%r8d;" + " xor %%r9d, %%r9d;" + " xor %%r10d, %%r10d;" + " xor %%r11d, %%r11d;" + " xor %k1, %k1;" + + /* Begin addition chain */ + " addq 0(%3), %0;" + " movq %0, 0(%2);" + " adcxq 8(%3), %%r8;" + " movq %%r8, 8(%2);" + " adcxq 16(%3), %%r9;" + " movq %%r9, 16(%2);" + " adcxq 24(%3), %%r10;" + " movq %%r10, 24(%2);" + + /* Return the carry bit in a register */ + " adcx %%r11, %1;" + : "+&r"(f2), "=&r"(carry_r) + : "r"(out), "r"(f1) + : "%r8", "%r9", "%r10", "%r11", "memory", "cc"); + + return carry_r; +} + +/* Computes the field addition of two field elements */ +static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw addition of f1 + f2 */ + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + /* Step 2: Add carry*38 to the original sum */ + " xor %%ecx, %%ecx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +/* Computes the field subtraction of two field elements */ +static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw subtraction of f1-f2 */ + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + /* Step 2: Subtract carry*38 from the original difference */ + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + /* Store the result */ + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r"(out), "r"(f1), "r"(f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +/* Computes a field multiplication: out <- f1 * f2 + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + + /* Compute the raw multiplication: tmp <- src1 * src2 */ + + /* Compute src1[0] * src2 */ + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /* Line up pointers */ + " mov %2, %0;" + " mov %3, %2;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); +} + +/* Computes two field multiplications: + * out[0] <- f1[0] * f2[0] + * out[1] <- f1[1] * f2[1] + * Uses the 16-element buffer tmp for intermediate results: */ +static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + + /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ + + /* Compute src1[0] * src2 */ + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ + + /* Compute src1[0] * src2 */ + " movq 32(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 64(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 72(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 40(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 72(%2), %%r8;" + " movq %%r8, 72(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 80(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 48(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 80(%2), %%r8;" + " movq %%r8, 80(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 88(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 56(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 88(%2), %%r8;" + " movq %%r8, 88(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 96(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 104(%2);" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 112(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 120(%2);" + + /* Line up pointers */ + " mov %2, %0;" + " mov %3, %2;" + + /* Wrap the results back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 40(%2);" + " adcx %1, %%r10;" + " movq %%r10, 48(%2);" + " adcx %1, %%r11;" + " movq %%r11, 56(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); +} + +/* Computes the field multiplication of four-element f1 with value in f2 + * Requires f2 to be smaller than 2^17 */ +static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) +{ + register u64 f2_r asm("rdx") = f2; + + asm volatile( + /* Compute the raw multiplication of f1*f2 */ + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ + " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ + " adcx %%rbx, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2_r) + : "r"(out), "r"(f1) + : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", + "memory", "cc"); +} + +/* Computes p1 <- bit ? p2 : p1 in constant time */ +static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) +{ + asm volatile( + /* Transfer bit into CF flag */ + " add $18446744073709551615, %0;" + + /* cswap p1[0], p2[0] */ + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + /* cswap p1[1], p2[1] */ + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + /* cswap p1[2], p2[2] */ + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + /* cswap p1[3], p2[3] */ + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + /* cswap p1[4], p2[4] */ + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + /* cswap p1[5], p2[5] */ + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + /* cswap p1[6], p2[6] */ + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + /* cswap p1[7], p2[7] */ + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r"(bit) + : "r"(p1), "r"(p2) + : "%r8", "%r9", "%r10", "memory", "cc"); +} + +/* Computes the square of a field element: out <- f * f + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication: tmp <- f * f */ + + /* Step 1: Compute all partial products */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + /* Line up pointers */ + " mov %1, %0;" + " mov %2, %1;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); +} + +/* Computes two field squarings: + * out[0] <- f[0] * f[0] + * out[1] <- f[1] * f[1] + * Uses the 16-element buffer tmp for intermediate results */ +static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Step 1: Compute all partial products */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + /* Step 1: Compute all partial products */ + " movq 32(%0), %%rdx;" /* f[0] */ + " mulxq 40(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 48(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 56(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 56(%0), %%rdx;" /* f[3] */ + " mulxq 40(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 40(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 48(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 32(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 64(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 72(%1);" + " movq 40(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 80(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 88(%1);" + " movq 48(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 96(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 104(%1);" + " movq 56(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 112(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 120(%1);" + + /* Line up pointers */ + " mov %1, %0;" + " mov %2, %1;" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); +} + +static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) +{ + u64 *nq = p01_tmp1; + u64 *nq_p1 = p01_tmp1 + (u32)8U; + u64 *tmp1 = p01_tmp1 + (u32)16U; + u64 *x1 = q; + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *z3 = nq_p1 + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + u64 *x3; + u64 *z31; + u64 *d0; + u64 *c0; + u64 *a1; + u64 *b1; + u64 *d; + u64 *c; + u64 *ab1; + u64 *dc1; + fadd(a, x2, z2); + fsub(b, x2, z2); + x3 = nq_p1; + z31 = nq_p1 + (u32)4U; + d0 = dc; + c0 = dc + (u32)4U; + fadd(c0, x3, z31); + fsub(d0, x3, z31); + fmul2(dc, dc, ab, tmp2); + fadd(x3, d0, c0); + fsub(z31, d0, c0); + a1 = tmp1; + b1 = tmp1 + (u32)4U; + d = tmp1 + (u32)8U; + c = tmp1 + (u32)12U; + ab1 = tmp1; + dc1 = tmp1 + (u32)8U; + fsqr2(dc1, ab1, tmp2); + fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b1, c, (u64)121665U); + fadd(b1, b1, d); + fmul2(nq, dc1, ab1, tmp2); + fmul(z3, z3, x1, tmp2); +} + +static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) +{ + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *d = tmp1 + (u32)8U; + u64 *c = tmp1 + (u32)12U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + fadd(a, x2, z2); + fsub(b, x2, z2); + fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b, c, (u64)121665U); + fadd(b, b, d); + fmul2(nq, dc, ab, tmp2); +} + +static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) +{ + u64 tmp2[16U] = { 0U }; + u64 p01_tmp1_swap[33U] = { 0U }; + u64 *p0 = p01_tmp1_swap; + u64 *p01 = p01_tmp1_swap; + u64 *p03 = p01; + u64 *p11 = p01 + (u32)8U; + u64 *x0; + u64 *z0; + u64 *p01_tmp1; + u64 *p01_tmp11; + u64 *nq10; + u64 *nq_p11; + u64 *swap1; + u64 sw0; + u64 *nq1; + u64 *tmp1; + memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); + x0 = p03; + z0 = p03 + (u32)4U; + x0[0U] = (u64)1U; + x0[1U] = (u64)0U; + x0[2U] = (u64)0U; + x0[3U] = (u64)0U; + z0[0U] = (u64)0U; + z0[1U] = (u64)0U; + z0[2U] = (u64)0U; + z0[3U] = (u64)0U; + p01_tmp1 = p01_tmp1_swap; + p01_tmp11 = p01_tmp1_swap; + nq10 = p01_tmp1_swap; + nq_p11 = p01_tmp1_swap + (u32)8U; + swap1 = p01_tmp1_swap + (u32)32U; + cswap2((u64)1U, nq10, nq_p11); + point_add_and_double(init1, p01_tmp11, tmp2); + swap1[0U] = (u64)1U; + { + u32 i; + for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { + u64 *p01_tmp12 = p01_tmp1_swap; + u64 *swap2 = p01_tmp1_swap + (u32)32U; + u64 *nq2 = p01_tmp12; + u64 *nq_p12 = p01_tmp12 + (u32)8U; + u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); + u64 sw = swap2[0U] ^ bit; + cswap2(sw, nq2, nq_p12); + point_add_and_double(init1, p01_tmp12, tmp2); + swap2[0U] = bit; + } + } + sw0 = swap1[0U]; + cswap2(sw0, nq10, nq_p11); + nq1 = p01_tmp1; + tmp1 = p01_tmp1 + (u32)16U; + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + memcpy(out, p0, (u32)8U * sizeof(p0[0U])); + + memzero_explicit(tmp2, sizeof(tmp2)); + memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); +} + +static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) +{ + u32 i; + fsqr(o, inp, tmp); + for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) + fsqr(o, o, tmp); +} + +static void finv(u64 *o, const u64 *i, u64 *tmp) +{ + u64 t1[16U] = { 0U }; + u64 *a0 = t1; + u64 *b = t1 + (u32)4U; + u64 *c = t1 + (u32)8U; + u64 *t00 = t1 + (u32)12U; + u64 *tmp1 = tmp; + u64 *a; + u64 *t0; + fsquare_times(a0, i, tmp1, (u32)1U); + fsquare_times(t00, a0, tmp1, (u32)2U); + fmul(b, t00, i, tmp); + fmul(a0, b, a0, tmp); + fsquare_times(t00, a0, tmp1, (u32)1U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)5U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)10U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)20U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)10U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)50U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)100U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)50U); + fmul(t00, t00, b, tmp); + fsquare_times(t00, t00, tmp1, (u32)5U); + a = t1; + t0 = t1 + (u32)12U; + fmul(o, t0, a, tmp); +} + +static void store_felem(u64 *b, u64 *f) +{ + u64 f30 = f[3U]; + u64 top_bit0 = f30 >> (u32)63U; + u64 f31; + u64 top_bit; + u64 f0; + u64 f1; + u64 f2; + u64 f3; + u64 m0; + u64 m1; + u64 m2; + u64 m3; + u64 mask; + u64 f0_; + u64 f1_; + u64 f2_; + u64 f3_; + u64 o0; + u64 o1; + u64 o2; + u64 o3; + f[3U] = f30 & (u64)0x7fffffffffffffffU; + add_scalar(f, f, (u64)19U * top_bit0); + f31 = f[3U]; + top_bit = f31 >> (u32)63U; + f[3U] = f31 & (u64)0x7fffffffffffffffU; + add_scalar(f, f, (u64)19U * top_bit); + f0 = f[0U]; + f1 = f[1U]; + f2 = f[2U]; + f3 = f[3U]; + m0 = gte_mask(f0, (u64)0xffffffffffffffedU); + m1 = eq_mask(f1, (u64)0xffffffffffffffffU); + m2 = eq_mask(f2, (u64)0xffffffffffffffffU); + m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); + mask = ((m0 & m1) & m2) & m3; + f0_ = f0 - (mask & (u64)0xffffffffffffffedU); + f1_ = f1 - (mask & (u64)0xffffffffffffffffU); + f2_ = f2 - (mask & (u64)0xffffffffffffffffU); + f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); + o0 = f0_; + o1 = f1_; + o2 = f2_; + o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; +} + +static void encode_point(u8 *o, const u64 *i) +{ + const u64 *x = i; + const u64 *z = i + (u32)4U; + u64 tmp[4U] = { 0U }; + u64 tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul(tmp, tmp, x, tmp_w); + store_felem((u64 *)o, tmp); +} + +static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) +{ + u64 init1[8U] = { 0U }; + u64 tmp[4U] = { 0U }; + u64 tmp3; + u64 *x; + u64 *z; + { + u32 i; + for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { + u64 *os = tmp; + const u8 *bj = pub + i * (u32)8U; + u64 u = *(u64 *)bj; + u64 r = u; + u64 x0 = r; + os[i] = x0; + } + } + tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; + x = init1; + z = init1 + (u32)4U; + z[0U] = (u64)1U; + z[1U] = (u64)0U; + z[2U] = (u64)0U; + z[3U] = (u64)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init1, priv, init1); + encode_point(out, init1); +} + +/* The below constants were generated using this sage script: + * + * #!/usr/bin/env sage + * import sys + * from sage.all import * + * def limbs(n): + * n = int(n) + * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) + * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l + * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) + * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] + * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) + * print("static const u64 table_ladder[] = {") + * p = ec.lift_x(9) + * for i in range(252): + * l = (p[0] + p[2]) / (p[0] - p[2]) + * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) + * p = p * 2 + * print("};") + * + */ + +static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; + +static const u64 table_ladder[] = { + 0xfffffffffffffff3ULL, 0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, + 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, + 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, + 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, + 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, + 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, + 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, + 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, + 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, + 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, + 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, + 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, + 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, + 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, + 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, + 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, + 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, + 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, + 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, + 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, + 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, + 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, + 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, + 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, + 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, + 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, + 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, + 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, + 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, + 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, + 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, + 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, + 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, + 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, + 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, + 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, + 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, + 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, + 0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, + 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, + 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, + 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, + 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, + 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, + 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, + 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, + 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, + 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, + 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, + 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, + 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, + 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, + 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, + 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, + 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, + 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, + 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, + 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, + 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, + 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, + 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, + 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, + 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, + 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, + 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, + 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, + 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, + 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, + 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, + 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, + 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, + 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, + 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, + 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, + 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, + 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 0x047069b48aba16b6ULL, + 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, + 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, + 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, + 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, + 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, + 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, + 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, + 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, + 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, + 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, + 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, + 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, + 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, + 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, + 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, + 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, + 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, + 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, + 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, + 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, + 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, + 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, + 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, + 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, + 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, + 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, + 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, + 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, + 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, + 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, + 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, + 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, + 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, + 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, + 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, + 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, + 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, + 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, + 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, + 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, + 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, + 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, + 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, + 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, + 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, + 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, + 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, + 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, + 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, + 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, + 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, + 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, + 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, + 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, + 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, + 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, + 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, + 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, + 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, + 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, + 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, + 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, + 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, + 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, + 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, + 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, + 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, + 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, + 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, + 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, + 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, + 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, + 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, + 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, + 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, + 0x4ba0488ca85ba4c3ULL, 0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL, + 0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL, + 0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL, + 0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL, + 0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL, + 0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL, + 0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL, + 0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL, + 0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL, + 0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL, + 0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL, + 0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL, + 0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL, + 0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL, + 0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL, + 0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL, + 0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL, + 0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL, + 0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL, + 0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL, + 0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL, + 0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL, + 0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL, + 0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL, + 0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL, + 0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL, + 0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL, + 0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL, + 0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL, + 0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL, + 0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL, + 0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL, + 0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL, + 0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL, + 0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL, + 0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL, + 0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL, + 0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL, + 0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL, + 0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL, + 0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL, + 0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL, + 0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL, + 0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL, + 0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL, + 0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL, + 0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL, + 0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL, + 0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL, + 0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL, + 0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL, + 0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL, + 0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL, + 0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL, + 0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL, + 0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL, + 0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL, + 0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL, + 0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL, + 0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL, + 0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL, + 0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL, + 0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL, + 0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL, + 0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL, + 0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL, + 0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL, + 0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL, + 0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL, + 0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL, + 0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL, + 0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL, + 0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL, + 0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL, + 0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL, + 0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL, + 0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL, + 0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL, + 0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL, + 0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL, + 0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL, + 0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL, + 0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL, + 0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL, + 0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL, + 0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL, + 0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL, + 0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL, + 0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL, + 0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL, + 0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL, + 0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL, + 0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL, + 0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL, + 0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL, + 0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL, + 0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL, + 0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL, + 0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL, + 0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL, + 0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL +}; + +static void curve25519_ever64_base(u8 *out, const u8 *priv) +{ + u64 swap = 1; + int i, j, k; + u64 tmp[16 + 32 + 4]; + u64 *x1 = &tmp[0]; + u64 *z1 = &tmp[4]; + u64 *x2 = &tmp[8]; + u64 *z2 = &tmp[12]; + u64 *xz1 = &tmp[0]; + u64 *xz2 = &tmp[8]; + u64 *a = &tmp[0 + 16]; + u64 *b = &tmp[4 + 16]; + u64 *c = &tmp[8 + 16]; + u64 *ab = &tmp[0 + 16]; + u64 *abcd = &tmp[0 + 16]; + u64 *ef = &tmp[16 + 16]; + u64 *efgh = &tmp[16 + 16]; + u64 *key = &tmp[0 + 16 + 32]; + + memcpy(key, priv, 32); + ((u8 *)key)[0] &= 248; + ((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64; + + x1[0] = 1, x1[1] = x1[2] = x1[3] = 0; + z1[0] = 1, z1[1] = z1[2] = z1[3] = 0; + z2[0] = 1, z2[1] = z2[2] = z2[3] = 0; + memcpy(x2, p_minus_s, sizeof(p_minus_s)); + + j = 3; + for (i = 0; i < 4; ++i) { + while (j < (const int[]){ 64, 64, 64, 63 }[i]) { + u64 bit = (key[i] >> j) & 1; + k = (64 * i + j - 3); + swap = swap ^ bit; + cswap2(swap, xz1, xz2); + swap = bit; + fsub(b, x1, z1); + fadd(a, x1, z1); + fmul(c, &table_ladder[4 * k], b, ef); + fsub(b, a, c); + fadd(a, a, c); + fsqr2(ab, ab, efgh); + fmul2(xz1, xz2, ab, efgh); + ++j; + } + j = 0; + } + + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + point_double(xz1, abcd, efgh); + encode_point(out, xz1); + + memzero_explicit(tmp, sizeof(tmp)); +} + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx); + +static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&curve25519_use_bmi2_adx)) + curve25519_ever64(mypublic, secret, basepoint); + else + curve25519_generic(mypublic, secret, basepoint); +} + +static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&curve25519_use_bmi2_adx)) + curve25519_ever64_base(pub, secret); + else + curve25519_generic(pub, secret, curve25519_base_point); +} + +#define curve25519_mod_init_arch curve25519_mod_init_arch +static void curve25519_mod_init_arch(void) +{ + if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX)) + static_branch_enable(&curve25519_use_bmi2_adx); +} -- cgit v1.2.3 From cb2d6b132a44a140aed3562ef932754d39ddccf3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:22 -0700 Subject: lib/crypto: tests: Enable Curve25519 test when CRYPTO_SELFTESTS Now that the Curve25519 library has been disentangled from CRYPTO, adding CRYPTO_SELFTESTS as a default value of CRYPTO_LIB_CURVE25519_KUNIT_TEST no longer causes a recursive kconfig dependency. Do this, which makes this option consistent with the other crypto KUnit test options in the same file. Link: https://lore.kernel.org/r/20250906213523.84915-12-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index eaca60d3e0a3..578af717e13a 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -13,7 +13,7 @@ config CRYPTO_LIB_BLAKE2S_KUNIT_TEST config CRYPTO_LIB_CURVE25519_KUNIT_TEST tristate "KUnit tests for Curve25519" if !KUNIT_ALL_TESTS depends on KUNIT - default KUNIT_ALL_TESTS + default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS select CRYPTO_LIB_BENCHMARK_VISIBLE select CRYPTO_LIB_CURVE25519 help -- cgit v1.2.3 From b3a7bb71bfcd56c8266af8cf2a5dee3802e7a449 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 Aug 2025 09:43:57 -0700 Subject: KUnit: Introduce ffs()-family tests Add KUnit tests for ffs()-family bit scanning functions: ffs(), __ffs(), fls(), __fls(), fls64(), __ffs64(), and ffz(). The tests validate mathematical relationships (e.g. ffs(x) == __ffs(x) + 1), and test zero values, power-of-2 patterns, maximum values, and sparse bit patterns. Build and run tested with everything I could reach with KUnit: $ ./tools/testing/kunit/kunit.py run --arch=x86_64 ffs $ ./tools/testing/kunit/kunit.py run --arch=i386 ffs $ ./tools/testing/kunit/kunit.py run --arch=arm64 --make_options "CROSS_COMPILE=aarch64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=arm --make_options "CROSS_COMPILE=arm-linux-gnueabihf-" ffs $ ./tools/testing/kunit/kunit.py run --arch=powerpc ffs $ ./tools/testing/kunit/kunit.py run --arch=powerpc32 ffs $ ./tools/testing/kunit/kunit.py run --arch=powerpcle ffs $ ./tools/testing/kunit/kunit.py run --arch=riscv --make_options "CROSS_COMPILE=riscv64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=riscv32 --make_options "CROSS_COMPILE=riscv64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=s390 --make_options "CROSS_COMPILE=s390x-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=m68k ffs $ ./tools/testing/kunit/kunit.py run --arch=loongarch ffs $ ./tools/testing/kunit/kunit.py run --arch=mips --make_options "CROSS_COMPILE=mipsel-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=sparc --make_options "CROSS_COMPILE=sparc64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=sparc64 --make_options "CROSS_COMPILE=sparc64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=alpha --make_options "CROSS_COMPILE=alpha-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=sh --make_options "CROSS_COMPILE=sh4-linux-gnu-" ffs Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250804164417.1612371-1-kees@kernel.org Signed-off-by: Kees Cook --- lib/Kconfig.debug | 14 ++ lib/tests/Makefile | 1 + lib/tests/ffs_kunit.c | 526 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 541 insertions(+) create mode 100644 lib/tests/ffs_kunit.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dc0e0c6ed075..2391872c7b3c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2479,6 +2479,20 @@ config STRING_HELPERS_KUNIT_TEST depends on KUNIT default KUNIT_ALL_TESTS +config FFS_KUNIT_TEST + tristate "KUnit test ffs-family functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This builds KUnit tests for ffs-family bit manipulation functions + including ffs(), __ffs(), fls(), __fls(), fls64(), and __ffs64(). + + These tests validate mathematical correctness, edge case handling, + and cross-architecture consistency of bit scanning functions. + + For more information on KUnit and unit tests in general, + please refer to Documentation/dev-tools/kunit/. + config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" diff --git a/lib/tests/Makefile b/lib/tests/Makefile index fa6d728a8b5b..f7460831cfdd 100644 --- a/lib/tests/Makefile +++ b/lib/tests/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_BLACKHOLE_DEV_KUNIT_TEST) += blackhole_dev_kunit.o obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o +obj-$(CONFIG_FFS_KUNIT_TEST) += ffs_kunit.o CFLAGS_fortify_kunit.o += $(call cc-disable-warning, unsequenced) CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-overread) CFLAGS_fortify_kunit.o += $(call cc-disable-warning, stringop-truncation) diff --git a/lib/tests/ffs_kunit.c b/lib/tests/ffs_kunit.c new file mode 100644 index 000000000000..ed11456b116e --- /dev/null +++ b/lib/tests/ffs_kunit.c @@ -0,0 +1,526 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for ffs()-family functions + */ +#include +#include + +/* + * Test data structures + */ +struct ffs_test_case { + unsigned long input; + int expected_ffs; /* ffs() result (1-based) */ + int expected_fls; /* fls() result (1-based) */ + const char *description; +}; + +struct ffs64_test_case { + u64 input; + int expected_fls64; /* fls64() result (1-based) */ + unsigned int expected_ffs64_0based; /* __ffs64() result (0-based) */ + const char *description; +}; + +/* + * Basic edge cases - core functionality validation + */ +static const struct ffs_test_case basic_test_cases[] = { + /* Zero case - special handling */ + {0x00000000, 0, 0, "zero value"}, + + /* Single bit patterns - powers of 2 */ + {0x00000001, 1, 1, "bit 0 set"}, + {0x00000002, 2, 2, "bit 1 set"}, + {0x00000004, 3, 3, "bit 2 set"}, + {0x00000008, 4, 4, "bit 3 set"}, + {0x00000010, 5, 5, "bit 4 set"}, + {0x00000020, 6, 6, "bit 5 set"}, + {0x00000040, 7, 7, "bit 6 set"}, + {0x00000080, 8, 8, "bit 7 set"}, + {0x00000100, 9, 9, "bit 8 set"}, + {0x00008000, 16, 16, "bit 15 set"}, + {0x00010000, 17, 17, "bit 16 set"}, + {0x40000000, 31, 31, "bit 30 set"}, + {0x80000000, 32, 32, "bit 31 set (sign bit)"}, + + /* Maximum values */ + {0xFFFFFFFF, 1, 32, "all bits set"}, + + /* Multiple bit patterns */ + {0x00000003, 1, 2, "bits 0-1 set"}, + {0x00000007, 1, 3, "bits 0-2 set"}, + {0x0000000F, 1, 4, "bits 0-3 set"}, + {0x000000FF, 1, 8, "bits 0-7 set"}, + {0x0000FFFF, 1, 16, "bits 0-15 set"}, + {0x7FFFFFFF, 1, 31, "bits 0-30 set"}, + + /* Sparse patterns */ + {0x00000101, 1, 9, "bits 0,8 set"}, + {0x00001001, 1, 13, "bits 0,12 set"}, + {0x80000001, 1, 32, "bits 0,31 set"}, + {0x40000002, 2, 31, "bits 1,30 set"}, +}; + +/* + * 64-bit test cases + */ +static const struct ffs64_test_case ffs64_test_cases[] = { + /* Zero case */ + {0x0000000000000000ULL, 0, 0, "zero value"}, + + /* Single bit patterns */ + {0x0000000000000001ULL, 1, 0, "bit 0 set"}, + {0x0000000000000002ULL, 2, 1, "bit 1 set"}, + {0x0000000000000004ULL, 3, 2, "bit 2 set"}, + {0x0000000000000008ULL, 4, 3, "bit 3 set"}, + {0x0000000000008000ULL, 16, 15, "bit 15 set"}, + {0x0000000000010000ULL, 17, 16, "bit 16 set"}, + {0x0000000080000000ULL, 32, 31, "bit 31 set"}, + {0x0000000100000000ULL, 33, 32, "bit 32 set"}, + {0x0000000200000000ULL, 34, 33, "bit 33 set"}, + {0x4000000000000000ULL, 63, 62, "bit 62 set"}, + {0x8000000000000000ULL, 64, 63, "bit 63 set (sign bit)"}, + + /* Maximum values */ + {0xFFFFFFFFFFFFFFFFULL, 64, 0, "all bits set"}, + + /* Cross 32-bit boundary patterns */ + {0x00000000FFFFFFFFULL, 32, 0, "lower 32 bits set"}, + {0xFFFFFFFF00000000ULL, 64, 32, "upper 32 bits set"}, + {0x8000000000000001ULL, 64, 0, "bits 0,63 set"}, + {0x4000000000000002ULL, 63, 1, "bits 1,62 set"}, + + /* Mixed patterns */ + {0x00000001FFFFFFFFULL, 33, 0, "bit 32 + lower 32 bits"}, + {0xFFFFFFFF80000000ULL, 64, 31, "upper 32 bits + bit 31"}, +}; + +/* + * Helper function to validate ffs results with detailed error messages + */ +static void validate_ffs_result(struct kunit *test, unsigned long input, + int actual, int expected, const char *func_name, + const char *description) +{ + KUNIT_EXPECT_EQ_MSG(test, actual, expected, + "%s(0x%08lx) [%s]: expected %d, got %d", + func_name, input, description, expected, actual); +} + +/* + * Helper function to validate 64-bit ffs results + */ +static void validate_ffs64_result(struct kunit *test, u64 input, + int actual, int expected, const char *func_name, + const char *description) +{ + KUNIT_EXPECT_EQ_MSG(test, actual, expected, + "%s(0x%016llx) [%s]: expected %d, got %d", + func_name, input, description, expected, actual); +} + +/* + * Helper function to validate mathematical relationships between functions + */ +static void validate_ffs_relationships(struct kunit *test, unsigned long input) +{ + int ffs_result; + int fls_result; + unsigned int ffs_0based; + unsigned int fls_0based; + + if (input == 0) { + /* Special case: zero input */ + KUNIT_EXPECT_EQ(test, ffs(input), 0); + KUNIT_EXPECT_EQ(test, fls(input), 0); + /* __ffs and __fls are undefined for 0, but often return specific values */ + return; + } + + ffs_result = ffs(input); + fls_result = fls(input); + ffs_0based = __ffs(input); + fls_0based = __fls(input); + + /* Relationship: ffs(x) == __ffs(x) + 1 for x != 0 */ + KUNIT_EXPECT_EQ_MSG(test, ffs_result, ffs_0based + 1, + "ffs(0x%08lx) != __ffs(0x%08lx) + 1: %d != %u + 1", + input, input, ffs_result, ffs_0based); + + /* Relationship: fls(x) == __fls(x) + 1 for x != 0 */ + KUNIT_EXPECT_EQ_MSG(test, fls_result, fls_0based + 1, + "fls(0x%08lx) != __fls(0x%08lx) + 1: %d != %u + 1", + input, input, fls_result, fls_0based); + + /* Range validation */ + KUNIT_EXPECT_GE(test, ffs_result, 1); + KUNIT_EXPECT_LE(test, ffs_result, BITS_PER_LONG); + KUNIT_EXPECT_GE(test, fls_result, 1); + KUNIT_EXPECT_LE(test, fls_result, BITS_PER_LONG); +} + +/* + * Helper function to validate 64-bit relationships + */ +static void validate_ffs64_relationships(struct kunit *test, u64 input) +{ + int fls64_result; + unsigned int ffs64_0based; + + if (input == 0) { + KUNIT_EXPECT_EQ(test, fls64(input), 0); + return; + } + + fls64_result = fls64(input); + ffs64_0based = __ffs64(input); + + /* Range validation */ + KUNIT_EXPECT_GE(test, fls64_result, 1); + KUNIT_EXPECT_LE(test, fls64_result, 64); + KUNIT_EXPECT_LT(test, ffs64_0based, 64); + + /* + * Relationships with 32-bit functions should hold for small values + * on all architectures. + */ + if (input <= 0xFFFFFFFFULL) { + unsigned long input_32 = (unsigned long)input; + KUNIT_EXPECT_EQ_MSG(test, fls64(input), fls(input_32), + "fls64(0x%llx) != fls(0x%lx): %d != %d", + input, input_32, fls64(input), fls(input_32)); + + if (input != 0) { + KUNIT_EXPECT_EQ_MSG(test, __ffs64(input), __ffs(input_32), + "__ffs64(0x%llx) != __ffs(0x%lx): %lu != %lu", + input, input_32, + (unsigned long)__ffs64(input), + (unsigned long)__ffs(input_32)); + } + } +} + +/* + * Test basic correctness of all ffs-family functions + */ +static void ffs_basic_correctness_test(struct kunit *test) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(basic_test_cases); i++) { + const struct ffs_test_case *tc = &basic_test_cases[i]; + + /* Test ffs() */ + validate_ffs_result(test, tc->input, ffs(tc->input), + tc->expected_ffs, "ffs", tc->description); + + /* Test fls() */ + validate_ffs_result(test, tc->input, fls(tc->input), + tc->expected_fls, "fls", tc->description); + + /* Test __ffs() - skip zero case as it's undefined */ + if (tc->input != 0) { + /* Calculate expected __ffs() result: __ffs(x) == ffs(x) - 1 */ + unsigned int expected_ffs_0based = tc->expected_ffs - 1; + validate_ffs_result(test, tc->input, __ffs(tc->input), + expected_ffs_0based, "__ffs", tc->description); + } + + /* Test __fls() - skip zero case as it's undefined */ + if (tc->input != 0) { + /* Calculate expected __fls() result: __fls(x) == fls(x) - 1 */ + unsigned int expected_fls_0based = tc->expected_fls - 1; + validate_ffs_result(test, tc->input, __fls(tc->input), + expected_fls_0based, "__fls", tc->description); + } + } +} + +/* + * Test 64-bit function correctness + */ +static void ffs64_correctness_test(struct kunit *test) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ffs64_test_cases); i++) { + const struct ffs64_test_case *tc = &ffs64_test_cases[i]; + + /* Test fls64() */ + validate_ffs64_result(test, tc->input, fls64(tc->input), + tc->expected_fls64, "fls64", tc->description); + + /* Test __ffs64() - skip zero case as it's undefined */ + if (tc->input != 0) { + validate_ffs64_result(test, tc->input, __ffs64(tc->input), + tc->expected_ffs64_0based, "__ffs64", + tc->description); + } + } +} + +/* + * Test mathematical relationships between functions + */ +static void ffs_mathematical_relationships_test(struct kunit *test) +{ + int i; + + /* Test basic cases */ + for (i = 0; i < ARRAY_SIZE(basic_test_cases); i++) { + validate_ffs_relationships(test, basic_test_cases[i].input); + } + + /* Test 64-bit cases */ + for (i = 0; i < ARRAY_SIZE(ffs64_test_cases); i++) { + validate_ffs64_relationships(test, ffs64_test_cases[i].input); + } +} + +/* + * Test edge cases and boundary conditions + */ +static void ffs_edge_cases_test(struct kunit *test) +{ + unsigned long test_patterns[] = { + /* Powers of 2 */ + 1UL, 2UL, 4UL, 8UL, 16UL, 32UL, 64UL, 128UL, + 256UL, 512UL, 1024UL, 2048UL, 4096UL, 8192UL, + + /* Powers of 2 minus 1 */ + 1UL, 3UL, 7UL, 15UL, 31UL, 63UL, 127UL, 255UL, + 511UL, 1023UL, 2047UL, 4095UL, 8191UL, + + /* Boundary values */ + 0x7FFFFFFFUL, /* Maximum positive 32-bit */ + 0x80000000UL, /* Minimum negative 32-bit */ + 0xFFFFFFFFUL, /* Maximum 32-bit unsigned */ + }; + int i; + + for (i = 0; i < ARRAY_SIZE(test_patterns); i++) { + validate_ffs_relationships(test, test_patterns[i]); + } +} + +/* + * Test 64-bit edge cases + */ +static void ffs64_edge_cases_test(struct kunit *test) +{ + u64 test_patterns_64[] = { + /* 64-bit powers of 2 */ + 0x0000000100000000ULL, /* 2^32 */ + 0x0000000200000000ULL, /* 2^33 */ + 0x0000000400000000ULL, /* 2^34 */ + 0x0000001000000000ULL, /* 2^36 */ + 0x0000010000000000ULL, /* 2^40 */ + 0x0001000000000000ULL, /* 2^48 */ + 0x0100000000000000ULL, /* 2^56 */ + 0x4000000000000000ULL, /* 2^62 */ + 0x8000000000000000ULL, /* 2^63 */ + + /* Cross-boundary patterns */ + 0x00000000FFFFFFFFULL, /* Lower 32 bits */ + 0xFFFFFFFF00000000ULL, /* Upper 32 bits */ + 0x7FFFFFFFFFFFFFFFULL, /* Maximum positive 64-bit */ + 0xFFFFFFFFFFFFFFFFULL, /* Maximum 64-bit unsigned */ + }; + int i; + + for (i = 0; i < ARRAY_SIZE(test_patterns_64); i++) { + validate_ffs64_relationships(test, test_patterns_64[i]); + } +} + +/* + * ffz() test data - Find First Zero bit test cases + */ +struct ffz_test_case { + unsigned long input; + unsigned long expected_ffz; + const char *description; +}; + +static const struct ffz_test_case ffz_test_cases[] = { + /* Zero bits in specific positions */ + {0xFFFFFFFE, 0, "bit 0 is zero"}, /* ...11111110 */ + {0xFFFFFFFD, 1, "bit 1 is zero"}, /* ...11111101 */ + {0xFFFFFFFB, 2, "bit 2 is zero"}, /* ...11111011 */ + {0xFFFFFFF7, 3, "bit 3 is zero"}, /* ...11110111 */ + {0xFFFFFFEF, 4, "bit 4 is zero"}, /* ...11101111 */ + {0xFFFFFFDF, 5, "bit 5 is zero"}, /* ...11011111 */ + {0xFFFFFFBF, 6, "bit 6 is zero"}, /* ...10111111 */ + {0xFFFFFF7F, 7, "bit 7 is zero"}, /* ...01111111 */ + {0xFFFFFEFF, 8, "bit 8 is zero"}, /* Gap in bit 8 */ + {0xFFFF7FFF, 15, "bit 15 is zero"}, /* Gap in bit 15 */ + {0xFFFEFFFF, 16, "bit 16 is zero"}, /* Gap in bit 16 */ + {0xBFFFFFFF, 30, "bit 30 is zero"}, /* Gap in bit 30 */ + {0x7FFFFFFF, 31, "bit 31 is zero"}, /* 01111111... */ + + /* Multiple zero patterns */ + {0xFFFFFFFC, 0, "bits 0-1 are zero"}, /* ...11111100 */ + {0xFFFFFFF8, 0, "bits 0-2 are zero"}, /* ...11111000 */ + {0xFFFFFFF0, 0, "bits 0-3 are zero"}, /* ...11110000 */ + {0xFFFFFF00, 0, "bits 0-7 are zero"}, /* ...00000000 */ + {0xFFFF0000, 0, "bits 0-15 are zero"}, /* Lower 16 bits zero */ + + /* All zeros (special case) */ + {0x00000000, 0, "all bits zero"}, + + /* Complex patterns */ + {0xFFFDFFFF, 17, "bit 17 is zero"}, /* Gap in bit 17 */ + {0xFFF7FFFF, 19, "bit 19 is zero"}, /* Gap in bit 19 */ + {0xF7FFFFFF, 27, "bit 27 is zero"}, /* Gap in bit 27 */ + {0xDFFFFFFF, 29, "bit 29 is zero"}, /* Gap in bit 29 */ +}; + +/* + * Test basic correctness of ffz() function + */ +static void ffz_basic_correctness_test(struct kunit *test) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ffz_test_cases); i++) { + const struct ffz_test_case *tc = &ffz_test_cases[i]; + unsigned long result = ffz(tc->input); + + KUNIT_EXPECT_EQ_MSG(test, result, tc->expected_ffz, + "ffz(0x%08lx) [%s]: expected %lu, got %lu", + tc->input, tc->description, tc->expected_ffz, result); + } +} + +/* + * Test mathematical relationships between ffz() and other functions + */ +static void validate_ffz_relationships(struct kunit *test, unsigned long input) +{ + unsigned long ffz_result; + + if (input == 0) { + /* ffz(0) should return 0 (first zero bit is at position 0) */ + KUNIT_EXPECT_EQ(test, ffz(input), 0); + return; + } + + if (input == ~0UL) { + /* ffz(~0) is undefined (no zero bits) - just verify it doesn't crash */ + ffz_result = ffz(input); + /* Implementation-defined behavior, just ensure it completes */ + return; + } + + ffz_result = ffz(input); + + /* Range validation - result should be within valid bit range */ + KUNIT_EXPECT_LT(test, ffz_result, BITS_PER_LONG); + + /* Verify the bit at ffz_result position is actually zero */ + KUNIT_EXPECT_EQ_MSG(test, (input >> ffz_result) & 1, 0, + "ffz(0x%08lx) = %lu, but bit %lu is not zero", + input, ffz_result, ffz_result); + + /* Core relationship: if we set the ffz bit, ffz should find a different bit */ + if (ffz_result < BITS_PER_LONG - 1) { + unsigned long modified = input | (1UL << ffz_result); + if (modified != ~0UL) { /* Skip if all bits would be set */ + unsigned long new_ffz = ffz(modified); + KUNIT_EXPECT_NE_MSG(test, new_ffz, ffz_result, + "ffz(0x%08lx) = %lu, but setting that bit doesn't change ffz result", + input, ffz_result); + } + } +} + +static void ffz_mathematical_relationships_test(struct kunit *test) +{ + unsigned long test_patterns[] = { + /* Powers of 2 with one bit clear */ + 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFB, 0xFFFFFFF7, + 0xFFFFFFEF, 0xFFFFFFDF, 0xFFFFFFBF, 0xFFFFFF7F, + + /* Multiple patterns */ + 0xFFFFFF00, 0xFFFFF000, 0xFFFF0000, 0xFFF00000, + 0x7FFFFFFF, 0x3FFFFFFF, 0x1FFFFFFF, 0x0FFFFFFF, + + /* Complex bit patterns */ + 0xAAAAAAAA, 0x55555555, 0xCCCCCCCC, 0x33333333, + 0xF0F0F0F0, 0x0F0F0F0F, 0xFF00FF00, 0x00FF00FF, + }; + int i; + + /* Test basic test cases */ + for (i = 0; i < ARRAY_SIZE(ffz_test_cases); i++) { + validate_ffz_relationships(test, ffz_test_cases[i].input); + } + + /* Test additional patterns */ + for (i = 0; i < ARRAY_SIZE(test_patterns); i++) { + validate_ffz_relationships(test, test_patterns[i]); + } +} + +/* + * Test edge cases and boundary conditions for ffz() + */ +static void ffz_edge_cases_test(struct kunit *test) +{ + unsigned long edge_patterns[] = { + /* Boundary values */ + 0x00000000, /* All zeros */ + 0x80000000, /* Only MSB set */ + 0x00000001, /* Only LSB set */ + 0x7FFFFFFF, /* MSB clear */ + 0xFFFFFFFE, /* LSB clear */ + + /* Powers of 2 complement patterns (one zero bit each) */ + ~(1UL << 0), ~(1UL << 1), ~(1UL << 2), ~(1UL << 3), + ~(1UL << 4), ~(1UL << 8), ~(1UL << 16), ~(1UL << 31), + + /* Walking zero patterns */ + 0xFFFFFFFE, 0xFFFFFFFD, 0xFFFFFFFB, 0xFFFFFFF7, + 0xFFFFFFEF, 0xFFFFFFDF, 0xFFFFFFBF, 0xFFFFFF7F, + 0xFFFFFEFF, 0xFFFFFDFF, 0xFFFFFBFF, 0xFFFFF7FF, + + /* Multiple zeros */ + 0xFFFFFF00, 0xFFFFF000, 0xFFFF0000, 0xFFF00000, + 0xFF000000, 0xF0000000, 0x00000000, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(edge_patterns); i++) { + validate_ffz_relationships(test, edge_patterns[i]); + } +} + +/* + * KUnit test case definitions + */ +static struct kunit_case ffs_test_cases[] = { + KUNIT_CASE(ffs_basic_correctness_test), + KUNIT_CASE(ffs64_correctness_test), + KUNIT_CASE(ffs_mathematical_relationships_test), + KUNIT_CASE(ffs_edge_cases_test), + KUNIT_CASE(ffs64_edge_cases_test), + KUNIT_CASE(ffz_basic_correctness_test), + KUNIT_CASE(ffz_mathematical_relationships_test), + KUNIT_CASE(ffz_edge_cases_test), + KUNIT_CASE(ffs_attribute_const_test), + {} +}; + +/* + * KUnit test suite definition + */ +static struct kunit_suite ffs_test_suite = { + .name = "ffs", + .test_cases = ffs_test_cases, +}; + +kunit_test_suites(&ffs_test_suite); + +MODULE_DESCRIPTION("KUnit tests for ffs()-family functions"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 6606c8c7e81886565f5cbdb0c0ce82e280c2b229 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 Aug 2025 09:43:58 -0700 Subject: bitops: Add __attribute_const__ to generic ffs()-family implementations While tracking down a problem where constant expressions used by BUILD_BUG_ON() suddenly stopped working[1], we found that an added static initializer was convincing the compiler that it couldn't track the state of the prior statically initialized value. Tracing this down found that ffs() was used in the initializer macro, but since it wasn't marked with __attribute__const__, the compiler had to assume the function might change variable states as a side-effect (which is not true for ffs(), which provides deterministic math results). Add missing __attribute_const__ annotations to generic implementations of ffs(), __ffs(), fls(), and __fls() functions. These are pure mathematical functions that always return the same result for the same input with no side effects, making them eligible for compiler optimization. Build tested with x86_64 defconfig using GCC 14.2.0, which should validate the implementations when used by ARM, ARM64, LoongArch, Microblaze, NIOS2, and SPARC32 architectures. Link: https://github.com/KSPP/linux/issues/364 [1] Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250804164417.1612371-2-kees@kernel.org Signed-off-by: Kees Cook --- lib/clz_ctz.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c index fb8c0c5c2bd2..8778ec44bf63 100644 --- a/lib/clz_ctz.c +++ b/lib/clz_ctz.c @@ -15,28 +15,28 @@ #include int __weak __ctzsi2(int val); -int __weak __ctzsi2(int val) +int __weak __attribute_const__ __ctzsi2(int val) { return __ffs(val); } EXPORT_SYMBOL(__ctzsi2); int __weak __clzsi2(int val); -int __weak __clzsi2(int val) +int __weak __attribute_const__ __clzsi2(int val) { return 32 - fls(val); } EXPORT_SYMBOL(__clzsi2); int __weak __clzdi2(u64 val); -int __weak __clzdi2(u64 val) +int __weak __attribute_const__ __clzdi2(u64 val) { return 64 - fls64(val); } EXPORT_SYMBOL(__clzdi2); int __weak __ctzdi2(u64 val); -int __weak __ctzdi2(u64 val) +int __weak __attribute_const__ __ctzdi2(u64 val) { return __ffs64(val); } -- cgit v1.2.3 From 95719dfa323709c06ec34cc96e73e0788e19934f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 4 Aug 2025 09:44:13 -0700 Subject: KUnit: ffs: Validate all the __attribute_const__ annotations While tracking down a problem where constant expressions used by BUILD_BUG_ON() suddenly stopped working[1], we found that an added static initializer was convincing the compiler that it couldn't track the state of the prior statically initialized value. Tracing this down found that ffs() was used in the initializer macro, but since it wasn't marked with __attribute_const__, the compiler had to assume the function might change variable states as a side-effect (which is not true for ffs(), which provides deterministic math results). Validate all the __attibute_const__ annotations were found for all architectures by reproducing the specific problem encountered in the original bug report. Build and run tested with everything I could reach with KUnit: $ ./tools/testing/kunit/kunit.py run --arch=x86_64 ffs $ ./tools/testing/kunit/kunit.py run --arch=i386 ffs $ ./tools/testing/kunit/kunit.py run --arch=arm64 --make_options "CROSS_COMPILE=aarch64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=arm --make_options "CROSS_COMPILE=arm-linux-gnueabi-" ffs $ ./tools/testing/kunit/kunit.py run --arch=powerpc ffs $ ./tools/testing/kunit/kunit.py run --arch=powerpc32 ffs $ ./tools/testing/kunit/kunit.py run --arch=powerpcle ffs $ ./tools/testing/kunit/kunit.py run --arch=m68k ffs $ ./tools/testing/kunit/kunit.py run --arch=loongarch ffs $ ./tools/testing/kunit/kunit.py run --arch=s390 --make_options "CROSS_COMPILE=s390x-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=riscv --make_options "CROSS_COMPILE=riscv64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=riscv32 --make_options "CROSS_COMPILE=riscv64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=sparc --make_options "CROSS_COMPILE=sparc64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=sparc64 --make_options "CROSS_COMPILE=sparc64-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=alpha --make_options "CROSS_COMPILE=alpha-linux-gnu-" ffs $ ./tools/testing/kunit/kunit.py run --arch=sh --make_options "CROSS_COMPILE=sh4-linux-gnu-" ffs Closes: https://github.com/KSPP/linux/issues/364 Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250804164417.1612371-17-kees@kernel.org Signed-off-by: Kees Cook --- lib/tests/ffs_kunit.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'lib') diff --git a/lib/tests/ffs_kunit.c b/lib/tests/ffs_kunit.c index ed11456b116e..9a329cdc09c2 100644 --- a/lib/tests/ffs_kunit.c +++ b/lib/tests/ffs_kunit.c @@ -496,6 +496,46 @@ static void ffz_edge_cases_test(struct kunit *test) } } +/* + * To have useful build error output, split the tests into separate + * functions so it's clear which are missing __attribute_const__. + */ +#define CREATE_WRAPPER(func) \ +static noinline bool build_test_##func(void) \ +{ \ + int init_##func = 32; \ + int result_##func = func(6); \ + \ + /* Does the static initializer vanish after calling func? */ \ + BUILD_BUG_ON(init_##func < 32); \ + \ + /* "Consume" the results so optimizer doesn't drop them. */ \ + barrier_data(&init_##func); \ + barrier_data(&result_##func); \ + \ + return true; \ +} +CREATE_WRAPPER(ffs) +CREATE_WRAPPER(fls) +CREATE_WRAPPER(__ffs) +CREATE_WRAPPER(__fls) +CREATE_WRAPPER(ffz) +#undef CREATE_WRAPPER + +/* + * Make sure that __attribute_const__ has be applied to all the + * functions. This is a regression test for: + * https://github.com/KSPP/linux/issues/364 + */ +static void ffs_attribute_const_test(struct kunit *test) +{ + KUNIT_EXPECT_TRUE(test, build_test_ffs()); + KUNIT_EXPECT_TRUE(test, build_test_fls()); + KUNIT_EXPECT_TRUE(test, build_test___ffs()); + KUNIT_EXPECT_TRUE(test, build_test___fls()); + KUNIT_EXPECT_TRUE(test, build_test_ffz()); +} + /* * KUnit test case definitions */ -- cgit v1.2.3 From e8875795160b484f691938ce07a381e77821f947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 21 Aug 2025 15:28:11 +0200 Subject: lib: test_objpool: Avoid direct access to hrtimer clockbase MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The field timer->base->get_time is a private implementation detail and should not be accessed outside of the hrtimer core. Switch to the equivalent helper. Signed-off-by: Thomas Weißschuh Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250821-hrtimer-cleanup-get_time-v2-4-3ae822e5bfbd@linutronix.de --- lib/test_objpool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_objpool.c b/lib/test_objpool.c index 8f688187fa87..6a34a7582fdb 100644 --- a/lib/test_objpool.c +++ b/lib/test_objpool.c @@ -164,7 +164,7 @@ static enum hrtimer_restart ot_hrtimer_handler(struct hrtimer *hrt) /* do bulk-testings for objects pop/push */ item->worker(item, 1); - hrtimer_forward(hrt, hrt->base->get_time(), item->hrtcycle); + hrtimer_forward_now(hrt, item->hrtcycle); return HRTIMER_RESTART; } -- cgit v1.2.3 From b475272f03ca5d0c437c8f899ff229b21010ec83 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 27 Aug 2025 07:12:58 -0700 Subject: iov_iter: remove iov_iter_is_aligned No more callers. Signed-off-by: Keith Busch Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Mike Snitzer Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- lib/iov_iter.c | 95 ---------------------------------------------------------- 1 file changed, 95 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f9193f952f49..2fe66a6b8789 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -784,101 +784,6 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count) } EXPORT_SYMBOL(iov_iter_discard); -static bool iov_iter_aligned_iovec(const struct iov_iter *i, unsigned addr_mask, - unsigned len_mask) -{ - const struct iovec *iov = iter_iov(i); - size_t size = i->count; - size_t skip = i->iov_offset; - - do { - size_t len = iov->iov_len - skip; - - if (len > size) - len = size; - if (len & len_mask) - return false; - if ((unsigned long)(iov->iov_base + skip) & addr_mask) - return false; - - iov++; - size -= len; - skip = 0; - } while (size); - - return true; -} - -static bool iov_iter_aligned_bvec(const struct iov_iter *i, unsigned addr_mask, - unsigned len_mask) -{ - const struct bio_vec *bvec = i->bvec; - unsigned skip = i->iov_offset; - size_t size = i->count; - - do { - size_t len = bvec->bv_len - skip; - - if (len > size) - len = size; - if (len & len_mask) - return false; - if ((unsigned long)(bvec->bv_offset + skip) & addr_mask) - return false; - - bvec++; - size -= len; - skip = 0; - } while (size); - - return true; -} - -/** - * iov_iter_is_aligned() - Check if the addresses and lengths of each segments - * are aligned to the parameters. - * - * @i: &struct iov_iter to restore - * @addr_mask: bit mask to check against the iov element's addresses - * @len_mask: bit mask to check against the iov element's lengths - * - * Return: false if any addresses or lengths intersect with the provided masks - */ -bool iov_iter_is_aligned(const struct iov_iter *i, unsigned addr_mask, - unsigned len_mask) -{ - if (likely(iter_is_ubuf(i))) { - if (i->count & len_mask) - return false; - if ((unsigned long)(i->ubuf + i->iov_offset) & addr_mask) - return false; - return true; - } - - if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) - return iov_iter_aligned_iovec(i, addr_mask, len_mask); - - if (iov_iter_is_bvec(i)) - return iov_iter_aligned_bvec(i, addr_mask, len_mask); - - /* With both xarray and folioq types, we're dealing with whole folios. */ - if (iov_iter_is_xarray(i)) { - if (i->count & len_mask) - return false; - if ((i->xarray_start + i->iov_offset) & addr_mask) - return false; - } - if (iov_iter_is_folioq(i)) { - if (i->count & len_mask) - return false; - if (i->iov_offset & addr_mask) - return false; - } - - return true; -} -EXPORT_SYMBOL_GPL(iov_iter_is_aligned); - static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i) { const struct iovec *iov = iter_iov(i); -- cgit v1.2.3 From 2cd8231796b5e7133b1c3d66ad7d2a3c42c97258 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Wed, 6 Aug 2025 14:41:47 +0200 Subject: mm/slub: allow to set node and align in k[v]realloc Reimplement k[v]realloc_node() to be able to set node and alignment should a user need to do so. In order to do that while retaining the maximal backward compatibility, add k[v]realloc_node_align() functions and redefine the rest of API using these new ones. While doing that, we also keep the number of _noprof variants to a minimum, which implies some changes to the existing users of older _noprof functions, that basically being bcachefs. With that change we also provide the ability for the Rust part of the kernel to set node and alignment in its K[v]xxx [re]allocations. Link: https://lkml.kernel.org/r/20250806124147.1724658-1-vitaly.wool@konsulko.se Signed-off-by: Vitaly Wool Reviewed-by: Vlastimil Babka Cc: Alice Ryhl Cc: Danilo Krummrich Cc: Herbert Xu Cc: Jann Horn Cc: Kent Overstreet Cc: Liam Howlett Cc: Lorenzo Stoakes Cc: Uladzislau Rezki (Sony) Signed-off-by: Andrew Morton --- lib/rhashtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 3e555d012ed6..fde0f0e556f8 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -184,8 +184,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, static struct lock_class_key __key; tbl = alloc_hooks_tag(ht->alloc_tag, - kvmalloc_node_noprof(struct_size(tbl, buckets, nbuckets), - gfp|__GFP_ZERO, NUMA_NO_NODE)); + kvmalloc_node_align_noprof(struct_size(tbl, buckets, nbuckets), + 1, gfp|__GFP_ZERO, NUMA_NO_NODE)); size = nbuckets; -- cgit v1.2.3 From 9863124e0bfb94a0c1e84186d2f56f400b2e0112 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Mon, 4 Aug 2025 21:00:17 +0800 Subject: xarray: remove redundant __GFP_NOWARN Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made GFP_NOWAIT implicitly include __GFP_NOWARN. Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g., `GFP_NOWAIT | __GFP_NOWARN`) is now redundant. Let's clean up these redundant flags across subsystems. No functional changes. Link: https://lkml.kernel.org/r/20250804130018.484321-1-rongqianfeng@vivo.com Signed-off-by: Qianfeng Rong Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- lib/xarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/xarray.c b/lib/xarray.c index ae3d80f4b4ee..9a8b4916540c 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -370,7 +370,7 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift) if (node) { xas->xa_alloc = NULL; } else { - gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN; + gfp_t gfp = GFP_NOWAIT; if (xas->xa->xa_flags & XA_FLAGS_ACCOUNT) gfp |= __GFP_ACCOUNT; -- cgit v1.2.3 From 950c31e8f1280c45697e9353e2d4973ff79312bf Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Mon, 11 Aug 2025 11:25:09 +0300 Subject: lib/test_kho: fixes for error handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update kho_test_save() so that folios array won't be freed when returning from the function and the fdt will be freed on error * Reset state->nr_folios to 0 in kho_test_generate_data() on error * Simplify allocation of folios info in fdt. Link: https://lkml.kernel.org/r/20250811082510.4154080-3-rppt@kernel.org Fixes: b753522bed0b ("kho: add test for kexec handover") Signed-off-by: Mike Rapoport (Microsoft) Reported-by: Pratyush Yadav Closes: https://lore.kernel.org/all/mafs0zfcjcepf.fsf@kernel.org Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Baoquan He Cc: Changyuan Lyu Cc: Pasha Tatashin Cc: Shuah Khan Cc: Thomas Weißschuh Signed-off-by: Andrew Morton --- lib/test_kho.c | 52 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/test_kho.c b/lib/test_kho.c index c2eb899c3b45..fe8504e3407b 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -67,13 +67,20 @@ static struct notifier_block kho_test_nb = { static int kho_test_save_data(struct kho_test_state *state, void *fdt) { - phys_addr_t *folios_info __free(kvfree) = NULL; + phys_addr_t *folios_info; int err = 0; - folios_info = kvmalloc_array(state->nr_folios, sizeof(*folios_info), - GFP_KERNEL); - if (!folios_info) - return -ENOMEM; + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property_placeholder(fdt, "folios_info", + state->nr_folios * sizeof(*folios_info), + (void **)&folios_info); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + if (err) + return err; for (int i = 0; i < state->nr_folios; i++) { struct folio *folio = state->folios[i]; @@ -83,17 +90,9 @@ static int kho_test_save_data(struct kho_test_state *state, void *fdt) err = kho_preserve_folio(folio); if (err) - return err; + break; } - err |= fdt_begin_node(fdt, "data"); - err |= fdt_property(fdt, "nr_folios", &state->nr_folios, - sizeof(state->nr_folios)); - err |= fdt_property(fdt, "folios_info", folios_info, - state->nr_folios * sizeof(*folios_info)); - err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); - err |= fdt_end_node(fdt); - return err; } @@ -140,7 +139,10 @@ static int kho_test_generate_data(struct kho_test_state *state) unsigned int size; void *addr; - /* cap allocation so that we won't exceed max_mem */ + /* + * Since get_order() rounds up, make sure that actual + * allocation is smaller so that we won't exceed max_mem + */ if (alloc_size + (PAGE_SIZE << order) > max_mem) { order = get_order(max_mem - alloc_size); if (order) @@ -165,13 +167,14 @@ static int kho_test_generate_data(struct kho_test_state *state) err_free_folios: for (int i = 0; i < state->nr_folios; i++) folio_put(state->folios[i]); + state->nr_folios = 0; return -ENOMEM; } static int kho_test_save(void) { struct kho_test_state *state = &kho_test_state; - struct folio **folios __free(kvfree) = NULL; + struct folio **folios; unsigned long max_nr; int err; @@ -185,13 +188,23 @@ static int kho_test_save(void) err = kho_test_generate_data(state); if (err) - return err; + goto err_free_folios; err = kho_test_prepare_fdt(state); if (err) - return err; + goto err_free_folios; - return register_kho_notifier(&kho_test_nb); + err = register_kho_notifier(&kho_test_nb); + if (err) + goto err_free_fdt; + + return 0; + +err_free_fdt: + folio_put(state->fdt); +err_free_folios: + kvfree(folios); + return err; } static int kho_test_restore_data(const void *fdt, int node) @@ -291,6 +304,7 @@ static void kho_test_cleanup(void) folio_put(kho_test_state.folios[i]); kvfree(kho_test_state.folios); + folio_put(kho_test_state.fdt); } static void __exit kho_test_exit(void) -- cgit v1.2.3 From 0ee82798282aeede8b11553593f7920b15b2240f Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Wed, 13 Aug 2025 17:45:43 +0800 Subject: lib/test_maple_tree.c: remove redundant semicolons Remove unnecessary semicolons. Link: https://lkml.kernel.org/r/20250813094543.555906-1-liaoyuanhong@vivo.com Signed-off-by: Liao Yuanhong Reviewed-by: Dev Jain Reviewed-by: Liam R. Howlett Signed-off-by: Andrew Morton --- lib/test_maple_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index cb3936595b0d..1433ecc854cb 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -3562,7 +3562,7 @@ static noinline void __init check_state_handling(struct maple_tree *mt) MT_BUG_ON(mt, mas.last != 0x1500); MT_BUG_ON(mt, !mas_is_active(&mas)); - /* find: start ->active on value */; + /* find: start ->active on value */ mas_set(&mas, 1200); entry = mas_find(&mas, ULONG_MAX); MT_BUG_ON(mt, entry != ptr); -- cgit v1.2.3 From 1aca4021f8456470fe92ba795886be0e595b927d Mon Sep 17 00:00:00 2001 From: Xichao Zhao Date: Tue, 19 Aug 2025 15:04:57 +0800 Subject: lib/test_hmm: drop redundant conversion to bool The result of integer comparison already evaluates to bool. No need for explicit conversion. No functional impact. Link: https://lkml.kernel.org/r/20250819070457.486348-1-zhao.xichao@vivo.com Signed-off-by: Xichao Zhao Reviewed-by: Alistair Popple Cc: Jason Gunthorpe Cc: Leon Romanovsky Signed-off-by: Andrew Morton --- lib/test_hmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 761725bc713c..83e3d8208a54 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -140,7 +140,7 @@ static int dmirror_bounce_init(struct dmirror_bounce *bounce, static bool dmirror_is_private_zone(struct dmirror_device *mdevice) { return (mdevice->zone_device_type == - HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false; + HMM_DMIRROR_MEMORY_DEVICE_PRIVATE); } static enum migrate_vma_direction -- cgit v1.2.3 From ef49b7b39d50b9e4f9d63e64f5d8acafe3c71158 Mon Sep 17 00:00:00 2001 From: Sidhartha Kumar Date: Tue, 26 Aug 2025 15:13:44 +0000 Subject: maple_tree: fix MAPLE_PARENT_RANGE32 and parent pointer docs MAPLE_PARENT_RANGE32 should be 0x02 as a 32 bit node is indicated by the bit pattern 0b010 which is the hex value 0x02. There are no users currently, so there is no associated bug with this wrong value. Fix typo Note -> Node and replace x with b to indicate binary values. Link: https://lkml.kernel.org/r/20250826151344.403286-1-sidhartha.kumar@oracle.com Fixes: 54a611b60590 ("Maple Tree: add new data structure") Signed-off-by: Sidhartha Kumar Reviewed-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- lib/maple_tree.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index b4ee2d29d7a9..c57a4615bdff 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -405,11 +405,11 @@ static __always_inline bool mt_is_alloc(struct maple_tree *mt) * a reuse of the last bit in the node type. This is possible by using bit 1 to * indicate if bit 2 is part of the type or the slot. * - * Note types: - * 0x??1 = Root - * 0x?00 = 16 bit nodes - * 0x010 = 32 bit nodes - * 0x110 = 64 bit nodes + * Node types: + * 0b??1 = Root + * 0b?00 = 16 bit nodes + * 0b010 = 32 bit nodes + * 0b110 = 64 bit nodes * * Slot size and alignment * 0b??1 : Root @@ -427,7 +427,7 @@ static __always_inline bool mt_is_alloc(struct maple_tree *mt) #define MAPLE_PARENT_16B_SLOT_MASK 0xFC #define MAPLE_PARENT_RANGE64 0x06 -#define MAPLE_PARENT_RANGE32 0x04 +#define MAPLE_PARENT_RANGE32 0x02 #define MAPLE_PARENT_NOT_RANGE16 0x02 /* -- cgit v1.2.3 From 06ef8b9aa25ea7342ffd604784edcfdbc8348920 Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Tue, 5 Aug 2025 10:30:31 +0800 Subject: ref_tracker: remove redundant __GFP_NOWARN Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made GFP_NOWAIT implicitly include __GFP_NOWARN. Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g., `GFP_NOWAIT | __GFP_NOWARN`) is now redundant. Let's clean up these redundant flags across subsystems. No functional changes. Link: https://lkml.kernel.org/r/20250805023031.331718-1-rongqianfeng@vivo.com Signed-off-by: Qianfeng Rong Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton --- lib/ref_tracker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/ref_tracker.c b/lib/ref_tracker.c index cce12287708e..258fb0e7abdf 100644 --- a/lib/ref_tracker.c +++ b/lib/ref_tracker.c @@ -75,7 +75,7 @@ ref_tracker_get_stats(struct ref_tracker_dir *dir, unsigned int limit) struct ref_tracker *tracker; stats = kmalloc(struct_size(stats, stacks, limit), - GFP_NOWAIT | __GFP_NOWARN); + GFP_NOWAIT); if (!stats) return ERR_PTR(-ENOMEM); stats->total = 0; @@ -159,7 +159,7 @@ __ref_tracker_dir_pr_ostream(struct ref_tracker_dir *dir, return; } - sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT | __GFP_NOWARN); + sbuf = kmalloc(STACK_BUF_SIZE, GFP_NOWAIT); for (i = 0, skipped = stats->total; i < stats->count; ++i) { stack = stats->stacks[i].stack_handle; @@ -306,7 +306,7 @@ int ref_tracker_free(struct ref_tracker_dir *dir, } nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1); stack_handle = stack_depot_save(entries, nr_entries, - GFP_NOWAIT | __GFP_NOWARN); + GFP_NOWAIT); spin_lock_irqsave(&dir->lock, flags); if (tracker->dead) { -- cgit v1.2.3 From ca78a04ce5c42b00addc063a7610a5ae12dafc39 Mon Sep 17 00:00:00 2001 From: Liao Yuanhong Date: Mon, 11 Aug 2025 16:27:38 +0800 Subject: lib/digsig: remove unnecessary memset kzalloc() has already been initialized to full 0 space, there is no need to use memset() to initialize again. Link: https://lkml.kernel.org/r/20250811082739.378284-1-liaoyuanhong@vivo.com Signed-off-by: Liao Yuanhong Signed-off-by: Andrew Morton --- lib/digsig.c | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/digsig.c b/lib/digsig.c index 04b5e55ed95f..2b36f9cc91e9 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -159,7 +159,6 @@ static int digsig_verify_rsa(struct key *key, len = mlen; head = len - l; - memset(out1, 0, head); memcpy(out1 + head, p, l); kfree(p); -- cgit v1.2.3 From 6c0022d6dc341668d704b1490674a21805ef645b Mon Sep 17 00:00:00 2001 From: Xichao Zhao Date: Tue, 12 Aug 2025 16:40:45 +0800 Subject: lib/fault-inject-usercopy.c: use PTR_ERR_OR_ZERO() to simplify code Use the standard error pointer macro to shorten the code and simplify. Link: https://lkml.kernel.org/r/20250812084045.64218-1-zhao.xichao@vivo.com Signed-off-by: Xichao Zhao Signed-off-by: Andrew Morton --- lib/fault-inject-usercopy.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/fault-inject-usercopy.c b/lib/fault-inject-usercopy.c index 77558b6c29ca..75403ec50f49 100644 --- a/lib/fault-inject-usercopy.c +++ b/lib/fault-inject-usercopy.c @@ -22,10 +22,8 @@ static int __init fail_usercopy_debugfs(void) dir = fault_create_debugfs_attr("fail_usercopy", NULL, &fail_usercopy.attr); - if (IS_ERR(dir)) - return PTR_ERR(dir); - return 0; + return PTR_ERR_OR_ZERO(dir); } late_initcall(fail_usercopy_debugfs); -- cgit v1.2.3 From 7da017eaa43a215f68d42ebfd08d381134ea6356 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Thu, 14 Aug 2025 17:50:33 +0800 Subject: test_firmware: use str_true_false() helper Replace ternary (condition ? "true" : "false") expressions with the str_true_false() helper from string_choices.h. This improves readability by replacing the three-operand ternary with a single function call, ensures consistent string output, and allows potential string deduplication by the linker, resulting in a slightly smaller binary. Link: https://lkml.kernel.org/r/20250814095033.244034-1-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Kuan-Wei Chiu Signed-off-by: Andrew Morton --- lib/test_firmware.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/test_firmware.c b/lib/test_firmware.c index 211222e63328..be4f93124901 100644 --- a/lib/test_firmware.c +++ b/lib/test_firmware.c @@ -26,6 +26,7 @@ #include #include #include +#include MODULE_IMPORT_NS("TEST_FIRMWARE"); @@ -304,17 +305,17 @@ static ssize_t config_show(struct device *dev, "FW_ACTION_NOUEVENT"); len += scnprintf(buf + len, PAGE_SIZE - len, "into_buf:\t\t%s\n", - test_fw_config->into_buf ? "true" : "false"); + str_true_false(test_fw_config->into_buf)); len += scnprintf(buf + len, PAGE_SIZE - len, "buf_size:\t%zu\n", test_fw_config->buf_size); len += scnprintf(buf + len, PAGE_SIZE - len, "file_offset:\t%zu\n", test_fw_config->file_offset); len += scnprintf(buf + len, PAGE_SIZE - len, "partial:\t\t%s\n", - test_fw_config->partial ? "true" : "false"); + str_true_false(test_fw_config->partial)); len += scnprintf(buf + len, PAGE_SIZE - len, "sync_direct:\t\t%s\n", - test_fw_config->sync_direct ? "true" : "false"); + str_true_false(test_fw_config->sync_direct)); len += scnprintf(buf + len, PAGE_SIZE - len, "read_fw_idx:\t%u\n", test_fw_config->read_fw_idx); if (test_fw_config->upload_name) -- cgit v1.2.3 From 0ca863b7c638803722dc5596b2d520812cf283b7 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Thu, 14 Aug 2025 17:38:27 +0800 Subject: alloc_tag: use str_on_off() helper Replace the ternary (enable ? "on" : "off") with the str_on_off() helper from string_choices.h. This improves readability by replacing the three-operand ternary with a single function call, ensures consistent string output, and allows potential string deduplication by the linker, resulting in a slightly smaller binary. Link: https://lkml.kernel.org/r/20250814093827.237980-1-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Cc: Kent Overstreet Cc: Suren Baghdasaryan Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index e9b33848700a..d4449205eca2 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -726,7 +727,7 @@ static int __init setup_early_mem_profiling(char *str) } mem_profiling_support = true; pr_info("Memory allocation profiling is enabled %s compression and is turned %s!\n", - compressed ? "with" : "without", enable ? "on" : "off"); + compressed ? "with" : "without", str_on_off(enable)); } if (enable != mem_alloc_profiling_enabled()) { -- cgit v1.2.3 From 31cf021b6161ac63c2ab8c64415cca0e3e136636 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Mon, 25 Aug 2025 10:56:58 +0800 Subject: lib/sys_info: handle sys_info_mask==0 case Generalization of panic_print's dump function [1] has been merged, and this patchset is to address some remaining issues, like adding note of the obsoletion of 'panic_print' cmdline parameter, refining the kernel document for panic_print, and hardening some string management. This patch (of 4): It is a normal case that bitmask parameter is 0, so pre-initialize the names[] to null string to cover this case. Also remove the superfluous "+1" in names[sizeof(sys_info_avail) + 1], which is needed for 'strlen()', but not for 'sizeof()'. Link: https://lkml.kernel.org/r/20250825025701.81921-1-feng.tang@linux.alibaba.com Link: https://lkml.kernel.org/r/20250825025701.81921-2-feng.tang@linux.alibaba.com Link: Link: https://lkml.kernel.org/r/20250703021004.42328-1-feng.tang@linux.alibaba.com [1] Signed-off-by: Feng Tang Suggested-by: Petr Mladek Reviewed-by: Petr Mladek Cc: Askar Safin Cc: John Ogness Cc: Jonathan Corbet Cc: Lance Yang Cc: "Paul E . McKenney" Cc: Steven Rostedt Signed-off-by: Andrew Morton --- lib/sys_info.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/sys_info.c b/lib/sys_info.c index 5bf503fd7ec1..496f9151c9b6 100644 --- a/lib/sys_info.c +++ b/lib/sys_info.c @@ -55,7 +55,7 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, void *buffer, size_t *lenp, loff_t *ppos) { - char names[sizeof(sys_info_avail) + 1]; + char names[sizeof(sys_info_avail)]; struct ctl_table table; unsigned long *si_bits_global; @@ -81,6 +81,7 @@ int sysctl_sys_info_handler(const struct ctl_table *ro_table, int write, char *delim = ""; int i, len = 0; + names[0] = '\0'; for (i = 0; i < ARRAY_SIZE(si_names); i++) { if (*si_bits_global & si_names[i].bit) { len += scnprintf(names + len, sizeof(names) - len, -- cgit v1.2.3 From c6be36e2997662f423edfa3979a63935873ff648 Mon Sep 17 00:00:00 2001 From: Jinchao Wang Date: Mon, 25 Aug 2025 10:29:35 +0800 Subject: panic/printk: replace this_cpu_in_panic() with panic_on_this_cpu() The helper this_cpu_in_panic() duplicated logic already provided by panic_on_this_cpu(). Remove this_cpu_in_panic() and switch all users to panic_on_this_cpu(). This simplifies the code and avoids having two helpers for the same check. Link: https://lkml.kernel.org/r/20250825022947.1596226-8-wangjinchao600@gmail.com Signed-off-by: Jinchao Wang Cc: Anna Schumaker Cc: Baoquan He Cc: "Darrick J. Wong" Cc: Dave Young Cc: Doug Anderson Cc: "Guilherme G. Piccoli" Cc: Helge Deller Cc: Ingo Molnar Cc: Jason Gunthorpe Cc: Joanthan Cameron Cc: Joel Granados Cc: John Ogness Cc: Kees Cook Cc: Li Huafei Cc: "Luck, Tony" Cc: Luo Gengkun Cc: Max Kellermann Cc: Nam Cao Cc: oushixiong Cc: Petr Mladek Cc: Qianqiang Liu Cc: Sergey Senozhatsky Cc: Sohil Mehta Cc: Steven Rostedt Cc: Tejun Heo Cc: Thomas Gleinxer Cc: Thomas Zimemrmann Cc: Thorsten Blum Cc: Ville Syrjala Cc: Vivek Goyal Cc: Yicong Yang Cc: Yunhui Cui Cc: Yury Norov (NVIDIA) Signed-off-by: Andrew Morton --- lib/dump_stack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/dump_stack.c b/lib/dump_stack.c index b3a85fe8b673..f0c78b5b5324 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -102,7 +102,7 @@ static void __dump_stack(const char *log_lvl) */ asmlinkage __visible void dump_stack_lvl(const char *log_lvl) { - bool in_panic = this_cpu_in_panic(); + bool in_panic = panic_on_this_cpu(); unsigned long flags; /* -- cgit v1.2.3 From 3e3f55f8b73fa0e5c4df50f8b6c001f0a1f18adf Mon Sep 17 00:00:00 2001 From: Guan-Chun Wu <409411716@gms.tku.edu.tw> Date: Wed, 27 Aug 2025 00:17:41 +0800 Subject: btree: simplify merge logic by using btree_last() return value Previously btree_merge() called btree_last() only to test existence, then performed an extra btree_lookup() to fetch the value. This patch changes it to directly use the value returned by btree_last(), avoiding redundant lookups and simplifying the merge loop. Link: https://lkml.kernel.org/r/20250826161741.686704-1-409411716@gms.tku.edu.tw Signed-off-by: Guan-Chun Wu <409411716@gms.tku.edu.tw> Cc: Kuan-Wei Chiu Signed-off-by: Andrew Morton --- lib/btree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/btree.c b/lib/btree.c index bb81d3393ac5..9c80c0c7bba8 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -653,9 +653,9 @@ int btree_merge(struct btree_head *target, struct btree_head *victim, * walks to remove a single object from the victim. */ for (;;) { - if (!btree_last(victim, geo, key)) + val = btree_last(victim, geo, key); + if (!val) break; - val = btree_lookup(victim, geo, key); err = btree_insert(target, geo, key, val, gfp); if (err) return err; -- cgit v1.2.3 From 031cdd3bc3f369553933c1b0f4cb18000162c8ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 8 Sep 2025 09:03:38 +0200 Subject: kunit: Enable PCI on UML without triggering WARN() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various KUnit tests require PCI infrastructure to work. All normal platforms enable PCI by default, but UML does not. Enabling PCI from .kunitconfig files is problematic as it would not be portable. So in commit 6fc3a8636a7b ("kunit: tool: Enable virtio/PCI by default on UML") PCI was enabled by way of CONFIG_UML_PCI_OVER_VIRTIO=y. However CONFIG_UML_PCI_OVER_VIRTIO requires additional configuration of CONFIG_UML_PCI_OVER_VIRTIO_DEVICE_ID or will otherwise trigger a WARN() in virtio_pcidev_init(). However there is no one correct value for UML_PCI_OVER_VIRTIO_DEVICE_ID which could be used by default. This warning is confusing when debugging test failures. On the other hand, the functionality of CONFIG_UML_PCI_OVER_VIRTIO is not used at all, given that it is completely non-functional as indicated by the WARN() in question. Instead it is only used as a way to enable CONFIG_UML_PCI which itself is not directly configurable. Instead of going through CONFIG_UML_PCI_OVER_VIRTIO, introduce a custom configuration option which enables CONFIG_UML_PCI without triggering warnings or building dead code. Link: https://lore.kernel.org/r/20250908-kunit-uml-pci-v2-1-d8eba5f73c9d@linutronix.de Signed-off-by: Thomas Weißschuh Reviewed-by: Johannes Berg Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/Kconfig | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index c10ede4b1d22..1823539e96da 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -106,4 +106,11 @@ config KUNIT_DEFAULT_TIMEOUT If unsure, the default timeout of 300 seconds is suitable for most cases. +config KUNIT_UML_PCI + bool "KUnit UML PCI Support" + depends on UML + select UML_PCI + help + Enables the PCI subsystem on UML for use by KUnit tests. + endif # KUNIT -- cgit v1.2.3 From 285cae57a51664cc94e85de0ff994f9965b3aca8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Tue, 16 Sep 2025 08:15:46 +0200 Subject: kunit: Extend kconfig help text for KUNIT_UML_PCI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checkpatch.pl expects at least 4 lines of help text. Extend the help text to make checkpatch.pl happy. Link: https://lore.kernel.org/r/20250916-kunit-pci-kconfig-v1-1-6d1369f06f2a@linutronix.de Fixes: 031cdd3bc3f3 ("kunit: Enable PCI on UML without triggering WARN()") Suggested-by: Shuah Khan Link: https://lore.kernel.org/lkml/3dc95227-2be9-48a0-bdea-3f283d9b2a38@linuxfoundation.org/ Signed-off-by: Thomas Weißschuh Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/kunit/Kconfig b/lib/kunit/Kconfig index 1823539e96da..7a6af361d2fc 100644 --- a/lib/kunit/Kconfig +++ b/lib/kunit/Kconfig @@ -112,5 +112,9 @@ config KUNIT_UML_PCI select UML_PCI help Enables the PCI subsystem on UML for use by KUnit tests. + Some KUnit tests require the PCI core which is not enabled by + default on UML. + + If unsure, say N. endif # KUNIT -- cgit v1.2.3 From f8a03516a530cc36bc9015c84ba7540ee3e8d7bd Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 18 Jul 2025 15:27:07 +0800 Subject: raid6: riscv: Clean up unused header file inclusion These two C files don't reference things defined in simd.h or types.h so remove these redundant #inclusions. Fixes: 6093faaf9593 ("raid6: Add RISC-V SIMD syndrome and recovery calculations") Reviewed-by: Alexandre Ghiti Signed-off-by: Chunyan Zhang Reviewed-by: Nutty Liu Link: https://lore.kernel.org/r/20250718072711.3865118-2-zhangchunyan@iscas.ac.cn Signed-off-by: Paul Walmsley --- lib/raid6/recov_rvv.c | 2 -- lib/raid6/rvv.c | 3 --- 2 files changed, 5 deletions(-) (limited to 'lib') diff --git a/lib/raid6/recov_rvv.c b/lib/raid6/recov_rvv.c index 5d54c4b437df..5f779719c3d3 100644 --- a/lib/raid6/recov_rvv.c +++ b/lib/raid6/recov_rvv.c @@ -4,9 +4,7 @@ * Author: Chunyan Zhang */ -#include #include -#include #include static int rvv_has_vector(void) diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c index 7d82efa5b14f..b193ea176d5d 100644 --- a/lib/raid6/rvv.c +++ b/lib/raid6/rvv.c @@ -9,11 +9,8 @@ * Copyright 2002-2004 H. Peter Anvin */ -#include #include -#include #include -#include #include "rvv.h" #define NSIZE (riscv_v_vsize / 32) /* NSIZE = vlenb */ -- cgit v1.2.3 From 2dfb75cd5695fa9db2ad90d1339330eda7a0239d Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Fri, 18 Jul 2025 15:27:08 +0800 Subject: raid6: riscv: replace one load with a move to speed up the caculation Since wp$$==wq$$, it doesn't need to load the same data twice, use move instruction to replace one of the loads to let the program run faster. Reviewed-by: Alexandre Ghiti Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20250718072711.3865118-3-zhangchunyan@iscas.ac.cn Signed-off-by: Paul Walmsley --- lib/raid6/rvv.c | 60 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/raid6/rvv.c b/lib/raid6/rvv.c index b193ea176d5d..89da5fc247aa 100644 --- a/lib/raid6/rvv.c +++ b/lib/raid6/rvv.c @@ -44,7 +44,7 @@ static void raid6_rvv1_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) @@ -117,7 +117,7 @@ static void raid6_rvv1_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]) @@ -218,9 +218,9 @@ static void raid6_rvv2_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -310,9 +310,9 @@ static void raid6_rvv2_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -440,13 +440,13 @@ static void raid6_rvv4_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" "vle8.v v8, (%[wp2])\n" - "vle8.v v9, (%[wp2])\n" + "vmv.v.v v9, v8\n" "vle8.v v12, (%[wp3])\n" - "vle8.v v13, (%[wp3])\n" + "vmv.v.v v13, v12\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -566,13 +566,13 @@ static void raid6_rvv4_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" "vle8.v v8, (%[wp2])\n" - "vle8.v v9, (%[wp2])\n" + "vmv.v.v v9, v8\n" "vle8.v v12, (%[wp3])\n" - "vle8.v v13, (%[wp3])\n" + "vmv.v.v v13, v12\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -754,21 +754,21 @@ static void raid6_rvv8_gen_syndrome_real(int disks, unsigned long bytes, void ** asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" "vle8.v v8, (%[wp2])\n" - "vle8.v v9, (%[wp2])\n" + "vmv.v.v v9, v8\n" "vle8.v v12, (%[wp3])\n" - "vle8.v v13, (%[wp3])\n" + "vmv.v.v v13, v12\n" "vle8.v v16, (%[wp4])\n" - "vle8.v v17, (%[wp4])\n" + "vmv.v.v v17, v16\n" "vle8.v v20, (%[wp5])\n" - "vle8.v v21, (%[wp5])\n" + "vmv.v.v v21, v20\n" "vle8.v v24, (%[wp6])\n" - "vle8.v v25, (%[wp6])\n" + "vmv.v.v v25, v24\n" "vle8.v v28, (%[wp7])\n" - "vle8.v v29, (%[wp7])\n" + "vmv.v.v v29, v28\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), @@ -948,21 +948,21 @@ static void raid6_rvv8_xor_syndrome_real(int disks, int start, int stop, asm volatile (".option push\n" ".option arch,+v\n" "vle8.v v0, (%[wp0])\n" - "vle8.v v1, (%[wp0])\n" + "vmv.v.v v1, v0\n" "vle8.v v4, (%[wp1])\n" - "vle8.v v5, (%[wp1])\n" + "vmv.v.v v5, v4\n" "vle8.v v8, (%[wp2])\n" - "vle8.v v9, (%[wp2])\n" + "vmv.v.v v9, v8\n" "vle8.v v12, (%[wp3])\n" - "vle8.v v13, (%[wp3])\n" + "vmv.v.v v13, v12\n" "vle8.v v16, (%[wp4])\n" - "vle8.v v17, (%[wp4])\n" + "vmv.v.v v17, v16\n" "vle8.v v20, (%[wp5])\n" - "vle8.v v21, (%[wp5])\n" + "vmv.v.v v21, v20\n" "vle8.v v24, (%[wp6])\n" - "vle8.v v25, (%[wp6])\n" + "vmv.v.v v25, v24\n" "vle8.v v28, (%[wp7])\n" - "vle8.v v29, (%[wp7])\n" + "vmv.v.v v29, v28\n" ".option pop\n" : : [wp0]"r"(&dptr[z0][d + 0 * NSIZE]), -- cgit v1.2.3 From 4ca24d6abbca5df76c4b189dd94fb055613de297 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 15 Sep 2025 11:08:14 -0500 Subject: lib/crypto: sha256: Add support for 2-way interleaved hashing Many arm64 and x86_64 CPUs can compute two SHA-256 hashes in nearly the same speed as one, if the instructions are interleaved. This is because SHA-256 is serialized block-by-block, and two interleaved hashes take much better advantage of the CPU's instruction-level parallelism. Meanwhile, a very common use case for SHA-256 hashing in the Linux kernel is dm-verity and fs-verity. Both use a Merkle tree that has a fixed block size, usually 4096 bytes with an empty or 32-byte salt prepended. Usually, many blocks need to be hashed at a time. This is an ideal scenario for 2-way interleaved hashing. To enable this optimization, add a new function sha256_finup_2x() to the SHA-256 library API. It computes the hash of two equal-length messages, starting from a common initial context. For now it always falls back to sequential processing. Later patches will wire up arm64 and x86_64 optimized implementations. Note that the interleaving factor could in principle be higher than 2x. However, that runs into many practical difficulties and CPU throughput limitations. Thus, both the implementations I'm adding are 2x. In the interest of using the simplest solution, the API matches that. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250915160819.140019-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/sha256.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/crypto/sha256.c b/lib/crypto/sha256.c index 8fa15165d23e..881b935418ce 100644 --- a/lib/crypto/sha256.c +++ b/lib/crypto/sha256.c @@ -25,13 +25,20 @@ static const struct sha256_block_state sha224_iv = { }, }; -static const struct sha256_block_state sha256_iv = { - .h = { - SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, - SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, +static const struct sha256_ctx initial_sha256_ctx = { + .ctx = { + .state = { + .h = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, + }, + }, + .bytecount = 0, }, }; +#define sha256_iv (initial_sha256_ctx.ctx.state) + static const u32 sha256_K[64] = { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, @@ -261,8 +268,62 @@ void sha256(const u8 *data, size_t len, u8 out[SHA256_DIGEST_SIZE]) } EXPORT_SYMBOL(sha256); -/* pre-boot environment (as indicated by __DISABLE_EXPORTS) doesn't need HMAC */ +/* + * Pre-boot environment (as indicated by __DISABLE_EXPORTS being defined) + * doesn't need either HMAC support or interleaved hashing support + */ #ifndef __DISABLE_EXPORTS + +#ifndef sha256_finup_2x_arch +static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx, + const u8 *data1, const u8 *data2, size_t len, + u8 out1[SHA256_DIGEST_SIZE], + u8 out2[SHA256_DIGEST_SIZE]) +{ + return false; +} +static bool sha256_finup_2x_is_optimized_arch(void) +{ + return false; +} +#endif + +/* Sequential fallback implementation of sha256_finup_2x() */ +static noinline_for_stack void sha256_finup_2x_sequential( + const struct __sha256_ctx *ctx, const u8 *data1, const u8 *data2, + size_t len, u8 out1[SHA256_DIGEST_SIZE], u8 out2[SHA256_DIGEST_SIZE]) +{ + struct __sha256_ctx mut_ctx; + + mut_ctx = *ctx; + __sha256_update(&mut_ctx, data1, len); + __sha256_final(&mut_ctx, out1, SHA256_DIGEST_SIZE); + + mut_ctx = *ctx; + __sha256_update(&mut_ctx, data2, len); + __sha256_final(&mut_ctx, out2, SHA256_DIGEST_SIZE); +} + +void sha256_finup_2x(const struct sha256_ctx *ctx, const u8 *data1, + const u8 *data2, size_t len, u8 out1[SHA256_DIGEST_SIZE], + u8 out2[SHA256_DIGEST_SIZE]) +{ + if (ctx == NULL) + ctx = &initial_sha256_ctx; + + if (likely(sha256_finup_2x_arch(&ctx->ctx, data1, data2, len, out1, + out2))) + return; + sha256_finup_2x_sequential(&ctx->ctx, data1, data2, len, out1, out2); +} +EXPORT_SYMBOL_GPL(sha256_finup_2x); + +bool sha256_finup_2x_is_optimized(void) +{ + return sha256_finup_2x_is_optimized_arch(); +} +EXPORT_SYMBOL_GPL(sha256_finup_2x_is_optimized); + static void __hmac_sha256_preparekey(struct sha256_block_state *istate, struct sha256_block_state *ostate, const u8 *raw_key, size_t raw_key_len, -- cgit v1.2.3 From 34c3f1e346e7b2b8a420b1ea341b341525baf92b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 15 Sep 2025 11:08:15 -0500 Subject: lib/crypto: arm64/sha256: Add support for 2-way interleaved hashing Add an implementation of sha256_finup_2x_arch() for arm64. It interleaves the computation of two SHA-256 hashes using the ARMv8 SHA-256 instructions. dm-verity and fs-verity will take advantage of this for greatly improved performance on capable CPUs. This increases the throughput of SHA-256 hashing 4096-byte messages by the following amounts on the following CPUs: ARM Cortex-X1: 70% ARM Cortex-X3: 68% ARM Cortex-A76: 65% ARM Cortex-A715: 43% ARM Cortex-A510: 25% ARM Cortex-A55: 8% Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250915160819.140019-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm64/sha256-ce.S | 284 ++++++++++++++++++++++++++++++++++++++++++- lib/crypto/arm64/sha256.h | 37 ++++++ 2 files changed, 315 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm64/sha256-ce.S b/lib/crypto/arm64/sha256-ce.S index b99d9589c421..410174ba5237 100644 --- a/lib/crypto/arm64/sha256-ce.S +++ b/lib/crypto/arm64/sha256-ce.S @@ -70,18 +70,22 @@ .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + .macro load_round_constants tmp + adr_l \tmp, .Lsha2_rcon + ld1 { v0.4s- v3.4s}, [\tmp], #64 + ld1 { v4.4s- v7.4s}, [\tmp], #64 + ld1 { v8.4s-v11.4s}, [\tmp], #64 + ld1 {v12.4s-v15.4s}, [\tmp] + .endm + /* * size_t __sha256_ce_transform(struct sha256_block_state *state, * const u8 *data, size_t nblocks); */ .text SYM_FUNC_START(__sha256_ce_transform) - /* load round constants */ - adr_l x8, .Lsha2_rcon - ld1 { v0.4s- v3.4s}, [x8], #64 - ld1 { v4.4s- v7.4s}, [x8], #64 - ld1 { v8.4s-v11.4s}, [x8], #64 - ld1 {v12.4s-v15.4s}, [x8] + + load_round_constants x8 /* load state */ ld1 {dgav.4s, dgbv.4s}, [x0] @@ -134,3 +138,271 @@ CPU_LE( rev32 v19.16b, v19.16b ) mov x0, x2 ret SYM_FUNC_END(__sha256_ce_transform) + + .unreq dga + .unreq dgav + .unreq dgb + .unreq dgbv + .unreq t0 + .unreq t1 + .unreq dg0q + .unreq dg0v + .unreq dg1q + .unreq dg1v + .unreq dg2q + .unreq dg2v + + // parameters for sha256_ce_finup2x() + ctx .req x0 + data1 .req x1 + data2 .req x2 + len .req w3 + out1 .req x4 + out2 .req x5 + + // other scalar variables + count .req x6 + final_step .req w7 + + // x8-x9 are used as temporaries. + + // v0-v15 are used to cache the SHA-256 round constants. + // v16-v19 are used for the message schedule for the first message. + // v20-v23 are used for the message schedule for the second message. + // v24-v31 are used for the state and temporaries as given below. + // *_a are for the first message and *_b for the second. + state0_a_q .req q24 + state0_a .req v24 + state1_a_q .req q25 + state1_a .req v25 + state0_b_q .req q26 + state0_b .req v26 + state1_b_q .req q27 + state1_b .req v27 + t0_a .req v28 + t0_b .req v29 + t1_a_q .req q30 + t1_a .req v30 + t1_b_q .req q31 + t1_b .req v31 + +#define OFFSETOF_BYTECOUNT 32 // offsetof(struct __sha256_ctx, bytecount) +#define OFFSETOF_BUF 40 // offsetof(struct __sha256_ctx, buf) +// offsetof(struct __sha256_ctx, state) is assumed to be 0. + + // Do 4 rounds of SHA-256 for each of two messages (interleaved). m0_a + // and m0_b contain the current 4 message schedule words for the first + // and second message respectively. + // + // If not all the message schedule words have been computed yet, then + // this also computes 4 more message schedule words for each message. + // m1_a-m3_a contain the next 3 groups of 4 message schedule words for + // the first message, and likewise m1_b-m3_b for the second. After + // consuming the current value of m0_a, this macro computes the group + // after m3_a and writes it to m0_a, and likewise for *_b. This means + // that the next (m0_a, m1_a, m2_a, m3_a) is the current (m1_a, m2_a, + // m3_a, m0_a), and likewise for *_b, so the caller must cycle through + // the registers accordingly. + .macro do_4rounds_2x i, k, m0_a, m1_a, m2_a, m3_a, \ + m0_b, m1_b, m2_b, m3_b + add t0_a\().4s, \m0_a\().4s, \k\().4s + add t0_b\().4s, \m0_b\().4s, \k\().4s + .if \i < 48 + sha256su0 \m0_a\().4s, \m1_a\().4s + sha256su0 \m0_b\().4s, \m1_b\().4s + sha256su1 \m0_a\().4s, \m2_a\().4s, \m3_a\().4s + sha256su1 \m0_b\().4s, \m2_b\().4s, \m3_b\().4s + .endif + mov t1_a.16b, state0_a.16b + mov t1_b.16b, state0_b.16b + sha256h state0_a_q, state1_a_q, t0_a\().4s + sha256h state0_b_q, state1_b_q, t0_b\().4s + sha256h2 state1_a_q, t1_a_q, t0_a\().4s + sha256h2 state1_b_q, t1_b_q, t0_b\().4s + .endm + + .macro do_16rounds_2x i, k0, k1, k2, k3 + do_4rounds_2x \i + 0, \k0, v16, v17, v18, v19, v20, v21, v22, v23 + do_4rounds_2x \i + 4, \k1, v17, v18, v19, v16, v21, v22, v23, v20 + do_4rounds_2x \i + 8, \k2, v18, v19, v16, v17, v22, v23, v20, v21 + do_4rounds_2x \i + 12, \k3, v19, v16, v17, v18, v23, v20, v21, v22 + .endm + +// +// void sha256_ce_finup2x(const struct __sha256_ctx *ctx, +// const u8 *data1, const u8 *data2, int len, +// u8 out1[SHA256_DIGEST_SIZE], +// u8 out2[SHA256_DIGEST_SIZE]); +// +// This function computes the SHA-256 digests of two messages |data1| and +// |data2| that are both |len| bytes long, starting from the initial context +// |ctx|. |len| must be at least SHA256_BLOCK_SIZE. +// +// The instructions for the two SHA-256 operations are interleaved. On many +// CPUs, this is almost twice as fast as hashing each message individually due +// to taking better advantage of the CPU's SHA-256 and SIMD throughput. +// +SYM_FUNC_START(sha256_ce_finup2x) + sub sp, sp, #128 + mov final_step, #0 + load_round_constants x8 + + // Load the initial state from ctx->state. + ld1 {state0_a.4s-state1_a.4s}, [ctx] + + // Load ctx->bytecount. Take the mod 64 of it to get the number of + // bytes that are buffered in ctx->buf. Also save it in a register with + // len added to it. + ldr x8, [ctx, #OFFSETOF_BYTECOUNT] + add count, x8, len, sxtw + and x8, x8, #63 + cbz x8, .Lfinup2x_enter_loop // No bytes buffered? + + // x8 bytes (1 to 63) are currently buffered in ctx->buf. Load them + // followed by the first 64 - x8 bytes of data. Since len >= 64, we + // just load 64 bytes from each of ctx->buf, data1, and data2 + // unconditionally and rearrange the data as needed. + add x9, ctx, #OFFSETOF_BUF + ld1 {v16.16b-v19.16b}, [x9] + st1 {v16.16b-v19.16b}, [sp] + + ld1 {v16.16b-v19.16b}, [data1], #64 + add x9, sp, x8 + st1 {v16.16b-v19.16b}, [x9] + ld1 {v16.4s-v19.4s}, [sp] + + ld1 {v20.16b-v23.16b}, [data2], #64 + st1 {v20.16b-v23.16b}, [x9] + ld1 {v20.4s-v23.4s}, [sp] + + sub len, len, #64 + sub data1, data1, x8 + sub data2, data2, x8 + add len, len, w8 + mov state0_b.16b, state0_a.16b + mov state1_b.16b, state1_a.16b + b .Lfinup2x_loop_have_data + +.Lfinup2x_enter_loop: + sub len, len, #64 + mov state0_b.16b, state0_a.16b + mov state1_b.16b, state1_a.16b +.Lfinup2x_loop: + // Load the next two data blocks. + ld1 {v16.4s-v19.4s}, [data1], #64 + ld1 {v20.4s-v23.4s}, [data2], #64 +.Lfinup2x_loop_have_data: + // Convert the words of the data blocks from big endian. +CPU_LE( rev32 v16.16b, v16.16b ) +CPU_LE( rev32 v17.16b, v17.16b ) +CPU_LE( rev32 v18.16b, v18.16b ) +CPU_LE( rev32 v19.16b, v19.16b ) +CPU_LE( rev32 v20.16b, v20.16b ) +CPU_LE( rev32 v21.16b, v21.16b ) +CPU_LE( rev32 v22.16b, v22.16b ) +CPU_LE( rev32 v23.16b, v23.16b ) +.Lfinup2x_loop_have_bswapped_data: + + // Save the original state for each block. + st1 {state0_a.4s-state1_b.4s}, [sp] + + // Do the SHA-256 rounds on each block. + do_16rounds_2x 0, v0, v1, v2, v3 + do_16rounds_2x 16, v4, v5, v6, v7 + do_16rounds_2x 32, v8, v9, v10, v11 + do_16rounds_2x 48, v12, v13, v14, v15 + + // Add the original state for each block. + ld1 {v16.4s-v19.4s}, [sp] + add state0_a.4s, state0_a.4s, v16.4s + add state1_a.4s, state1_a.4s, v17.4s + add state0_b.4s, state0_b.4s, v18.4s + add state1_b.4s, state1_b.4s, v19.4s + + // Update len and loop back if more blocks remain. + sub len, len, #64 + tbz len, #31, .Lfinup2x_loop // len >= 0? + + // Check if any final blocks need to be handled. + // final_step = 2: all done + // final_step = 1: need to do count-only padding block + // final_step = 0: need to do the block with 0x80 padding byte + tbnz final_step, #1, .Lfinup2x_done + tbnz final_step, #0, .Lfinup2x_finalize_countonly + add len, len, #64 + cbz len, .Lfinup2x_finalize_blockaligned + + // Not block-aligned; 1 <= len <= 63 data bytes remain. Pad the block. + // To do this, write the padding starting with the 0x80 byte to + // &sp[64]. Then for each message, copy the last 64 data bytes to sp + // and load from &sp[64 - len] to get the needed padding block. This + // code relies on the data buffers being >= 64 bytes in length. + sub w8, len, #64 // w8 = len - 64 + add data1, data1, w8, sxtw // data1 += len - 64 + add data2, data2, w8, sxtw // data2 += len - 64 +CPU_LE( mov x9, #0x80 ) +CPU_LE( fmov d16, x9 ) +CPU_BE( movi v16.16b, #0 ) +CPU_BE( mov x9, #0x8000000000000000 ) +CPU_BE( mov v16.d[1], x9 ) + movi v17.16b, #0 + stp q16, q17, [sp, #64] + stp q17, q17, [sp, #96] + sub x9, sp, w8, sxtw // x9 = &sp[64 - len] + cmp len, #56 + b.ge 1f // will count spill into its own block? + lsl count, count, #3 +CPU_LE( rev count, count ) + str count, [x9, #56] + mov final_step, #2 // won't need count-only block + b 2f +1: + mov final_step, #1 // will need count-only block +2: + ld1 {v16.16b-v19.16b}, [data1] + st1 {v16.16b-v19.16b}, [sp] + ld1 {v16.4s-v19.4s}, [x9] + ld1 {v20.16b-v23.16b}, [data2] + st1 {v20.16b-v23.16b}, [sp] + ld1 {v20.4s-v23.4s}, [x9] + b .Lfinup2x_loop_have_data + + // Prepare a padding block, either: + // + // {0x80, 0, 0, 0, ..., count (as __be64)} + // This is for a block aligned message. + // + // { 0, 0, 0, 0, ..., count (as __be64)} + // This is for a message whose length mod 64 is >= 56. + // + // Pre-swap the endianness of the words. +.Lfinup2x_finalize_countonly: + movi v16.2d, #0 + b 1f +.Lfinup2x_finalize_blockaligned: + mov x8, #0x80000000 + fmov d16, x8 +1: + movi v17.2d, #0 + movi v18.2d, #0 + ror count, count, #29 // ror(lsl(count, 3), 32) + mov v19.d[0], xzr + mov v19.d[1], count + mov v20.16b, v16.16b + movi v21.2d, #0 + movi v22.2d, #0 + mov v23.16b, v19.16b + mov final_step, #2 + b .Lfinup2x_loop_have_bswapped_data + +.Lfinup2x_done: + // Write the two digests with all bytes in the correct order. +CPU_LE( rev32 state0_a.16b, state0_a.16b ) +CPU_LE( rev32 state1_a.16b, state1_a.16b ) +CPU_LE( rev32 state0_b.16b, state0_b.16b ) +CPU_LE( rev32 state1_b.16b, state1_b.16b ) + st1 {state0_a.4s-state1_a.4s}, [out1] + st1 {state0_b.4s-state1_b.4s}, [out2] + add sp, sp, #128 + ret +SYM_FUNC_END(sha256_ce_finup2x) diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h index a211966c124a..a7bc3a90ada6 100644 --- a/lib/crypto/arm64/sha256.h +++ b/lib/crypto/arm64/sha256.h @@ -44,6 +44,43 @@ static void sha256_blocks(struct sha256_block_state *state, } } +static_assert(offsetof(struct __sha256_ctx, state) == 0); +static_assert(offsetof(struct __sha256_ctx, bytecount) == 32); +static_assert(offsetof(struct __sha256_ctx, buf) == 40); +asmlinkage void sha256_ce_finup2x(const struct __sha256_ctx *ctx, + const u8 *data1, const u8 *data2, int len, + u8 out1[SHA256_DIGEST_SIZE], + u8 out2[SHA256_DIGEST_SIZE]); + +#define sha256_finup_2x_arch sha256_finup_2x_arch +static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx, + const u8 *data1, const u8 *data2, size_t len, + u8 out1[SHA256_DIGEST_SIZE], + u8 out2[SHA256_DIGEST_SIZE]) +{ + /* + * The assembly requires len >= SHA256_BLOCK_SIZE && len <= INT_MAX. + * Further limit len to 65536 to avoid spending too long with preemption + * disabled. (Of course, in practice len is nearly always 4096 anyway.) + */ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_ce) && len >= SHA256_BLOCK_SIZE && + len <= 65536 && likely(may_use_simd())) { + kernel_neon_begin(); + sha256_ce_finup2x(ctx, data1, data2, len, out1, out2); + kernel_neon_end(); + kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE); + kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE); + return true; + } + return false; +} + +static bool sha256_finup_2x_is_optimized_arch(void) +{ + return static_key_enabled(&have_ce); +} + #ifdef CONFIG_KERNEL_MODE_NEON #define sha256_mod_init_arch sha256_mod_init_arch static inline void sha256_mod_init_arch(void) -- cgit v1.2.3 From bc6d6a4172a7fa599fe66f16e9a2c1cdc6983207 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 15 Sep 2025 11:08:16 -0500 Subject: lib/crypto: x86/sha256: Add support for 2-way interleaved hashing Add an implementation of sha256_finup_2x_arch() for x86_64. It interleaves the computation of two SHA-256 hashes using the x86 SHA-NI instructions. dm-verity and fs-verity will take advantage of this for greatly improved performance on capable CPUs. This increases the throughput of SHA-256 hashing 4096-byte messages by the following amounts on the following CPUs: Intel Ice Lake (server): 4% Intel Sapphire Rapids: 38% Intel Emerald Rapids: 38% AMD Zen 1 (Threadripper 1950X): 84% AMD Zen 4 (EPYC 9B14): 98% AMD Zen 5 (Ryzen 9 9950X): 64% For now, this seems to benefit AMD more than Intel. This seems to be because current AMD CPUs support concurrent execution of the SHA-NI instructions, but unfortunately current Intel CPUs don't, except for the sha256msg2 instruction. Hopefully future Intel CPUs will support SHA-NI on more execution ports. Zen 1 supports 2 concurrent sha256rnds2, and Zen 4 supports 4 concurrent sha256rnds2, which suggests that even better performance may be achievable on Zen 4 by interleaving more than two hashes. However, doing so poses a number of trade-offs, and furthermore Zen 5 goes back to supporting "only" 2 concurrent sha256rnds2. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250915160819.140019-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/x86/sha256-ni-asm.S | 368 +++++++++++++++++++++++++++++++++++++++++ lib/crypto/x86/sha256.h | 39 +++++ 2 files changed, 407 insertions(+) (limited to 'lib') diff --git a/lib/crypto/x86/sha256-ni-asm.S b/lib/crypto/x86/sha256-ni-asm.S index 4bd9490ffc66..de5f707e7ef7 100644 --- a/lib/crypto/x86/sha256-ni-asm.S +++ b/lib/crypto/x86/sha256-ni-asm.S @@ -165,6 +165,374 @@ SYM_FUNC_START(sha256_ni_transform) RET SYM_FUNC_END(sha256_ni_transform) +#undef DIGEST_PTR +#undef DATA_PTR +#undef NUM_BLKS +#undef SHA256CONSTANTS +#undef MSG +#undef STATE0 +#undef STATE1 +#undef MSG0 +#undef MSG1 +#undef MSG2 +#undef MSG3 +#undef TMP +#undef SHUF_MASK +#undef ABEF_SAVE +#undef CDGH_SAVE + +// parameters for sha256_ni_finup2x() +#define CTX %rdi +#define DATA1 %rsi +#define DATA2 %rdx +#define LEN %ecx +#define LEN8 %cl +#define LEN64 %rcx +#define OUT1 %r8 +#define OUT2 %r9 + +// other scalar variables +#define SHA256CONSTANTS %rax +#define COUNT %r10 +#define COUNT32 %r10d +#define FINAL_STEP %r11d + +// rbx is used as a temporary. + +#define MSG %xmm0 // sha256rnds2 implicit operand +#define STATE0_A %xmm1 +#define STATE1_A %xmm2 +#define STATE0_B %xmm3 +#define STATE1_B %xmm4 +#define TMP_A %xmm5 +#define TMP_B %xmm6 +#define MSG0_A %xmm7 +#define MSG1_A %xmm8 +#define MSG2_A %xmm9 +#define MSG3_A %xmm10 +#define MSG0_B %xmm11 +#define MSG1_B %xmm12 +#define MSG2_B %xmm13 +#define MSG3_B %xmm14 +#define SHUF_MASK %xmm15 + +#define OFFSETOF_STATE 0 // offsetof(struct __sha256_ctx, state) +#define OFFSETOF_BYTECOUNT 32 // offsetof(struct __sha256_ctx, bytecount) +#define OFFSETOF_BUF 40 // offsetof(struct __sha256_ctx, buf) + +// Do 4 rounds of SHA-256 for each of two messages (interleaved). m0_a and m0_b +// contain the current 4 message schedule words for the first and second message +// respectively. +// +// If not all the message schedule words have been computed yet, then this also +// computes 4 more message schedule words for each message. m1_a-m3_a contain +// the next 3 groups of 4 message schedule words for the first message, and +// likewise m1_b-m3_b for the second. After consuming the current value of +// m0_a, this macro computes the group after m3_a and writes it to m0_a, and +// likewise for *_b. This means that the next (m0_a, m1_a, m2_a, m3_a) is the +// current (m1_a, m2_a, m3_a, m0_a), and likewise for *_b, so the caller must +// cycle through the registers accordingly. +.macro do_4rounds_2x i, m0_a, m1_a, m2_a, m3_a, m0_b, m1_b, m2_b, m3_b + movdqa (\i-32)*4(SHA256CONSTANTS), TMP_A + movdqa TMP_A, TMP_B + paddd \m0_a, TMP_A + paddd \m0_b, TMP_B +.if \i < 48 + sha256msg1 \m1_a, \m0_a + sha256msg1 \m1_b, \m0_b +.endif + movdqa TMP_A, MSG + sha256rnds2 STATE0_A, STATE1_A + movdqa TMP_B, MSG + sha256rnds2 STATE0_B, STATE1_B + pshufd $0x0E, TMP_A, MSG + sha256rnds2 STATE1_A, STATE0_A + pshufd $0x0E, TMP_B, MSG + sha256rnds2 STATE1_B, STATE0_B +.if \i < 48 + movdqa \m3_a, TMP_A + movdqa \m3_b, TMP_B + palignr $4, \m2_a, TMP_A + palignr $4, \m2_b, TMP_B + paddd TMP_A, \m0_a + paddd TMP_B, \m0_b + sha256msg2 \m3_a, \m0_a + sha256msg2 \m3_b, \m0_b +.endif +.endm + +// +// void sha256_ni_finup2x(const struct __sha256_ctx *ctx, +// const u8 *data1, const u8 *data2, int len, +// u8 out1[SHA256_DIGEST_SIZE], +// u8 out2[SHA256_DIGEST_SIZE]); +// +// This function computes the SHA-256 digests of two messages |data1| and +// |data2| that are both |len| bytes long, starting from the initial context +// |ctx|. |len| must be at least SHA256_BLOCK_SIZE. +// +// The instructions for the two SHA-256 operations are interleaved. On many +// CPUs, this is almost twice as fast as hashing each message individually due +// to taking better advantage of the CPU's SHA-256 and SIMD throughput. +// +SYM_FUNC_START(sha256_ni_finup2x) + // Allocate 128 bytes of stack space, 16-byte aligned. + push %rbx + push %rbp + mov %rsp, %rbp + sub $128, %rsp + and $~15, %rsp + + // Load the shuffle mask for swapping the endianness of 32-bit words. + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + + // Set up pointer to the round constants. + lea K256+32*4(%rip), SHA256CONSTANTS + + // Initially we're not processing the final blocks. + xor FINAL_STEP, FINAL_STEP + + // Load the initial state from ctx->state. + movdqu OFFSETOF_STATE+0*16(CTX), STATE0_A // DCBA + movdqu OFFSETOF_STATE+1*16(CTX), STATE1_A // HGFE + movdqa STATE0_A, TMP_A + punpcklqdq STATE1_A, STATE0_A // FEBA + punpckhqdq TMP_A, STATE1_A // DCHG + pshufd $0x1B, STATE0_A, STATE0_A // ABEF + pshufd $0xB1, STATE1_A, STATE1_A // CDGH + + // Load ctx->bytecount. Take the mod 64 of it to get the number of + // bytes that are buffered in ctx->buf. Also save it in a register with + // LEN added to it. + mov LEN, LEN + mov OFFSETOF_BYTECOUNT(CTX), %rbx + lea (%rbx, LEN64, 1), COUNT + and $63, %ebx + jz .Lfinup2x_enter_loop // No bytes buffered? + + // %ebx bytes (1 to 63) are currently buffered in ctx->buf. Load them + // followed by the first 64 - %ebx bytes of data. Since LEN >= 64, we + // just load 64 bytes from each of ctx->buf, DATA1, and DATA2 + // unconditionally and rearrange the data as needed. + + movdqu OFFSETOF_BUF+0*16(CTX), MSG0_A + movdqu OFFSETOF_BUF+1*16(CTX), MSG1_A + movdqu OFFSETOF_BUF+2*16(CTX), MSG2_A + movdqu OFFSETOF_BUF+3*16(CTX), MSG3_A + movdqa MSG0_A, 0*16(%rsp) + movdqa MSG1_A, 1*16(%rsp) + movdqa MSG2_A, 2*16(%rsp) + movdqa MSG3_A, 3*16(%rsp) + + movdqu 0*16(DATA1), MSG0_A + movdqu 1*16(DATA1), MSG1_A + movdqu 2*16(DATA1), MSG2_A + movdqu 3*16(DATA1), MSG3_A + movdqu MSG0_A, 0*16(%rsp,%rbx) + movdqu MSG1_A, 1*16(%rsp,%rbx) + movdqu MSG2_A, 2*16(%rsp,%rbx) + movdqu MSG3_A, 3*16(%rsp,%rbx) + movdqa 0*16(%rsp), MSG0_A + movdqa 1*16(%rsp), MSG1_A + movdqa 2*16(%rsp), MSG2_A + movdqa 3*16(%rsp), MSG3_A + + movdqu 0*16(DATA2), MSG0_B + movdqu 1*16(DATA2), MSG1_B + movdqu 2*16(DATA2), MSG2_B + movdqu 3*16(DATA2), MSG3_B + movdqu MSG0_B, 0*16(%rsp,%rbx) + movdqu MSG1_B, 1*16(%rsp,%rbx) + movdqu MSG2_B, 2*16(%rsp,%rbx) + movdqu MSG3_B, 3*16(%rsp,%rbx) + movdqa 0*16(%rsp), MSG0_B + movdqa 1*16(%rsp), MSG1_B + movdqa 2*16(%rsp), MSG2_B + movdqa 3*16(%rsp), MSG3_B + + sub $64, %rbx // rbx = buffered - 64 + sub %rbx, DATA1 // DATA1 += 64 - buffered + sub %rbx, DATA2 // DATA2 += 64 - buffered + add %ebx, LEN // LEN += buffered - 64 + movdqa STATE0_A, STATE0_B + movdqa STATE1_A, STATE1_B + jmp .Lfinup2x_loop_have_data + +.Lfinup2x_enter_loop: + sub $64, LEN + movdqa STATE0_A, STATE0_B + movdqa STATE1_A, STATE1_B +.Lfinup2x_loop: + // Load the next two data blocks. + movdqu 0*16(DATA1), MSG0_A + movdqu 0*16(DATA2), MSG0_B + movdqu 1*16(DATA1), MSG1_A + movdqu 1*16(DATA2), MSG1_B + movdqu 2*16(DATA1), MSG2_A + movdqu 2*16(DATA2), MSG2_B + movdqu 3*16(DATA1), MSG3_A + movdqu 3*16(DATA2), MSG3_B + add $64, DATA1 + add $64, DATA2 +.Lfinup2x_loop_have_data: + // Convert the words of the data blocks from big endian. + pshufb SHUF_MASK, MSG0_A + pshufb SHUF_MASK, MSG0_B + pshufb SHUF_MASK, MSG1_A + pshufb SHUF_MASK, MSG1_B + pshufb SHUF_MASK, MSG2_A + pshufb SHUF_MASK, MSG2_B + pshufb SHUF_MASK, MSG3_A + pshufb SHUF_MASK, MSG3_B +.Lfinup2x_loop_have_bswapped_data: + + // Save the original state for each block. + movdqa STATE0_A, 0*16(%rsp) + movdqa STATE0_B, 1*16(%rsp) + movdqa STATE1_A, 2*16(%rsp) + movdqa STATE1_B, 3*16(%rsp) + + // Do the SHA-256 rounds on each block. +.irp i, 0, 16, 32, 48 + do_4rounds_2x (\i + 0), MSG0_A, MSG1_A, MSG2_A, MSG3_A, \ + MSG0_B, MSG1_B, MSG2_B, MSG3_B + do_4rounds_2x (\i + 4), MSG1_A, MSG2_A, MSG3_A, MSG0_A, \ + MSG1_B, MSG2_B, MSG3_B, MSG0_B + do_4rounds_2x (\i + 8), MSG2_A, MSG3_A, MSG0_A, MSG1_A, \ + MSG2_B, MSG3_B, MSG0_B, MSG1_B + do_4rounds_2x (\i + 12), MSG3_A, MSG0_A, MSG1_A, MSG2_A, \ + MSG3_B, MSG0_B, MSG1_B, MSG2_B +.endr + + // Add the original state for each block. + paddd 0*16(%rsp), STATE0_A + paddd 1*16(%rsp), STATE0_B + paddd 2*16(%rsp), STATE1_A + paddd 3*16(%rsp), STATE1_B + + // Update LEN and loop back if more blocks remain. + sub $64, LEN + jge .Lfinup2x_loop + + // Check if any final blocks need to be handled. + // FINAL_STEP = 2: all done + // FINAL_STEP = 1: need to do count-only padding block + // FINAL_STEP = 0: need to do the block with 0x80 padding byte + cmp $1, FINAL_STEP + jg .Lfinup2x_done + je .Lfinup2x_finalize_countonly + add $64, LEN + jz .Lfinup2x_finalize_blockaligned + + // Not block-aligned; 1 <= LEN <= 63 data bytes remain. Pad the block. + // To do this, write the padding starting with the 0x80 byte to + // &sp[64]. Then for each message, copy the last 64 data bytes to sp + // and load from &sp[64 - LEN] to get the needed padding block. This + // code relies on the data buffers being >= 64 bytes in length. + mov $64, %ebx + sub LEN, %ebx // ebx = 64 - LEN + sub %rbx, DATA1 // DATA1 -= 64 - LEN + sub %rbx, DATA2 // DATA2 -= 64 - LEN + mov $0x80, FINAL_STEP // using FINAL_STEP as a temporary + movd FINAL_STEP, MSG0_A + pxor MSG1_A, MSG1_A + movdqa MSG0_A, 4*16(%rsp) + movdqa MSG1_A, 5*16(%rsp) + movdqa MSG1_A, 6*16(%rsp) + movdqa MSG1_A, 7*16(%rsp) + cmp $56, LEN + jge 1f // will COUNT spill into its own block? + shl $3, COUNT + bswap COUNT + mov COUNT, 56(%rsp,%rbx) + mov $2, FINAL_STEP // won't need count-only block + jmp 2f +1: + mov $1, FINAL_STEP // will need count-only block +2: + movdqu 0*16(DATA1), MSG0_A + movdqu 1*16(DATA1), MSG1_A + movdqu 2*16(DATA1), MSG2_A + movdqu 3*16(DATA1), MSG3_A + movdqa MSG0_A, 0*16(%rsp) + movdqa MSG1_A, 1*16(%rsp) + movdqa MSG2_A, 2*16(%rsp) + movdqa MSG3_A, 3*16(%rsp) + movdqu 0*16(%rsp,%rbx), MSG0_A + movdqu 1*16(%rsp,%rbx), MSG1_A + movdqu 2*16(%rsp,%rbx), MSG2_A + movdqu 3*16(%rsp,%rbx), MSG3_A + + movdqu 0*16(DATA2), MSG0_B + movdqu 1*16(DATA2), MSG1_B + movdqu 2*16(DATA2), MSG2_B + movdqu 3*16(DATA2), MSG3_B + movdqa MSG0_B, 0*16(%rsp) + movdqa MSG1_B, 1*16(%rsp) + movdqa MSG2_B, 2*16(%rsp) + movdqa MSG3_B, 3*16(%rsp) + movdqu 0*16(%rsp,%rbx), MSG0_B + movdqu 1*16(%rsp,%rbx), MSG1_B + movdqu 2*16(%rsp,%rbx), MSG2_B + movdqu 3*16(%rsp,%rbx), MSG3_B + jmp .Lfinup2x_loop_have_data + + // Prepare a padding block, either: + // + // {0x80, 0, 0, 0, ..., count (as __be64)} + // This is for a block aligned message. + // + // { 0, 0, 0, 0, ..., count (as __be64)} + // This is for a message whose length mod 64 is >= 56. + // + // Pre-swap the endianness of the words. +.Lfinup2x_finalize_countonly: + pxor MSG0_A, MSG0_A + jmp 1f + +.Lfinup2x_finalize_blockaligned: + mov $0x80000000, %ebx + movd %ebx, MSG0_A +1: + pxor MSG1_A, MSG1_A + pxor MSG2_A, MSG2_A + ror $29, COUNT + movq COUNT, MSG3_A + pslldq $8, MSG3_A + movdqa MSG0_A, MSG0_B + pxor MSG1_B, MSG1_B + pxor MSG2_B, MSG2_B + movdqa MSG3_A, MSG3_B + mov $2, FINAL_STEP + jmp .Lfinup2x_loop_have_bswapped_data + +.Lfinup2x_done: + // Write the two digests with all bytes in the correct order. + movdqa STATE0_A, TMP_A + movdqa STATE0_B, TMP_B + punpcklqdq STATE1_A, STATE0_A // GHEF + punpcklqdq STATE1_B, STATE0_B + punpckhqdq TMP_A, STATE1_A // ABCD + punpckhqdq TMP_B, STATE1_B + pshufd $0xB1, STATE0_A, STATE0_A // HGFE + pshufd $0xB1, STATE0_B, STATE0_B + pshufd $0x1B, STATE1_A, STATE1_A // DCBA + pshufd $0x1B, STATE1_B, STATE1_B + pshufb SHUF_MASK, STATE0_A + pshufb SHUF_MASK, STATE0_B + pshufb SHUF_MASK, STATE1_A + pshufb SHUF_MASK, STATE1_B + movdqu STATE0_A, 1*16(OUT1) + movdqu STATE0_B, 1*16(OUT2) + movdqu STATE1_A, 0*16(OUT1) + movdqu STATE1_B, 0*16(OUT2) + + mov %rbp, %rsp + pop %rbp + pop %rbx + RET +SYM_FUNC_END(sha256_ni_finup2x) + .section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h index 669bc06538b6..ecea65b608db 100644 --- a/lib/crypto/x86/sha256.h +++ b/lib/crypto/x86/sha256.h @@ -8,6 +8,8 @@ #include #include +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha_ni); + DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic); #define DEFINE_X86_SHA256_FN(c_fn, asm_fn) \ @@ -36,11 +38,48 @@ static void sha256_blocks(struct sha256_block_state *state, static_call(sha256_blocks_x86)(state, data, nblocks); } +static_assert(offsetof(struct __sha256_ctx, state) == 0); +static_assert(offsetof(struct __sha256_ctx, bytecount) == 32); +static_assert(offsetof(struct __sha256_ctx, buf) == 40); +asmlinkage void sha256_ni_finup2x(const struct __sha256_ctx *ctx, + const u8 *data1, const u8 *data2, int len, + u8 out1[SHA256_DIGEST_SIZE], + u8 out2[SHA256_DIGEST_SIZE]); + +#define sha256_finup_2x_arch sha256_finup_2x_arch +static bool sha256_finup_2x_arch(const struct __sha256_ctx *ctx, + const u8 *data1, const u8 *data2, size_t len, + u8 out1[SHA256_DIGEST_SIZE], + u8 out2[SHA256_DIGEST_SIZE]) +{ + /* + * The assembly requires len >= SHA256_BLOCK_SIZE && len <= INT_MAX. + * Further limit len to 65536 to avoid spending too long with preemption + * disabled. (Of course, in practice len is nearly always 4096 anyway.) + */ + if (static_branch_likely(&have_sha_ni) && len >= SHA256_BLOCK_SIZE && + len <= 65536 && likely(irq_fpu_usable())) { + kernel_fpu_begin(); + sha256_ni_finup2x(ctx, data1, data2, len, out1, out2); + kernel_fpu_end(); + kmsan_unpoison_memory(out1, SHA256_DIGEST_SIZE); + kmsan_unpoison_memory(out2, SHA256_DIGEST_SIZE); + return true; + } + return false; +} + +static bool sha256_finup_2x_is_optimized_arch(void) +{ + return static_key_enabled(&have_sha_ni); +} + #define sha256_mod_init_arch sha256_mod_init_arch static inline void sha256_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) { static_call_update(sha256_blocks_x86, sha256_blocks_ni); + static_branch_enable(&have_sha_ni); } else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL) && boot_cpu_has(X86_FEATURE_AVX)) { -- cgit v1.2.3 From 6733968be7cb0ce559c755f639c41629fa11761e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 15 Sep 2025 11:08:17 -0500 Subject: lib/crypto: tests: Add tests and benchmark for sha256_finup_2x() Update sha256_kunit to include test cases and a benchmark for the new sha256_finup_2x() function. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250915160819.140019-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/sha256_kunit.c | 184 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) (limited to 'lib') diff --git a/lib/crypto/tests/sha256_kunit.c b/lib/crypto/tests/sha256_kunit.c index 1cd4caee6010..dcedfca06df6 100644 --- a/lib/crypto/tests/sha256_kunit.c +++ b/lib/crypto/tests/sha256_kunit.c @@ -5,6 +5,7 @@ #include #include "sha256-testvecs.h" +/* Generate the HASH_KUNIT_CASES using hash-test-template.h. */ #define HASH sha256 #define HASH_CTX sha256_ctx #define HASH_SIZE SHA256_DIGEST_SIZE @@ -21,9 +22,192 @@ #define HMAC_USINGRAWKEY hmac_sha256_usingrawkey #include "hash-test-template.h" +static void free_guarded_buf(void *buf) +{ + vfree(buf); +} + +/* + * Allocate a KUnit-managed buffer that has length @len bytes immediately + * followed by an unmapped page, and assert that the allocation succeeds. + */ +static void *alloc_guarded_buf(struct kunit *test, size_t len) +{ + size_t full_len = round_up(len, PAGE_SIZE); + void *buf = vmalloc(full_len); + + KUNIT_ASSERT_NOT_NULL(test, buf); + KUNIT_ASSERT_EQ(test, 0, + kunit_add_action_or_reset(test, free_guarded_buf, buf)); + return buf + full_len - len; +} + +/* + * Test for sha256_finup_2x(). Specifically, choose various data lengths and + * salt lengths, and for each one, verify that sha256_finup_2x() produces the + * same results as sha256_update() and sha256_final(). + * + * Use guarded buffers for all inputs and outputs to reliably detect any + * out-of-bounds reads or writes, even if they occur in assembly code. + */ +static void test_sha256_finup_2x(struct kunit *test) +{ + const size_t max_data_len = 16384; + u8 *data1_buf, *data2_buf, *hash1, *hash2; + u8 expected_hash1[SHA256_DIGEST_SIZE]; + u8 expected_hash2[SHA256_DIGEST_SIZE]; + u8 salt[SHA256_BLOCK_SIZE]; + struct sha256_ctx *ctx; + + data1_buf = alloc_guarded_buf(test, max_data_len); + data2_buf = alloc_guarded_buf(test, max_data_len); + hash1 = alloc_guarded_buf(test, SHA256_DIGEST_SIZE); + hash2 = alloc_guarded_buf(test, SHA256_DIGEST_SIZE); + ctx = alloc_guarded_buf(test, sizeof(*ctx)); + + rand_bytes(data1_buf, max_data_len); + rand_bytes(data2_buf, max_data_len); + rand_bytes(salt, sizeof(salt)); + + for (size_t i = 0; i < 500; i++) { + size_t salt_len = rand_length(sizeof(salt)); + size_t data_len = rand_length(max_data_len); + const u8 *data1 = data1_buf + max_data_len - data_len; + const u8 *data2 = data2_buf + max_data_len - data_len; + struct sha256_ctx orig_ctx; + + sha256_init(ctx); + sha256_update(ctx, salt, salt_len); + orig_ctx = *ctx; + + sha256_finup_2x(ctx, data1, data2, data_len, hash1, hash2); + KUNIT_ASSERT_MEMEQ_MSG( + test, ctx, &orig_ctx, sizeof(*ctx), + "sha256_finup_2x() modified its ctx argument"); + + sha256_update(ctx, data1, data_len); + sha256_final(ctx, expected_hash1); + sha256_update(&orig_ctx, data2, data_len); + sha256_final(&orig_ctx, expected_hash2); + KUNIT_ASSERT_MEMEQ_MSG( + test, hash1, expected_hash1, SHA256_DIGEST_SIZE, + "Wrong hash1 with salt_len=%zu data_len=%zu", salt_len, + data_len); + KUNIT_ASSERT_MEMEQ_MSG( + test, hash2, expected_hash2, SHA256_DIGEST_SIZE, + "Wrong hash2 with salt_len=%zu data_len=%zu", salt_len, + data_len); + } +} + +/* Test sha256_finup_2x() with ctx == NULL */ +static void test_sha256_finup_2x_defaultctx(struct kunit *test) +{ + const size_t data_len = 128; + struct sha256_ctx ctx; + u8 hash1_a[SHA256_DIGEST_SIZE]; + u8 hash2_a[SHA256_DIGEST_SIZE]; + u8 hash1_b[SHA256_DIGEST_SIZE]; + u8 hash2_b[SHA256_DIGEST_SIZE]; + + rand_bytes(test_buf, 2 * data_len); + + sha256_init(&ctx); + sha256_finup_2x(&ctx, test_buf, &test_buf[data_len], data_len, hash1_a, + hash2_a); + + sha256_finup_2x(NULL, test_buf, &test_buf[data_len], data_len, hash1_b, + hash2_b); + + KUNIT_ASSERT_MEMEQ(test, hash1_a, hash1_b, SHA256_DIGEST_SIZE); + KUNIT_ASSERT_MEMEQ(test, hash2_a, hash2_b, SHA256_DIGEST_SIZE); +} + +/* + * Test that sha256_finup_2x() and sha256_update/final() produce consistent + * results with total message lengths that require more than 32 bits. + */ +static void test_sha256_finup_2x_hugelen(struct kunit *test) +{ + const size_t data_len = 4 * SHA256_BLOCK_SIZE; + struct sha256_ctx ctx = {}; + u8 expected_hash[SHA256_DIGEST_SIZE]; + u8 hash[SHA256_DIGEST_SIZE]; + + rand_bytes(test_buf, data_len); + for (size_t align = 0; align < SHA256_BLOCK_SIZE; align++) { + sha256_init(&ctx); + ctx.ctx.bytecount = 0x123456789abcd00 + align; + + sha256_finup_2x(&ctx, test_buf, test_buf, data_len, hash, hash); + + sha256_update(&ctx, test_buf, data_len); + sha256_final(&ctx, expected_hash); + + KUNIT_ASSERT_MEMEQ(test, hash, expected_hash, + SHA256_DIGEST_SIZE); + } +} + +/* Benchmark for sha256_finup_2x() */ +static void benchmark_sha256_finup_2x(struct kunit *test) +{ + /* + * Try a few different salt lengths, since sha256_finup_2x() performance + * may vary slightly for the same data_len depending on how many bytes + * were already processed in the initial context. + */ + static const size_t salt_lens_to_test[] = { 0, 32, 64 }; + const size_t data_len = 4096; + const size_t num_iters = 4096; + struct sha256_ctx ctx; + u8 hash1[SHA256_DIGEST_SIZE]; + u8 hash2[SHA256_DIGEST_SIZE]; + + if (!IS_ENABLED(CONFIG_CRYPTO_LIB_BENCHMARK)) + kunit_skip(test, "not enabled"); + if (!sha256_finup_2x_is_optimized()) + kunit_skip(test, "not relevant"); + + rand_bytes(test_buf, data_len * 2); + + /* Warm-up */ + for (size_t i = 0; i < num_iters; i++) + sha256_finup_2x(NULL, &test_buf[0], &test_buf[data_len], + data_len, hash1, hash2); + + for (size_t i = 0; i < ARRAY_SIZE(salt_lens_to_test); i++) { + size_t salt_len = salt_lens_to_test[i]; + u64 t0, t1; + + /* + * Prepare the initial context. The time to process the salt is + * not measured; we're just interested in sha256_finup_2x(). + */ + sha256_init(&ctx); + sha256_update(&ctx, test_buf, salt_len); + + preempt_disable(); + t0 = ktime_get_ns(); + for (size_t j = 0; j < num_iters; j++) + sha256_finup_2x(&ctx, &test_buf[0], &test_buf[data_len], + data_len, hash1, hash2); + t1 = ktime_get_ns(); + preempt_enable(); + kunit_info(test, "data_len=%zu salt_len=%zu: %llu MB/s", + data_len, salt_len, + div64_u64((u64)data_len * 2 * num_iters * 1000, + t1 - t0 ?: 1)); + } +} + static struct kunit_case hash_test_cases[] = { HASH_KUNIT_CASES, + KUNIT_CASE(test_sha256_finup_2x), + KUNIT_CASE(test_sha256_finup_2x_defaultctx), + KUNIT_CASE(test_sha256_finup_2x_hugelen), KUNIT_CASE(benchmark_hash), + KUNIT_CASE(benchmark_sha256_finup_2x), {}, }; -- cgit v1.2.3 From 1e338f4d99e6814ede16bad1db1cc463aad8032c Mon Sep 17 00:00:00 2001 From: Sabyrzhan Tasbolatov Date: Sun, 10 Aug 2025 17:57:45 +0500 Subject: kasan: introduce ARCH_DEFER_KASAN and unify static key across modes Patch series "kasan: unify kasan_enabled() and remove arch-specific implementations", v6. This patch series addresses the fragmentation in KASAN initialization across architectures by introducing a unified approach that eliminates duplicate static keys and arch-specific kasan_arch_is_ready() implementations. The core issue is that different architectures have inconsistent approaches to KASAN readiness tracking: - PowerPC, LoongArch, and UML arch, each implement own kasan_arch_is_ready() - Only HW_TAGS mode had a unified static key (kasan_flag_enabled) - Generic and SW_TAGS modes relied on arch-specific solutions or always-on behavior This patch (of 2): Introduce CONFIG_ARCH_DEFER_KASAN to identify architectures [1] that need to defer KASAN initialization until shadow memory is properly set up, and unify the static key infrastructure across all KASAN modes. [1] PowerPC, UML, LoongArch selects ARCH_DEFER_KASAN. The core issue is that different architectures haveinconsistent approaches to KASAN readiness tracking: - PowerPC, LoongArch, and UML arch, each implement own kasan_arch_is_ready() - Only HW_TAGS mode had a unified static key (kasan_flag_enabled) - Generic and SW_TAGS modes relied on arch-specific solutions or always-on behavior This patch addresses the fragmentation in KASAN initialization across architectures by introducing a unified approach that eliminates duplicate static keys and arch-specific kasan_arch_is_ready() implementations. Let's replace kasan_arch_is_ready() with existing kasan_enabled() check, which examines the static key being enabled if arch selects ARCH_DEFER_KASAN or has HW_TAGS mode support. For other arch, kasan_enabled() checks the enablement during compile time. Now KASAN users can use a single kasan_enabled() check everywhere. Link: https://lkml.kernel.org/r/20250810125746.1105476-1-snovitoll@gmail.com Link: https://lkml.kernel.org/r/20250810125746.1105476-2-snovitoll@gmail.com Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217049 Signed-off-by: Sabyrzhan Tasbolatov Reviewed-by: Christophe Leroy Reviewed-by: Ritesh Harjani (IBM) #powerpc Cc: Alexander Gordeev Cc: Alexander Potapenko Cc: Alexandre Ghiti Cc: Alexandre Ghiti Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Baoquan He Cc: David Gow Cc: Dmitriy Vyukov Cc: Heiko Carstens Cc: Huacai Chen Cc: Marco Elver Cc: Qing Zhang Cc: Sabyrzhan Tasbolatov Cc: Vincenzo Frascino Signed-off-by: Andrew Morton --- lib/Kconfig.kasan | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index f82889a830fa..a4bb610a7a6f 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -19,6 +19,18 @@ config ARCH_DISABLE_KASAN_INLINE Disables both inline and stack instrumentation. Selected by architectures that do not support these instrumentation types. +config ARCH_NEEDS_DEFER_KASAN + bool + +config ARCH_DEFER_KASAN + def_bool y + depends on KASAN && ARCH_NEEDS_DEFER_KASAN + help + Architectures should select this if they need to defer KASAN + initialization until shadow memory is properly set up. This + enables runtime control via static keys. Otherwise, KASAN uses + compile-time constants for better performance. + config CC_HAS_KASAN_GENERIC def_bool $(cc-option, -fsanitize=kernel-address) -- cgit v1.2.3 From 123bcf284205b2513c4172c50da8d193f8f8ab3d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 15 Sep 2025 14:27:54 -0700 Subject: alloc_tag: use release_pages() in the cleanup path Patch series "Minor fixes for memory allocation profiling", v2. Over the last couple months I gathered a few reports of minor issues in memory allocation profiling which are addressed in this patchset. This patch (of 2): When bulk-freeing an array of pages use release_pages() instead of freeing them page-by-page. Link: https://lkml.kernel.org/r/20250915212756.3998938-1-surenb@google.com Link: https://lkml.kernel.org/r/20250915212756.3998938-2-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Andrew Morton Suggested-by: Usama Arif Acked-by: Shakeel Butt Acked-by: Usama Arif Cc: David Wang <00107082@163.com> Cc: Johannes Weiner Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index e9b33848700a..715315f5d9ba 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -438,9 +438,10 @@ static int vm_module_tags_populate(void) if (nr < more_pages || vmap_pages_range(phys_end, phys_end + (nr << PAGE_SHIFT), PAGE_KERNEL, next_page, PAGE_SHIFT) < 0) { + release_pages_arg arg = { .pages = next_page }; + /* Clean up and error out */ - for (int i = 0; i < nr; i++) - __free_page(next_page[i]); + release_pages(arg, nr); return -ENOMEM; } @@ -682,11 +683,10 @@ static int __init alloc_mod_tags_mem(void) static void __init free_mod_tags_mem(void) { - int i; + release_pages_arg arg = { .pages = vm_module_tags->pages }; module_tags.start_addr = 0; - for (i = 0; i < vm_module_tags->nr_pages; i++) - __free_page(vm_module_tags->pages[i]); + release_pages(arg, vm_module_tags->nr_pages); kfree(vm_module_tags->pages); free_vm_area(vm_module_tags); } -- cgit v1.2.3 From 9e8a0bbb128ec9379ce271ccecdfb022c483da0b Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 15 Sep 2025 14:27:55 -0700 Subject: alloc_tag: prevent enabling memory profiling if it was shut down Memory profiling can be shut down due to reasons like a failure during initialization. When this happens, the user should not be able to re-enable it. Current sysctrl interface does not handle this properly and will allow re-enabling memory profiling. Fix this by checking for this condition during sysctrl write operation. Link: https://lkml.kernel.org/r/20250915212756.3998938-3-surenb@google.com Signed-off-by: Suren Baghdasaryan Acked-by: Shakeel Butt Acked-by: Usama Arif Cc: David Wang <00107082@163.com> Cc: Johannes Weiner Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Sourav Panda Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 715315f5d9ba..f79217427d81 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -766,6 +766,20 @@ struct page_ext_operations page_alloc_tagging_ops = { EXPORT_SYMBOL(page_alloc_tagging_ops); #ifdef CONFIG_SYSCTL +/* + * Not using proc_do_static_key() directly to prevent enabling profiling + * after it was shut down. + */ +static int proc_mem_profiling_handler(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + if (!mem_profiling_support && write) + return -EINVAL; + + return proc_do_static_key(table, write, buffer, lenp, ppos); +} + + static struct ctl_table memory_allocation_profiling_sysctls[] = { { .procname = "mem_profiling", @@ -775,7 +789,7 @@ static struct ctl_table memory_allocation_profiling_sysctls[] = { #else .mode = 0644, #endif - .proc_handler = proc_do_static_key, + .proc_handler = proc_mem_profiling_handler, }, }; -- cgit v1.2.3 From b9e2f58ffb84afcbba7e66f96ca14f98e0e88f26 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Mon, 15 Sep 2025 16:02:24 -0700 Subject: alloc_tag: mark inaccurate allocation counters in /proc/allocinfo output While rare, memory allocation profiling can contain inaccurate counters if slab object extension vector allocation fails. That allocation might succeed later but prior to that, slab allocations that would have used that object extension vector will not be accounted for. To indicate incorrect counters, "accurate:no" marker is appended to the call site line in the /proc/allocinfo output. Bump up /proc/allocinfo version to reflect the change in the file format and update documentation. Example output with invalid counters: allocinfo - version: 2.0 0 0 arch/x86/kernel/kdebugfs.c:105 func:create_setup_data_nodes 0 0 arch/x86/kernel/alternative.c:2090 func:alternatives_smp_module_add 0 0 arch/x86/kernel/alternative.c:127 func:__its_alloc accurate:no 0 0 arch/x86/kernel/fpu/regset.c:160 func:xstateregs_set 0 0 arch/x86/kernel/fpu/xstate.c:1590 func:fpstate_realloc 0 0 arch/x86/kernel/cpu/aperfmperf.c:379 func:arch_enable_hybrid_capacity_scale 0 0 arch/x86/kernel/cpu/amd_cache_disable.c:258 func:init_amd_l3_attrs 49152 48 arch/x86/kernel/cpu/mce/core.c:2709 func:mce_device_create accurate:no 32768 1 arch/x86/kernel/cpu/mce/genpool.c:132 func:mce_gen_pool_create 0 0 arch/x86/kernel/cpu/mce/amd.c:1341 func:mce_threshold_create_device [surenb@google.com: document new "accurate:no" marker] Fixes: 39d117e04d15 ("alloc_tag: mark inaccurate allocation counters in /proc/allocinfo output") [akpm@linux-foundation.org: simplification per Usama, reflow text] [akpm@linux-foundation.org: add newline to prevent docs warning, per Randy] Link: https://lkml.kernel.org/r/20250915230224.4115531-1-surenb@google.com Signed-off-by: Suren Baghdasaryan Suggested-by: Johannes Weiner Acked-by: Shakeel Butt Acked-by: Usama Arif Acked-by: Johannes Weiner Cc: David Rientjes Cc: David Wang <00107082@163.com> Cc: Kent Overstreet Cc: Pasha Tatashin Cc: Roman Gushchin Cc: Sourav Panda Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- lib/alloc_tag.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index f79217427d81..3ef702e6b69a 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -80,7 +80,7 @@ static void allocinfo_stop(struct seq_file *m, void *arg) static void print_allocinfo_header(struct seq_buf *buf) { /* Output format version, so we can change it. */ - seq_buf_printf(buf, "allocinfo - version: 1.0\n"); + seq_buf_printf(buf, "allocinfo - version: 2.0\n"); seq_buf_printf(buf, "# \n"); } @@ -92,6 +92,8 @@ static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct) seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls); codetag_to_text(out, ct); + if (unlikely(alloc_tag_is_inaccurate(tag))) + seq_buf_printf(out, " accurate:no"); seq_buf_putc(out, ' '); seq_buf_putc(out, '\n'); } -- cgit v1.2.3 From 38cc91db2e87ab55bfca2b194e791867b77f9e55 Mon Sep 17 00:00:00 2001 From: Burak Emir Date: Mon, 8 Sep 2025 07:21:54 +0000 Subject: rust: add find_bit_benchmark_rust module. Microbenchmark protected by a config FIND_BIT_BENCHMARK_RUST, following `find_bit_benchmark.c` but testing the Rust Bitmap API. We add a fill_random() method protected by the config in order to maintain the abstraction. The sample output from the benchmark, both C and Rust version: find_bit_benchmark.c output: ``` Start testing find_bit() with random-filled bitmap [ 438.101937] find_next_bit: 860188 ns, 163419 iterations [ 438.109471] find_next_zero_bit: 912342 ns, 164262 iterations [ 438.116820] find_last_bit: 726003 ns, 163419 iterations [ 438.130509] find_nth_bit: 7056993 ns, 16269 iterations [ 438.139099] find_first_bit: 1963272 ns, 16270 iterations [ 438.173043] find_first_and_bit: 27314224 ns, 32654 iterations [ 438.180065] find_next_and_bit: 398752 ns, 73705 iterations [ 438.186689] Start testing find_bit() with sparse bitmap [ 438.193375] find_next_bit: 9675 ns, 656 iterations [ 438.201765] find_next_zero_bit: 1766136 ns, 327025 iterations [ 438.208429] find_last_bit: 9017 ns, 656 iterations [ 438.217816] find_nth_bit: 2749742 ns, 655 iterations [ 438.225168] find_first_bit: 721799 ns, 656 iterations [ 438.231797] find_first_and_bit: 2819 ns, 1 iterations [ 438.238441] find_next_and_bit: 3159 ns, 1 iterations ``` find_bit_benchmark_rust.rs output: ``` [ 451.182459] find_bit_benchmark_rust: [ 451.186688] Start testing find_bit() Rust with random-filled bitmap [ 451.194450] next_bit: 777950 ns, 163644 iterations [ 451.201997] next_zero_bit: 918889 ns, 164036 iterations [ 451.208642] Start testing find_bit() Rust with sparse bitmap [ 451.214300] next_bit: 9181 ns, 654 iterations [ 451.222806] next_zero_bit: 1855504 ns, 327026 iterations ``` Here are the results from 32 samples, with 95% confidence interval. The microbenchmark was built with RUST_BITMAP_HARDENED=n and run on a machine that did not execute other processes. Random-filled bitmap: +-----------+-------+-----------+--------------+-----------+-----------+ | Benchmark | Lang | Mean (ms) | Std Dev (ms) | 95% CI Lo | 95% CI Hi | +-----------+-------+-----------+--------------+-----------+-----------+ | find_bit/ | C | 825.07 | 53.89 | 806.40 | 843.74 | | next_bit | Rust | 870.91 | 46.29 | 854.88 | 886.95 | +-----------+-------+-----------+--------------+-----------+-----------+ | find_zero/| C | 933.56 | 56.34 | 914.04 | 953.08 | | next_zero | Rust | 945.85 | 60.44 | 924.91 | 966.79 | +-----------+-------+-----------+--------------+-----------+-----------+ Rust appears 5.5% slower for next_bit, 1.3% slower for next_zero. Sparse bitmap: +-----------+-------+-----------+--------------+-----------+-----------+ | Benchmark | Lang | Mean (ms) | Std Dev (ms) | 95% CI Lo | 95% CI Hi | +-----------+-------+-----------+--------------+-----------+-----------+ | find_bit/ | C | 13.17 | 6.21 | 11.01 | 15.32 | | next_bit | Rust | 14.30 | 8.27 | 11.43 | 17.17 | +-----------+-------+-----------+--------------+-----------+-----------+ | find_zero/| C | 1859.31 | 82.30 | 1830.80 | 1887.83 | | next_zero | Rust | 1908.09 | 139.82 | 1859.65 | 1956.54 | +-----------+-------+-----------+--------------+-----------+-----------+ Rust appears 8.5% slower for next_bit, 2.6% slower for next_zero. In summary, taking the arithmetic mean of all slow-downs, we can say the Rust API has a 4.5% slowdown. Suggested-by: Alice Ryhl Suggested-by: Yury Norov (NVIDIA) Reviewed-by: Yury Norov (NVIDIA) Reviewed-by: Alice Ryhl Signed-off-by: Burak Emir Signed-off-by: Yury Norov (NVIDIA) --- lib/Kconfig.debug | 13 ++++++ lib/Makefile | 1 + lib/find_bit_benchmark_rust.rs | 104 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 lib/find_bit_benchmark_rust.rs (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dc0e0c6ed075..386232d81a0e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2607,6 +2607,19 @@ config FIND_BIT_BENCHMARK If unsure, say N. +config FIND_BIT_BENCHMARK_RUST + tristate "Test find_bit functions in Rust" + depends on RUST + help + This builds the "find_bit_benchmark_rust" module. It is a micro + benchmark that measures the performance of Rust functions that + correspond to the find_*_bit() operations in C. It follows the + FIND_BIT_BENCHMARK closely but will in general not yield same + numbers due to extra bounds checks and overhead of foreign + function calls. + + If unsure, say N. + config TEST_FIRMWARE tristate "Test firmware loading via userspace interface" depends on FW_LOADER diff --git a/lib/Makefile b/lib/Makefile index 392ff808c9b9..96a83b937a60 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -62,6 +62,7 @@ obj-y += hexdump.o obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o obj-$(CONFIG_FIND_BIT_BENCHMARK) += find_bit_benchmark.o +obj-$(CONFIG_FIND_BIT_BENCHMARK_RUST) += find_bit_benchmark_rust.o obj-$(CONFIG_TEST_BPF) += test_bpf.o test_dhry-objs := dhry_1.o dhry_2.o dhry_run.o obj-$(CONFIG_TEST_DHRY) += test_dhry.o diff --git a/lib/find_bit_benchmark_rust.rs b/lib/find_bit_benchmark_rust.rs new file mode 100644 index 000000000000..6bdc51de2f30 --- /dev/null +++ b/lib/find_bit_benchmark_rust.rs @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +//! Benchmark for find_bit-like methods in Bitmap Rust API. + +use kernel::alloc::flags::GFP_KERNEL; +use kernel::bindings; +use kernel::bitmap::BitmapVec; +use kernel::error::{code, Result}; +use kernel::prelude::module; +use kernel::time::{Instant, Monotonic}; +use kernel::ThisModule; +use kernel::{pr_cont, pr_err}; + +const BITMAP_LEN: usize = 4096 * 8 * 10; +// Reciprocal of the fraction of bits that are set in sparse bitmap. +const SPARSENESS: usize = 500; + +/// Test module that benchmarks performance of traversing bitmaps. +struct Benchmark(); + +fn test_next_bit(bitmap: &BitmapVec) { + let time = Instant::::now(); + let mut cnt = 0; + let mut i = 0; + + while let Some(index) = bitmap.next_bit(i) { + cnt += 1; + i = index + 1; + // CONFIG_RUST_BITMAP_HARDENED enforces strict bounds. + if i == BITMAP_LEN { + break; + } + } + + let delta = time.elapsed(); + pr_cont!( + "\nnext_bit: {:18} ns, {:6} iterations", + delta.as_nanos(), + cnt + ); +} + +fn test_next_zero_bit(bitmap: &BitmapVec) { + let time = Instant::::now(); + let mut cnt = 0; + let mut i = 0; + + while let Some(index) = bitmap.next_zero_bit(i) { + cnt += 1; + i = index + 1; + // CONFIG_RUST_BITMAP_HARDENED enforces strict bounds. + if i == BITMAP_LEN { + break; + } + } + + let delta = time.elapsed(); + pr_cont!( + "\nnext_zero_bit: {:18} ns, {:6} iterations", + delta.as_nanos(), + cnt + ); +} + +fn find_bit_test() { + pr_err!("Benchmark"); + pr_cont!("\nStart testing find_bit() Rust with random-filled bitmap"); + + let mut bitmap = BitmapVec::new(BITMAP_LEN, GFP_KERNEL).expect("alloc bitmap failed"); + bitmap.fill_random(); + + test_next_bit(&bitmap); + test_next_zero_bit(&bitmap); + + pr_cont!("\nStart testing find_bit() Rust with sparse bitmap"); + + let mut bitmap = BitmapVec::new(BITMAP_LEN, GFP_KERNEL).expect("alloc sparse bitmap failed"); + let nbits = BITMAP_LEN / SPARSENESS; + for _i in 0..nbits { + // SAFETY: __get_random_u32_below is safe to call with any u32 argument. + let bit = + unsafe { bindings::__get_random_u32_below(BITMAP_LEN.try_into().unwrap()) as usize }; + bitmap.set_bit(bit); + } + + test_next_bit(&bitmap); + test_next_zero_bit(&bitmap); + pr_cont!("\n"); +} + +impl kernel::Module for Benchmark { + fn init(_module: &'static ThisModule) -> Result { + find_bit_test(); + // Return error so test module can be inserted again without rmmod. + Err(code::EINVAL) + } +} + +module! { + type: Benchmark, + name: "find_bit_benchmark_rust", + authors: ["Burak Emir "], + description: "Module with benchmark for bitmap Rust API", + license: "GPL v2", +} -- cgit v1.2.3 From 04ae01a80df616b0998bb6a81c8db310f3aef102 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 11 Sep 2025 01:23:51 +0200 Subject: lib/decompress: use designated initializers for struct compress_format Switch 'compressed_formats[]' to the more modern and flexible designated initializers. This improves readability and allows struct fields to be reordered. Also use a more concise sentinel marker. Remove the curly braces around the for loop while we're at it. No functional changes intended. Link: https://lkml.kernel.org/r/20250910232350.1308206-2-thorsten.blum@linux.dev Signed-off-by: Thorsten Blum Reviewed-by: Kuan-Wei Chiu Cc: David Sterba Cc: Nick Terrell Signed-off-by: Andrew Morton --- lib/decompress.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/decompress.c b/lib/decompress.c index ab3fc90ffc64..7785471586c6 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -49,15 +49,15 @@ struct compress_format { }; static const struct compress_format compressed_formats[] __initconst = { - { {0x1f, 0x8b}, "gzip", gunzip }, - { {0x1f, 0x9e}, "gzip", gunzip }, - { {0x42, 0x5a}, "bzip2", bunzip2 }, - { {0x5d, 0x00}, "lzma", unlzma }, - { {0xfd, 0x37}, "xz", unxz }, - { {0x89, 0x4c}, "lzo", unlzo }, - { {0x02, 0x21}, "lz4", unlz4 }, - { {0x28, 0xb5}, "zstd", unzstd }, - { {0, 0}, NULL, NULL } + { .magic = {0x1f, 0x8b}, .name = "gzip", .decompressor = gunzip }, + { .magic = {0x1f, 0x9e}, .name = "gzip", .decompressor = gunzip }, + { .magic = {0x42, 0x5a}, .name = "bzip2", .decompressor = bunzip2 }, + { .magic = {0x5d, 0x00}, .name = "lzma", .decompressor = unlzma }, + { .magic = {0xfd, 0x37}, .name = "xz", .decompressor = unxz }, + { .magic = {0x89, 0x4c}, .name = "lzo", .decompressor = unlzo }, + { .magic = {0x02, 0x21}, .name = "lz4", .decompressor = unlz4 }, + { .magic = {0x28, 0xb5}, .name = "zstd", .decompressor = unzstd }, + { /* sentinel */ } }; decompress_fn __init decompress_method(const unsigned char *inbuf, long len, @@ -73,11 +73,10 @@ decompress_fn __init decompress_method(const unsigned char *inbuf, long len, pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]); - for (cf = compressed_formats; cf->name; cf++) { + for (cf = compressed_formats; cf->name; cf++) if (!memcmp(inbuf, cf->magic, 2)) break; - } if (name) *name = cf->name; return cf->decompressor; -- cgit v1.2.3 From 23ef9d439769d5f35353650e771c63d13824235b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 23 Sep 2025 14:34:19 -0700 Subject: kcfi: Rename CONFIG_CFI_CLANG to CONFIG_CFI The kernel's CFI implementation uses the KCFI ABI specifically, and is not strictly tied to a particular compiler. In preparation for GCC supporting KCFI, rename CONFIG_CFI_CLANG to CONFIG_CFI (along with associated options). Use new "transitional" Kconfig option for old CONFIG_CFI_CLANG that will enable CONFIG_CFI during olddefconfig. Reviewed-by: Linus Walleij Reviewed-by: Nathan Chancellor Link: https://lore.kernel.org/r/20250923213422.1105654-3-kees@kernel.org Signed-off-by: Kees Cook --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dc0e0c6ed075..e3e69df19e78 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2894,7 +2894,7 @@ config FORTIFY_KUNIT_TEST config LONGEST_SYM_KUNIT_TEST tristate "Test the longest symbol possible" if !KUNIT_ALL_TESTS depends on KUNIT && KPROBES - depends on !PREFIX_SYMBOLS && !CFI_CLANG && !GCOV_KERNEL + depends on !PREFIX_SYMBOLS && !CFI && !GCOV_KERNEL default KUNIT_ALL_TESTS help Tests the longest symbol possible -- cgit v1.2.3 From 1daf37592a050da046a03f78b20abb2a91f6d934 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Sep 2025 11:43:04 +0200 Subject: panic: remove CONFIG_PANIC_ON_OOPS_VALUE There's really no need for this since it's 0 or 1 when CONFIG_PANIC_ON_OOPS is disabled/enabled, so just use IS_ENABLED() instead. The extra symbol goes back to the original code adding it in commit 2a01bb3885c9 ("panic: Make panic_on_oops configurable"). Link: https://lkml.kernel.org/r/20250924094303.18521-2-johannes@sipsolutions.net Signed-off-by: Johannes Berg Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index dc0e0c6ed075..30761648e2de 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1067,12 +1067,6 @@ config PANIC_ON_OOPS Say N if unsure. -config PANIC_ON_OOPS_VALUE - int - range 0 1 - default 0 if !PANIC_ON_OOPS - default 1 if PANIC_ON_OOPS - config PANIC_TIMEOUT int "panic timeout" default 0 -- cgit v1.2.3 From 1260cbcffa608219fc9188a6cbe9c45a300ef8b5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 24 Sep 2025 10:02:07 +0200 Subject: lib/genalloc: fix device leak in of_gen_pool_get() Make sure to drop the reference taken when looking up the genpool platform device in of_gen_pool_get() before returning the pool. Note that holding a reference to a device does typically not prevent its devres managed resources from being released so there is no point in keeping the reference. Link: https://lkml.kernel.org/r/20250924080207.18006-1-johan@kernel.org Fixes: 9375db07adea ("genalloc: add devres support, allow to find a managed pool by device") Signed-off-by: Johan Hovold Cc: Philipp Zabel Cc: Vladimir Zapolskiy Cc: [3.10+] Signed-off-by: Andrew Morton --- lib/genalloc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/genalloc.c b/lib/genalloc.c index 4fa5635bf81b..841f29783833 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -899,8 +899,11 @@ struct gen_pool *of_gen_pool_get(struct device_node *np, if (!name) name = of_node_full_name(np_pool); } - if (pdev) + if (pdev) { pool = gen_pool_get(&pdev->dev, name); + put_device(&pdev->dev); + } + of_node_put(np_pool); return pool; -- cgit v1.2.3 From a8541957e7b18921dbf5d887780e8eaeb04916cf Mon Sep 17 00:00:00 2001 From: Qianfeng Rong Date: Mon, 4 Aug 2025 20:56:57 +0800 Subject: maple_tree: remove redundant __GFP_NOWARN Commit 16f5dfbc851b ("gfp: include __GFP_NOWARN in GFP_NOWAIT") made GFP_NOWAIT implicitly include __GFP_NOWARN. Therefore, explicit __GFP_NOWARN combined with GFP_NOWAIT (e.g., `GFP_NOWAIT | __GFP_NOWARN`) is now redundant. Let's clean up these redundant flags across subsystems. No functional changes. Link: https://lkml.kernel.org/r/20250804125657.482109-1-rongqianfeng@vivo.com Signed-off-by: Qianfeng Rong Reviewed-by: Wei Yang Reviewed-by: Liam R. Howlett Cc: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Signed-off-by: Vlastimil Babka --- lib/maple_tree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index b4ee2d29d7a9..38fb68c08291 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1344,11 +1344,11 @@ static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) * @mas: The maple state * @count: The number of nodes needed * - * Note: Uses GFP_NOWAIT | __GFP_NOWARN for gfp flags. + * Note: Uses GFP_NOWAIT for gfp flags. */ static void mas_node_count(struct ma_state *mas, int count) { - return mas_node_count_gfp(mas, count, GFP_NOWAIT | __GFP_NOWARN); + return mas_node_count_gfp(mas, count, GFP_NOWAIT); } /* -- cgit v1.2.3 From e3852a1213ffc6fbd89c768e7c54a360652648b8 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Thu, 28 Aug 2025 10:53:45 -0400 Subject: maple_tree: Drop bulk insert support Bulk insert mode was added to facilitate forking faster, but forking now uses __mt_dup() to duplicate the tree. The addition of sheaves has made the bulk allocations difficult to maintain - since the expected entries would preallocate into the maple state. A big part of the maple state node allocation was the ability to push nodes back onto the state for later use, which was essential to the bulk insert algorithm. Remove mas_expected_entries() and mas_destroy_rebalance() functions as well as the MA_STATE_BULK and MA_STATE_REBALANCE maple state flags since there are no users anymore. Drop the associated testing as well. Signed-off-by: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- lib/maple_tree.c | 270 +------------------------------------------------- lib/test_maple_tree.c | 137 ------------------------- 2 files changed, 4 insertions(+), 403 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 38fb68c08291..4f0e30b57b0c 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -83,13 +83,9 @@ /* * Maple state flags - * * MA_STATE_BULK - Bulk insert mode - * * MA_STATE_REBALANCE - Indicate a rebalance during bulk insert * * MA_STATE_PREALLOC - Preallocated nodes, WARN_ON allocation */ -#define MA_STATE_BULK 1 -#define MA_STATE_REBALANCE 2 -#define MA_STATE_PREALLOC 4 +#define MA_STATE_PREALLOC 1 #define ma_parent_ptr(x) ((struct maple_pnode *)(x)) #define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT) @@ -1031,24 +1027,6 @@ static inline void mas_descend(struct ma_state *mas) mas->node = mas_slot(mas, slots, mas->offset); } -/* - * mte_set_gap() - Set a maple node gap. - * @mn: The encoded maple node - * @gap: The offset of the gap to set - * @val: The gap value - */ -static inline void mte_set_gap(const struct maple_enode *mn, - unsigned char gap, unsigned long val) -{ - switch (mte_node_type(mn)) { - default: - break; - case maple_arange_64: - mte_to_node(mn)->ma64.gap[gap] = val; - break; - } -} - /* * mas_ascend() - Walk up a level of the tree. * @mas: The maple state @@ -1878,21 +1856,7 @@ static inline int mab_calc_split(struct ma_state *mas, * end on a NULL entry, with the exception of the left-most leaf. The * limitation means that the split of a node must be checked for this condition * and be able to put more data in one direction or the other. - */ - if (unlikely((mas->mas_flags & MA_STATE_BULK))) { - *mid_split = 0; - split = b_end - mt_min_slots[bn->type]; - - if (!ma_is_leaf(bn->type)) - return split; - - mas->mas_flags |= MA_STATE_REBALANCE; - if (!bn->slot[split]) - split--; - return split; - } - - /* + * * Although extremely rare, it is possible to enter what is known as the 3-way * split scenario. The 3-way split comes about by means of a store of a range * that overwrites the end and beginning of two full nodes. The result is a set @@ -2039,27 +2003,6 @@ static inline void mab_mas_cp(struct maple_big_node *b_node, } } -/* - * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert. - * @mas: The maple state - * @end: The maple node end - * @mt: The maple node type - */ -static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end, - enum maple_type mt) -{ - if (!(mas->mas_flags & MA_STATE_BULK)) - return; - - if (mte_is_root(mas->node)) - return; - - if (end > mt_min_slots[mt]) { - mas->mas_flags &= ~MA_STATE_REBALANCE; - return; - } -} - /* * mas_store_b_node() - Store an @entry into the b_node while also copying the * data from a maple encoded node. @@ -2109,9 +2052,6 @@ static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas, /* Handle new range ending before old range ends */ piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type); if (piv > mas->last) { - if (piv == ULONG_MAX) - mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type); - if (offset_end != slot) wr_mas->content = mas_slot_locked(mas, wr_mas->slots, offset_end); @@ -3011,126 +2951,6 @@ static inline void mas_rebalance(struct ma_state *mas, return mas_spanning_rebalance(mas, &mast, empty_count); } -/* - * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple - * state. - * @mas: The maple state - * @end: The end of the left-most node. - * - * During a mass-insert event (such as forking), it may be necessary to - * rebalance the left-most node when it is not sufficient. - */ -static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end) -{ - enum maple_type mt = mte_node_type(mas->node); - struct maple_node reuse, *newnode, *parent, *new_left, *left, *node; - struct maple_enode *eparent, *old_eparent; - unsigned char offset, tmp, split = mt_slots[mt] / 2; - void __rcu **l_slots, **slots; - unsigned long *l_pivs, *pivs, gap; - bool in_rcu = mt_in_rcu(mas->tree); - unsigned char new_height = mas_mt_height(mas); - - MA_STATE(l_mas, mas->tree, mas->index, mas->last); - - l_mas = *mas; - mas_prev_sibling(&l_mas); - - /* set up node. */ - if (in_rcu) { - newnode = mas_pop_node(mas); - } else { - newnode = &reuse; - } - - node = mas_mn(mas); - newnode->parent = node->parent; - slots = ma_slots(newnode, mt); - pivs = ma_pivots(newnode, mt); - left = mas_mn(&l_mas); - l_slots = ma_slots(left, mt); - l_pivs = ma_pivots(left, mt); - if (!l_slots[split]) - split++; - tmp = mas_data_end(&l_mas) - split; - - memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp); - memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp); - pivs[tmp] = l_mas.max; - memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end); - memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end); - - l_mas.max = l_pivs[split]; - mas->min = l_mas.max + 1; - old_eparent = mt_mk_node(mte_parent(l_mas.node), - mas_parent_type(&l_mas, l_mas.node)); - tmp += end; - if (!in_rcu) { - unsigned char max_p = mt_pivots[mt]; - unsigned char max_s = mt_slots[mt]; - - if (tmp < max_p) - memset(pivs + tmp, 0, - sizeof(unsigned long) * (max_p - tmp)); - - if (tmp < mt_slots[mt]) - memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp)); - - memcpy(node, newnode, sizeof(struct maple_node)); - ma_set_meta(node, mt, 0, tmp - 1); - mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node), - l_pivs[split]); - - /* Remove data from l_pivs. */ - tmp = split + 1; - memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp)); - memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp)); - ma_set_meta(left, mt, 0, split); - eparent = old_eparent; - - goto done; - } - - /* RCU requires replacing both l_mas, mas, and parent. */ - mas->node = mt_mk_node(newnode, mt); - ma_set_meta(newnode, mt, 0, tmp); - - new_left = mas_pop_node(mas); - new_left->parent = left->parent; - mt = mte_node_type(l_mas.node); - slots = ma_slots(new_left, mt); - pivs = ma_pivots(new_left, mt); - memcpy(slots, l_slots, sizeof(void *) * split); - memcpy(pivs, l_pivs, sizeof(unsigned long) * split); - ma_set_meta(new_left, mt, 0, split); - l_mas.node = mt_mk_node(new_left, mt); - - /* replace parent. */ - offset = mte_parent_slot(mas->node); - mt = mas_parent_type(&l_mas, l_mas.node); - parent = mas_pop_node(mas); - slots = ma_slots(parent, mt); - pivs = ma_pivots(parent, mt); - memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node)); - rcu_assign_pointer(slots[offset], mas->node); - rcu_assign_pointer(slots[offset - 1], l_mas.node); - pivs[offset - 1] = l_mas.max; - eparent = mt_mk_node(parent, mt); -done: - gap = mas_leaf_max_gap(mas); - mte_set_gap(eparent, mte_parent_slot(mas->node), gap); - gap = mas_leaf_max_gap(&l_mas); - mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap); - mas_ascend(mas); - - if (in_rcu) { - mas_replace_node(mas, old_eparent, new_height); - mas_adopt_children(mas, mas->node); - } - - mas_update_gap(mas); -} - /* * mas_split_final_node() - Split the final node in a subtree operation. * @mast: the maple subtree state @@ -3837,8 +3657,6 @@ static inline void mas_wr_node_store(struct ma_wr_state *wr_mas, if (mas->last == wr_mas->end_piv) offset_end++; /* don't copy this offset */ - else if (unlikely(wr_mas->r_max == ULONG_MAX)) - mas_bulk_rebalance(mas, mas->end, wr_mas->type); /* set up node. */ if (in_rcu) { @@ -4255,7 +4073,7 @@ static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) new_end = mas_wr_new_end(wr_mas); /* Potential spanning rebalance collapsing a node */ if (new_end < mt_min_slots[wr_mas->type]) { - if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) + if (!mte_is_root(mas->node)) return wr_rebalance; return wr_node_store; } @@ -5562,25 +5380,7 @@ void mas_destroy(struct ma_state *mas) struct maple_alloc *node; unsigned long total; - /* - * When using mas_for_each() to insert an expected number of elements, - * it is possible that the number inserted is less than the expected - * number. To fix an invalid final node, a check is performed here to - * rebalance the previous node with the final node. - */ - if (mas->mas_flags & MA_STATE_REBALANCE) { - unsigned char end; - if (mas_is_err(mas)) - mas_reset(mas); - mas_start(mas); - mtree_range_walk(mas); - end = mas->end + 1; - if (end < mt_min_slot_count(mas->node) - 1) - mas_destroy_rebalance(mas, end); - - mas->mas_flags &= ~MA_STATE_REBALANCE; - } - mas->mas_flags &= ~(MA_STATE_BULK|MA_STATE_PREALLOC); + mas->mas_flags &= ~MA_STATE_PREALLOC; total = mas_allocated(mas); while (total) { @@ -5600,68 +5400,6 @@ void mas_destroy(struct ma_state *mas) } EXPORT_SYMBOL_GPL(mas_destroy); -/* - * mas_expected_entries() - Set the expected number of entries that will be inserted. - * @mas: The maple state - * @nr_entries: The number of expected entries. - * - * This will attempt to pre-allocate enough nodes to store the expected number - * of entries. The allocations will occur using the bulk allocator interface - * for speed. Please call mas_destroy() on the @mas after inserting the entries - * to ensure any unused nodes are freed. - * - * Return: 0 on success, -ENOMEM if memory could not be allocated. - */ -int mas_expected_entries(struct ma_state *mas, unsigned long nr_entries) -{ - int nonleaf_cap = MAPLE_ARANGE64_SLOTS - 2; - struct maple_enode *enode = mas->node; - int nr_nodes; - int ret; - - /* - * Sometimes it is necessary to duplicate a tree to a new tree, such as - * forking a process and duplicating the VMAs from one tree to a new - * tree. When such a situation arises, it is known that the new tree is - * not going to be used until the entire tree is populated. For - * performance reasons, it is best to use a bulk load with RCU disabled. - * This allows for optimistic splitting that favours the left and reuse - * of nodes during the operation. - */ - - /* Optimize splitting for bulk insert in-order */ - mas->mas_flags |= MA_STATE_BULK; - - /* - * Avoid overflow, assume a gap between each entry and a trailing null. - * If this is wrong, it just means allocation can happen during - * insertion of entries. - */ - nr_nodes = max(nr_entries, nr_entries * 2 + 1); - if (!mt_is_alloc(mas->tree)) - nonleaf_cap = MAPLE_RANGE64_SLOTS - 2; - - /* Leaves; reduce slots to keep space for expansion */ - nr_nodes = DIV_ROUND_UP(nr_nodes, MAPLE_RANGE64_SLOTS - 2); - /* Internal nodes */ - nr_nodes += DIV_ROUND_UP(nr_nodes, nonleaf_cap); - /* Add working room for split (2 nodes) + new parents */ - mas_node_count_gfp(mas, nr_nodes + 3, GFP_KERNEL); - - /* Detect if allocations run out */ - mas->mas_flags |= MA_STATE_PREALLOC; - - if (!mas_is_err(mas)) - return 0; - - ret = xa_err(mas->node); - mas->node = enode; - mas_destroy(mas); - return ret; - -} -EXPORT_SYMBOL_GPL(mas_expected_entries); - static void mas_may_activate(struct ma_state *mas) { if (!mas->node) { diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c index cb3936595b0d..14fbbee32046 100644 --- a/lib/test_maple_tree.c +++ b/lib/test_maple_tree.c @@ -2746,139 +2746,6 @@ static noinline void __init check_fuzzer(struct maple_tree *mt) mtree_test_erase(mt, ULONG_MAX - 10); } -/* duplicate the tree with a specific gap */ -static noinline void __init check_dup_gaps(struct maple_tree *mt, - unsigned long nr_entries, bool zero_start, - unsigned long gap) -{ - unsigned long i = 0; - struct maple_tree newmt; - int ret; - void *tmp; - MA_STATE(mas, mt, 0, 0); - MA_STATE(newmas, &newmt, 0, 0); - struct rw_semaphore newmt_lock; - - init_rwsem(&newmt_lock); - mt_set_external_lock(&newmt, &newmt_lock); - - if (!zero_start) - i = 1; - - mt_zero_nr_tallocated(); - for (; i <= nr_entries; i++) - mtree_store_range(mt, i*10, (i+1)*10 - gap, - xa_mk_value(i), GFP_KERNEL); - - mt_init_flags(&newmt, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN); - mt_set_non_kernel(99999); - down_write(&newmt_lock); - ret = mas_expected_entries(&newmas, nr_entries); - mt_set_non_kernel(0); - MT_BUG_ON(mt, ret != 0); - - rcu_read_lock(); - mas_for_each(&mas, tmp, ULONG_MAX) { - newmas.index = mas.index; - newmas.last = mas.last; - mas_store(&newmas, tmp); - } - rcu_read_unlock(); - mas_destroy(&newmas); - - __mt_destroy(&newmt); - up_write(&newmt_lock); -} - -/* Duplicate many sizes of trees. Mainly to test expected entry values */ -static noinline void __init check_dup(struct maple_tree *mt) -{ - int i; - int big_start = 100010; - - /* Check with a value at zero */ - for (i = 10; i < 1000; i++) { - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); - check_dup_gaps(mt, i, true, 5); - mtree_destroy(mt); - rcu_barrier(); - } - - cond_resched(); - mt_cache_shrink(); - /* Check with a value at zero, no gap */ - for (i = 1000; i < 2000; i++) { - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); - check_dup_gaps(mt, i, true, 0); - mtree_destroy(mt); - rcu_barrier(); - } - - cond_resched(); - mt_cache_shrink(); - /* Check with a value at zero and unreasonably large */ - for (i = big_start; i < big_start + 10; i++) { - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); - check_dup_gaps(mt, i, true, 5); - mtree_destroy(mt); - rcu_barrier(); - } - - cond_resched(); - mt_cache_shrink(); - /* Small to medium size not starting at zero*/ - for (i = 200; i < 1000; i++) { - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); - check_dup_gaps(mt, i, false, 5); - mtree_destroy(mt); - rcu_barrier(); - } - - cond_resched(); - mt_cache_shrink(); - /* Unreasonably large not starting at zero*/ - for (i = big_start; i < big_start + 10; i++) { - mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); - check_dup_gaps(mt, i, false, 5); - mtree_destroy(mt); - rcu_barrier(); - cond_resched(); - mt_cache_shrink(); - } - - /* Check non-allocation tree not starting at zero */ - for (i = 1500; i < 3000; i++) { - mt_init_flags(mt, 0); - check_dup_gaps(mt, i, false, 5); - mtree_destroy(mt); - rcu_barrier(); - cond_resched(); - if (i % 2 == 0) - mt_cache_shrink(); - } - - mt_cache_shrink(); - /* Check non-allocation tree starting at zero */ - for (i = 200; i < 1000; i++) { - mt_init_flags(mt, 0); - check_dup_gaps(mt, i, true, 5); - mtree_destroy(mt); - rcu_barrier(); - cond_resched(); - } - - mt_cache_shrink(); - /* Unreasonably large */ - for (i = big_start + 5; i < big_start + 10; i++) { - mt_init_flags(mt, 0); - check_dup_gaps(mt, i, true, 5); - mtree_destroy(mt); - rcu_barrier(); - mt_cache_shrink(); - cond_resched(); - } -} - static noinline void __init check_bnode_min_spanning(struct maple_tree *mt) { int i = 50; @@ -4077,10 +3944,6 @@ static int __init maple_tree_seed(void) check_fuzzer(&tree); mtree_destroy(&tree); - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); - check_dup(&tree); - mtree_destroy(&tree); - mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_bnode_min_spanning(&tree); mtree_destroy(&tree); -- cgit v1.2.3 From 59faa4da7cd4565cbce25358495556b75bb37022 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Wed, 3 Sep 2025 14:59:55 +0200 Subject: maple_tree: use percpu sheaves for maple_node_cache Setup the maple_node_cache with percpu sheaves of size 32 to hopefully improve its performance. Note this will not immediately take advantage of sheaf batching of kfree_rcu() operations due to the maple tree using call_rcu with custom callbacks. The followup changes to maple tree will change that and also make use of the prefilled sheaves functionality. Reviewed-by: Sidhartha Kumar Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- lib/maple_tree.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 4f0e30b57b0c..d034f170ac89 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -6040,9 +6040,14 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp) void __init maple_tree_init(void) { + struct kmem_cache_args args = { + .align = sizeof(struct maple_node), + .sheaf_capacity = 32, + }; + maple_node_cache = kmem_cache_create("maple_node", - sizeof(struct maple_node), sizeof(struct maple_node), - SLAB_PANIC, NULL); + sizeof(struct maple_node), &args, + SLAB_PANIC); } /** -- cgit v1.2.3 From 9b60811cb3b42fe9a8e3f0aa3e007cd9cc814a78 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 3 Sep 2025 14:59:58 +0200 Subject: maple_tree: Use kfree_rcu in ma_free_rcu kfree_rcu is an optimized version of call_rcu + kfree. It used to not be possible to call it on non-kmalloc objects, but this restriction was lifted ever since SLOB was dropped from the kernel, and since commit 6c6c47b063b5 ("mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()"). Thus, replace call_rcu + mt_free_rcu with kfree_rcu. Signed-off-by: Pedro Falcato Reviewed-by: Harry Yoo Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- lib/maple_tree.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index d034f170ac89..c706e2e48f88 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -187,13 +187,6 @@ static inline void mt_free_bulk(size_t size, void __rcu **nodes) kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); } -static void mt_free_rcu(struct rcu_head *head) -{ - struct maple_node *node = container_of(head, struct maple_node, rcu); - - kmem_cache_free(maple_node_cache, node); -} - /* * ma_free_rcu() - Use rcu callback to free a maple node * @node: The node to free @@ -204,7 +197,7 @@ static void mt_free_rcu(struct rcu_head *head) static void ma_free_rcu(struct maple_node *node) { WARN_ON(node->parent != ma_parent_ptr(node)); - call_rcu(&node->rcu, mt_free_rcu); + kfree_rcu(node, rcu); } static void mt_set_height(struct maple_tree *mt, unsigned char height) @@ -5099,7 +5092,7 @@ static void mt_free_walk(struct rcu_head *head) mt_free_bulk(node->slot_len, slots); free_leaf: - mt_free_rcu(&node->rcu); + mt_free_one(node); } static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, @@ -5183,7 +5176,7 @@ next: free_leaf: if (free) - mt_free_rcu(&node->rcu); + mt_free_one(node); else mt_clear_meta(mt, node, node->type); } -- cgit v1.2.3 From 025f93101bb4e0a0cdc0fc4ee264f7905271ad75 Mon Sep 17 00:00:00 2001 From: Pedro Falcato Date: Wed, 3 Sep 2025 14:59:59 +0200 Subject: maple_tree: Replace mt_free_one() with kfree() kfree() is a little shorter and works with kmem_cache_alloc'd pointers too. Also lets us remove one more helper. Signed-off-by: Pedro Falcato Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- lib/maple_tree.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index c706e2e48f88..0439aaacf6cb 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -177,11 +177,6 @@ static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); } -static inline void mt_free_one(struct maple_node *node) -{ - kmem_cache_free(maple_node_cache, node); -} - static inline void mt_free_bulk(size_t size, void __rcu **nodes) { kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); @@ -5092,7 +5087,7 @@ static void mt_free_walk(struct rcu_head *head) mt_free_bulk(node->slot_len, slots); free_leaf: - mt_free_one(node); + kfree(node); } static inline void __rcu **mte_destroy_descend(struct maple_enode **enode, @@ -5176,7 +5171,7 @@ next: free_leaf: if (free) - mt_free_one(node); + kfree(node); else mt_clear_meta(mt, node, node->type); } @@ -5385,7 +5380,7 @@ void mas_destroy(struct ma_state *mas) mt_free_bulk(count, (void __rcu **)&node->slot[1]); total -= count; } - mt_free_one(ma_mnode_ptr(node)); + kfree(ma_mnode_ptr(node)); total--; } @@ -6373,7 +6368,7 @@ static void mas_dup_free(struct ma_state *mas) } node = mte_to_node(mas->node); - mt_free_one(node); + kfree(node); } /* -- cgit v1.2.3 From 9b05890a25d9197e39fcf5b2298f0b911c323306 Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 15:00:01 +0200 Subject: maple_tree: Prefilled sheaf conversion and testing Use prefilled sheaves instead of bulk allocations. This should speed up the allocations and the return path of unused allocations. Remove the push and pop of nodes from the maple state as this is now handled by the slab layer with sheaves. Testing has been removed as necessary since the features of the tree have been reduced. Signed-off-by: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- lib/maple_tree.c | 326 +++++++++++-------------------------------------------- 1 file changed, 62 insertions(+), 264 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 0439aaacf6cb..b41245f2cc65 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -182,6 +182,22 @@ static inline void mt_free_bulk(size_t size, void __rcu **nodes) kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); } +static void mt_return_sheaf(struct slab_sheaf *sheaf) +{ + kmem_cache_return_sheaf(maple_node_cache, GFP_NOWAIT, sheaf); +} + +static struct slab_sheaf *mt_get_sheaf(gfp_t gfp, int count) +{ + return kmem_cache_prefill_sheaf(maple_node_cache, gfp, count); +} + +static int mt_refill_sheaf(gfp_t gfp, struct slab_sheaf **sheaf, + unsigned int size) +{ + return kmem_cache_refill_sheaf(maple_node_cache, gfp, sheaf, size); +} + /* * ma_free_rcu() - Use rcu callback to free a maple node * @node: The node to free @@ -574,67 +590,6 @@ static __always_inline bool mte_dead_node(const struct maple_enode *enode) return ma_dead_node(node); } -/* - * mas_allocated() - Get the number of nodes allocated in a maple state. - * @mas: The maple state - * - * The ma_state alloc member is overloaded to hold a pointer to the first - * allocated node or to the number of requested nodes to allocate. If bit 0 is - * set, then the alloc contains the number of requested nodes. If there is an - * allocated node, then the total allocated nodes is in that node. - * - * Return: The total number of nodes allocated - */ -static inline unsigned long mas_allocated(const struct ma_state *mas) -{ - if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) - return 0; - - return mas->alloc->total; -} - -/* - * mas_set_alloc_req() - Set the requested number of allocations. - * @mas: the maple state - * @count: the number of allocations. - * - * The requested number of allocations is either in the first allocated node, - * located in @mas->alloc->request_count, or directly in @mas->alloc if there is - * no allocated node. Set the request either in the node or do the necessary - * encoding to store in @mas->alloc directly. - */ -static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) -{ - if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { - if (!count) - mas->alloc = NULL; - else - mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); - return; - } - - mas->alloc->request_count = count; -} - -/* - * mas_alloc_req() - get the requested number of allocations. - * @mas: The maple state - * - * The alloc count is either stored directly in @mas, or in - * @mas->alloc->request_count if there is at least one node allocated. Decode - * the request count if it's stored directly in @mas->alloc. - * - * Return: The allocation request count. - */ -static inline unsigned int mas_alloc_req(const struct ma_state *mas) -{ - if ((unsigned long)mas->alloc & 0x1) - return (unsigned long)(mas->alloc) >> 1; - else if (mas->alloc) - return mas->alloc->request_count; - return 0; -} - /* * ma_pivots() - Get a pointer to the maple node pivots. * @node: the maple node @@ -1120,77 +1075,15 @@ static int mas_ascend(struct ma_state *mas) */ static inline struct maple_node *mas_pop_node(struct ma_state *mas) { - struct maple_alloc *ret, *node = mas->alloc; - unsigned long total = mas_allocated(mas); - unsigned int req = mas_alloc_req(mas); + struct maple_node *ret; - /* nothing or a request pending. */ - if (WARN_ON(!total)) + if (WARN_ON_ONCE(!mas->sheaf)) return NULL; - if (total == 1) { - /* single allocation in this ma_state */ - mas->alloc = NULL; - ret = node; - goto single_node; - } - - if (node->node_count == 1) { - /* Single allocation in this node. */ - mas->alloc = node->slot[0]; - mas->alloc->total = node->total - 1; - ret = node; - goto new_head; - } - node->total--; - ret = node->slot[--node->node_count]; - node->slot[node->node_count] = NULL; - -single_node: -new_head: - if (req) { - req++; - mas_set_alloc_req(mas, req); - } - + ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf); memset(ret, 0, sizeof(*ret)); - return (struct maple_node *)ret; -} - -/* - * mas_push_node() - Push a node back on the maple state allocation. - * @mas: The maple state - * @used: The used maple node - * - * Stores the maple node back into @mas->alloc for reuse. Updates allocated and - * requested node count as necessary. - */ -static inline void mas_push_node(struct ma_state *mas, struct maple_node *used) -{ - struct maple_alloc *reuse = (struct maple_alloc *)used; - struct maple_alloc *head = mas->alloc; - unsigned long count; - unsigned int requested = mas_alloc_req(mas); - count = mas_allocated(mas); - - reuse->request_count = 0; - reuse->node_count = 0; - if (count) { - if (head->node_count < MAPLE_ALLOC_SLOTS) { - head->slot[head->node_count++] = reuse; - head->total++; - goto done; - } - reuse->slot[0] = head; - reuse->node_count = 1; - } - - reuse->total = count + 1; - mas->alloc = reuse; -done: - if (requested > 1) - mas_set_alloc_req(mas, requested - 1); + return ret; } /* @@ -1200,75 +1093,32 @@ done: */ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) { - struct maple_alloc *node; - unsigned long allocated = mas_allocated(mas); - unsigned int requested = mas_alloc_req(mas); - unsigned int count; - void **slots = NULL; - unsigned int max_req = 0; - - if (!requested) - return; + if (unlikely(mas->sheaf)) { + unsigned long refill = mas->node_request; - mas_set_alloc_req(mas, 0); - if (mas->mas_flags & MA_STATE_PREALLOC) { - if (allocated) + if (kmem_cache_sheaf_size(mas->sheaf) >= refill) { + mas->node_request = 0; return; - WARN_ON(!allocated); - } - - if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) { - node = (struct maple_alloc *)mt_alloc_one(gfp); - if (!node) - goto nomem_one; - - if (allocated) { - node->slot[0] = mas->alloc; - node->node_count = 1; - } else { - node->node_count = 0; } - mas->alloc = node; - node->total = ++allocated; - node->request_count = 0; - requested--; - } + if (mt_refill_sheaf(gfp, &mas->sheaf, refill)) + goto error; - node = mas->alloc; - while (requested) { - max_req = MAPLE_ALLOC_SLOTS - node->node_count; - slots = (void **)&node->slot[node->node_count]; - max_req = min(requested, max_req); - count = mt_alloc_bulk(gfp, max_req, slots); - if (!count) - goto nomem_bulk; - - if (node->node_count == 0) { - node->slot[0]->node_count = 0; - node->slot[0]->request_count = 0; - } + mas->node_request = 0; + return; + } - node->node_count += count; - allocated += count; - /* find a non-full node*/ - do { - node = node->slot[0]; - } while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS)); - requested -= count; + mas->sheaf = mt_get_sheaf(gfp, mas->node_request); + if (likely(mas->sheaf)) { + mas->node_request = 0; + return; } - mas->alloc->total = allocated; - return; -nomem_bulk: - /* Clean up potential freed allocations on bulk failure */ - memset(slots, 0, max_req * sizeof(unsigned long)); - mas->alloc->total = allocated; -nomem_one: - mas_set_alloc_req(mas, requested); +error: mas_set_err(mas, -ENOMEM); } + /* * mas_free() - Free an encoded maple node * @mas: The maple state @@ -1279,42 +1129,7 @@ nomem_one: */ static inline void mas_free(struct ma_state *mas, struct maple_enode *used) { - struct maple_node *tmp = mte_to_node(used); - - if (mt_in_rcu(mas->tree)) - ma_free_rcu(tmp); - else - mas_push_node(mas, tmp); -} - -/* - * mas_node_count_gfp() - Check if enough nodes are allocated and request more - * if there is not enough nodes. - * @mas: The maple state - * @count: The number of nodes needed - * @gfp: the gfp flags - */ -static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp) -{ - unsigned long allocated = mas_allocated(mas); - - if (allocated < count) { - mas_set_alloc_req(mas, count - allocated); - mas_alloc_nodes(mas, gfp); - } -} - -/* - * mas_node_count() - Check if enough nodes are allocated and request more if - * there is not enough nodes. - * @mas: The maple state - * @count: The number of nodes needed - * - * Note: Uses GFP_NOWAIT for gfp flags. - */ -static void mas_node_count(struct ma_state *mas, int count) -{ - return mas_node_count_gfp(mas, count, GFP_NOWAIT); + ma_free_rcu(mte_to_node(used)); } /* @@ -2451,10 +2266,7 @@ static inline void mas_topiary_node(struct ma_state *mas, enode = tmp_mas->node; tmp = mte_to_node(enode); mte_set_node_dead(enode); - if (in_rcu) - ma_free_rcu(tmp); - else - mas_push_node(mas, tmp); + ma_free_rcu(tmp); } /* @@ -3980,7 +3792,7 @@ set_content: * * Return: Number of nodes required for preallocation. */ -static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry) +static inline void mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry) { struct ma_state *mas = wr_mas->mas; unsigned char height = mas_mt_height(mas); @@ -4026,7 +3838,7 @@ static inline int mas_prealloc_calc(struct ma_wr_state *wr_mas, void *entry) WARN_ON_ONCE(1); } - return ret; + mas->node_request = ret; } /* @@ -4087,15 +3899,15 @@ static inline enum store_type mas_wr_store_type(struct ma_wr_state *wr_mas) */ static inline void mas_wr_preallocate(struct ma_wr_state *wr_mas, void *entry) { - int request; + struct ma_state *mas = wr_mas->mas; mas_wr_prealloc_setup(wr_mas); - wr_mas->mas->store_type = mas_wr_store_type(wr_mas); - request = mas_prealloc_calc(wr_mas, entry); - if (!request) + mas->store_type = mas_wr_store_type(wr_mas); + mas_prealloc_calc(wr_mas, entry); + if (!mas->node_request) return; - mas_node_count(wr_mas->mas, request); + mas_alloc_nodes(mas, GFP_NOWAIT); } /** @@ -5208,7 +5020,6 @@ static inline void mte_destroy_walk(struct maple_enode *enode, */ void *mas_store(struct ma_state *mas, void *entry) { - int request; MA_WR_STATE(wr_mas, mas, entry); trace_ma_write(__func__, mas, 0, entry); @@ -5238,11 +5049,11 @@ void *mas_store(struct ma_state *mas, void *entry) return wr_mas.content; } - request = mas_prealloc_calc(&wr_mas, entry); - if (!request) + mas_prealloc_calc(&wr_mas, entry); + if (!mas->node_request) goto store; - mas_node_count(mas, request); + mas_alloc_nodes(mas, GFP_NOWAIT); if (mas_is_err(mas)) return NULL; @@ -5330,20 +5141,19 @@ EXPORT_SYMBOL_GPL(mas_store_prealloc); int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) { MA_WR_STATE(wr_mas, mas, entry); - int ret = 0; - int request; mas_wr_prealloc_setup(&wr_mas); mas->store_type = mas_wr_store_type(&wr_mas); - request = mas_prealloc_calc(&wr_mas, entry); - if (!request) + mas_prealloc_calc(&wr_mas, entry); + if (!mas->node_request) goto set_flag; mas->mas_flags &= ~MA_STATE_PREALLOC; - mas_node_count_gfp(mas, request, gfp); + mas_alloc_nodes(mas, gfp); if (mas_is_err(mas)) { - mas_set_alloc_req(mas, 0); - ret = xa_err(mas->node); + int ret = xa_err(mas->node); + + mas->node_request = 0; mas_destroy(mas); mas_reset(mas); return ret; @@ -5351,7 +5161,7 @@ int mas_preallocate(struct ma_state *mas, void *entry, gfp_t gfp) set_flag: mas->mas_flags |= MA_STATE_PREALLOC; - return ret; + return 0; } EXPORT_SYMBOL_GPL(mas_preallocate); @@ -5365,26 +5175,13 @@ EXPORT_SYMBOL_GPL(mas_preallocate); */ void mas_destroy(struct ma_state *mas) { - struct maple_alloc *node; - unsigned long total; - mas->mas_flags &= ~MA_STATE_PREALLOC; - total = mas_allocated(mas); - while (total) { - node = mas->alloc; - mas->alloc = node->slot[0]; - if (node->node_count > 1) { - size_t count = node->node_count - 1; - - mt_free_bulk(count, (void __rcu **)&node->slot[1]); - total -= count; - } - kfree(ma_mnode_ptr(node)); - total--; - } + mas->node_request = 0; + if (mas->sheaf) + mt_return_sheaf(mas->sheaf); - mas->alloc = NULL; + mas->sheaf = NULL; } EXPORT_SYMBOL_GPL(mas_destroy); @@ -6019,7 +5816,7 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp) mas_alloc_nodes(mas, gfp); } - if (!mas_allocated(mas)) + if (!mas->sheaf) return false; mas->status = ma_start; @@ -7414,8 +7211,9 @@ void mas_dump(const struct ma_state *mas) pr_err("[%u/%u] index=%lx last=%lx\n", mas->offset, mas->end, mas->index, mas->last); - pr_err(" min=%lx max=%lx alloc=" PTR_FMT ", depth=%u, flags=%x\n", - mas->min, mas->max, mas->alloc, mas->depth, mas->mas_flags); + pr_err(" min=%lx max=%lx sheaf=" PTR_FMT ", request %lu depth=%u, flags=%x\n", + mas->min, mas->max, mas->sheaf, mas->node_request, mas->depth, + mas->mas_flags); if (mas->index > mas->last) pr_err("Check index & last\n"); } -- cgit v1.2.3 From 6bf377b06c08049d0f4042493df302285e45165e Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 15:00:02 +0200 Subject: maple_tree: Add single node allocation support to maple state The fast path through a write will require replacing a single node in the tree. Using a sheaf (32 nodes) is too heavy for the fast path, so special case the node store operation by just allocating one node in the maple state. Signed-off-by: Liam R. Howlett Signed-off-by: Vlastimil Babka --- lib/maple_tree.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index b41245f2cc65..2b7299409900 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -1073,16 +1073,23 @@ static int mas_ascend(struct ma_state *mas) * * Return: A pointer to a maple node. */ -static inline struct maple_node *mas_pop_node(struct ma_state *mas) +static __always_inline struct maple_node *mas_pop_node(struct ma_state *mas) { struct maple_node *ret; + if (mas->alloc) { + ret = mas->alloc; + mas->alloc = NULL; + goto out; + } + if (WARN_ON_ONCE(!mas->sheaf)) return NULL; ret = kmem_cache_alloc_from_sheaf(maple_node_cache, GFP_NOWAIT, mas->sheaf); - memset(ret, 0, sizeof(*ret)); +out: + memset(ret, 0, sizeof(*ret)); return ret; } @@ -1093,9 +1100,34 @@ static inline struct maple_node *mas_pop_node(struct ma_state *mas) */ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp) { - if (unlikely(mas->sheaf)) { - unsigned long refill = mas->node_request; + if (!mas->node_request) + return; + + if (mas->node_request == 1) { + if (mas->sheaf) + goto use_sheaf; + + if (mas->alloc) + return; + mas->alloc = mt_alloc_one(gfp); + if (!mas->alloc) + goto error; + + mas->node_request = 0; + return; + } + +use_sheaf: + if (unlikely(mas->alloc)) { + kfree(mas->alloc); + mas->alloc = NULL; + } + + if (mas->sheaf) { + unsigned long refill; + + refill = mas->node_request; if (kmem_cache_sheaf_size(mas->sheaf) >= refill) { mas->node_request = 0; return; @@ -5180,8 +5212,11 @@ void mas_destroy(struct ma_state *mas) mas->node_request = 0; if (mas->sheaf) mt_return_sheaf(mas->sheaf); - mas->sheaf = NULL; + + if (mas->alloc) + kfree(mas->alloc); + mas->alloc = NULL; } EXPORT_SYMBOL_GPL(mas_destroy); @@ -5816,7 +5851,7 @@ bool mas_nomem(struct ma_state *mas, gfp_t gfp) mas_alloc_nodes(mas, gfp); } - if (!mas->sheaf) + if (!mas->sheaf && !mas->alloc) return false; mas->status = ma_start; -- cgit v1.2.3 From 719a42e563bb087758500e43e67a57b27f303c4c Mon Sep 17 00:00:00 2001 From: "Liam R. Howlett" Date: Wed, 3 Sep 2025 15:00:03 +0200 Subject: maple_tree: Convert forking to use the sheaf interface Use the generic interface which should result in less bulk allocations during a forking. A part of this is to abstract the freeing of the sheaf or maple state allocations into its own function so mas_destroy() and the tree duplication code can use the same functionality to return any unused resources. [andriy.shevchenko@linux.intel.com: remove unused mt_alloc_bulk()] Signed-off-by: Liam R. Howlett Reviewed-by: Suren Baghdasaryan Signed-off-by: Vlastimil Babka --- lib/maple_tree.c | 47 +++++++++++++++++++++++------------------------ 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 2b7299409900..ab4c6c21a625 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -172,11 +172,6 @@ static inline struct maple_node *mt_alloc_one(gfp_t gfp) return kmem_cache_alloc(maple_node_cache, gfp); } -static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes) -{ - return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes); -} - static inline void mt_free_bulk(size_t size, void __rcu **nodes) { kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes); @@ -1150,6 +1145,19 @@ error: mas_set_err(mas, -ENOMEM); } +static inline void mas_empty_nodes(struct ma_state *mas) +{ + mas->node_request = 0; + if (mas->sheaf) { + mt_return_sheaf(mas->sheaf); + mas->sheaf = NULL; + } + + if (mas->alloc) { + kfree(mas->alloc); + mas->alloc = NULL; + } +} /* * mas_free() - Free an encoded maple node @@ -5208,15 +5216,7 @@ EXPORT_SYMBOL_GPL(mas_preallocate); void mas_destroy(struct ma_state *mas) { mas->mas_flags &= ~MA_STATE_PREALLOC; - - mas->node_request = 0; - if (mas->sheaf) - mt_return_sheaf(mas->sheaf); - mas->sheaf = NULL; - - if (mas->alloc) - kfree(mas->alloc); - mas->alloc = NULL; + mas_empty_nodes(mas); } EXPORT_SYMBOL_GPL(mas_destroy); @@ -6241,7 +6241,7 @@ static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, struct maple_node *node = mte_to_node(mas->node); struct maple_node *new_node = mte_to_node(new_mas->node); enum maple_type type; - unsigned char request, count, i; + unsigned char count, i; void __rcu **slots; void __rcu **new_slots; unsigned long val; @@ -6249,20 +6249,17 @@ static inline void mas_dup_alloc(struct ma_state *mas, struct ma_state *new_mas, /* Allocate memory for child nodes. */ type = mte_node_type(mas->node); new_slots = ma_slots(new_node, type); - request = mas_data_end(mas) + 1; - count = mt_alloc_bulk(gfp, request, (void **)new_slots); - if (unlikely(count < request)) { - memset(new_slots, 0, request * sizeof(void *)); - mas_set_err(mas, -ENOMEM); + count = mas->node_request = mas_data_end(mas) + 1; + mas_alloc_nodes(mas, gfp); + if (unlikely(mas_is_err(mas))) return; - } - /* Restore node type information in slots. */ slots = ma_slots(node, type); for (i = 0; i < count; i++) { val = (unsigned long)mt_slot_locked(mas->tree, slots, i); val &= MAPLE_NODE_MASK; - ((unsigned long *)new_slots)[i] |= val; + new_slots[i] = ma_mnode_ptr((unsigned long)mas_pop_node(mas) | + val); } } @@ -6316,7 +6313,7 @@ static inline void mas_dup_build(struct ma_state *mas, struct ma_state *new_mas, /* Only allocate child nodes for non-leaf nodes. */ mas_dup_alloc(mas, new_mas, gfp); if (unlikely(mas_is_err(mas))) - return; + goto empty_mas; } else { /* * This is the last leaf node and duplication is @@ -6349,6 +6346,8 @@ set_new_tree: /* Make them the same height */ new_mas->tree->ma_flags = mas->tree->ma_flags; rcu_assign_pointer(new_mas->tree->ma_root, root); +empty_mas: + mas_empty_nodes(mas); } /** -- cgit v1.2.3 From 191cac349c8eeeb6d64453b8db194d3ad42ca9dc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 14 Aug 2025 19:17:33 -0700 Subject: lib/digsig: Use SHA-1 library instead of crypto_shash Now that a SHA-1 library API is available, use it instead of crypto_shash. This is simpler and faster. Signed-off-by: Eric Biggers Reviewed-by: Paul Menzel Signed-off-by: Mimi Zohar --- lib/Kconfig | 3 +-- lib/digsig.c | 46 ++++++---------------------------------------- 2 files changed, 7 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index c483951b624f..e629449dd2a3 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -477,8 +477,7 @@ config MPILIB config SIGNATURE tristate depends on KEYS - select CRYPTO - select CRYPTO_SHA1 + select CRYPTO_LIB_SHA1 select MPILIB help Digital signature verification. Currently only RSA is supported. diff --git a/lib/digsig.c b/lib/digsig.c index 04b5e55ed95f..5ddcc52f7686 100644 --- a/lib/digsig.c +++ b/lib/digsig.c @@ -18,15 +18,11 @@ #include #include #include -#include -#include #include #include #include #include -static struct crypto_shash *shash; - static const char *pkcs_1_v1_5_decode_emsa(const unsigned char *msg, unsigned long msglen, unsigned long modulus_bitlen, @@ -199,12 +195,12 @@ err1: int digsig_verify(struct key *keyring, const char *sig, int siglen, const char *data, int datalen) { - int err = -ENOMEM; struct signature_hdr *sh = (struct signature_hdr *)sig; - struct shash_desc *desc = NULL; + struct sha1_ctx ctx; unsigned char hash[SHA1_DIGEST_SIZE]; struct key *key; char name[20]; + int err; if (siglen < sizeof(*sh) + 2) return -EINVAL; @@ -231,49 +227,19 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen, return PTR_ERR(key); } - desc = kzalloc(sizeof(*desc) + crypto_shash_descsize(shash), - GFP_KERNEL); - if (!desc) - goto err; - - desc->tfm = shash; - - crypto_shash_init(desc); - crypto_shash_update(desc, data, datalen); - crypto_shash_update(desc, sig, sizeof(*sh)); - crypto_shash_final(desc, hash); - - kfree(desc); + sha1_init(&ctx); + sha1_update(&ctx, data, datalen); + sha1_update(&ctx, sig, sizeof(*sh)); + sha1_final(&ctx, hash); /* pass signature mpis address */ err = digsig_verify_rsa(key, sig + sizeof(*sh), siglen - sizeof(*sh), hash, sizeof(hash)); -err: key_put(key); return err ? -EINVAL : 0; } EXPORT_SYMBOL_GPL(digsig_verify); -static int __init digsig_init(void) -{ - shash = crypto_alloc_shash("sha1", 0, 0); - if (IS_ERR(shash)) { - pr_err("shash allocation failed\n"); - return PTR_ERR(shash); - } - - return 0; - -} - -static void __exit digsig_cleanup(void) -{ - crypto_free_shash(shash); -} - -module_init(digsig_init); -module_exit(digsig_cleanup); - MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 90eb9ae35727a662789c850efaf225ffe5511fae Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 21 Sep 2025 08:44:58 +0300 Subject: lib/test_kho: use kho_preserve_vmalloc instead of storing addresses in fdt KHO test stores physical addresses of the preserved folios directly in fdt. Use kho_preserve_vmalloc() instead of it and kho_restore_vmalloc() to retrieve the addresses after kexec. This makes the test more scalable from one side and adds tests coverage for kho_preserve_vmalloc() from the other. Link: https://lkml.kernel.org/r/20250921054458.4043761-5-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Pratyush Yadav Cc: Alexander Graf Cc: Baoquan He Cc: Changyuan Lyu Cc: Chris Li Cc: Jason Gunthorpe Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- lib/test_kho.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/test_kho.c b/lib/test_kho.c index fe8504e3407b..60cd899ea745 100644 --- a/lib/test_kho.c +++ b/lib/test_kho.c @@ -32,6 +32,7 @@ module_param(max_mem, long, 0644); struct kho_test_state { unsigned int nr_folios; struct folio **folios; + phys_addr_t *folios_info; struct folio *fdt; __wsum csum; }; @@ -67,18 +68,15 @@ static struct notifier_block kho_test_nb = { static int kho_test_save_data(struct kho_test_state *state, void *fdt) { - phys_addr_t *folios_info; + phys_addr_t *folios_info __free(kvfree) = NULL; + struct kho_vmalloc folios_info_phys; int err = 0; - err |= fdt_begin_node(fdt, "data"); - err |= fdt_property(fdt, "nr_folios", &state->nr_folios, - sizeof(state->nr_folios)); - err |= fdt_property_placeholder(fdt, "folios_info", - state->nr_folios * sizeof(*folios_info), - (void **)&folios_info); - err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); - err |= fdt_end_node(fdt); + folios_info = vmalloc_array(state->nr_folios, sizeof(*folios_info)); + if (!folios_info) + return -ENOMEM; + err = kho_preserve_vmalloc(folios_info, &folios_info_phys); if (err) return err; @@ -93,6 +91,17 @@ static int kho_test_save_data(struct kho_test_state *state, void *fdt) break; } + err |= fdt_begin_node(fdt, "data"); + err |= fdt_property(fdt, "nr_folios", &state->nr_folios, + sizeof(state->nr_folios)); + err |= fdt_property(fdt, "folios_info", &folios_info_phys, + sizeof(folios_info_phys)); + err |= fdt_property(fdt, "csum", &state->csum, sizeof(state->csum)); + err |= fdt_end_node(fdt); + + if (!err) + state->folios_info = no_free_ptr(folios_info); + return err; } @@ -209,8 +218,9 @@ err_free_folios: static int kho_test_restore_data(const void *fdt, int node) { + const struct kho_vmalloc *folios_info_phys; const unsigned int *nr_folios; - const phys_addr_t *folios_info; + phys_addr_t *folios_info; const __wsum *old_csum; __wsum csum = 0; int len; @@ -225,8 +235,12 @@ static int kho_test_restore_data(const void *fdt, int node) if (!old_csum || len != sizeof(*old_csum)) return -EINVAL; - folios_info = fdt_getprop(fdt, node, "folios_info", &len); - if (!folios_info || len != sizeof(*folios_info) * *nr_folios) + folios_info_phys = fdt_getprop(fdt, node, "folios_info", &len); + if (!folios_info_phys || len != sizeof(*folios_info_phys)) + return -EINVAL; + + folios_info = kho_restore_vmalloc(folios_info_phys); + if (!folios_info) return -EINVAL; for (int i = 0; i < *nr_folios; i++) { @@ -246,6 +260,8 @@ static int kho_test_restore_data(const void *fdt, int node) folio_put(folio); } + vfree(folios_info); + if (csum != *old_csum) return -EINVAL; @@ -304,6 +320,7 @@ static void kho_test_cleanup(void) folio_put(kho_test_state.folios[i]); kvfree(kho_test_state.folios); + vfree(kho_test_state.folios_info); folio_put(kho_test_state.fdt); } -- cgit v1.2.3 From b37491d72b43c3a322d396c2d8e951a10be70c17 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 18 Sep 2025 09:30:03 -0700 Subject: interval_tree: Fix ITSTATIC usage for *_subtree_search() For consistency with the other function templates, change _subtree_search_*() to use the user-supplied ITSTATIC rather than the hard-coded 'static'. Acked-by: Petr Mladek Tested-by: Joe Lawrence Signed-off-by: Josh Poimboeuf --- lib/interval_tree.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/interval_tree.c b/lib/interval_tree.c index 324766e9bf63..9ceb084b6b4e 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -13,6 +13,7 @@ INTERVAL_TREE_DEFINE(struct interval_tree_node, rb, EXPORT_SYMBOL_GPL(interval_tree_insert); EXPORT_SYMBOL_GPL(interval_tree_remove); +EXPORT_SYMBOL_GPL(interval_tree_subtree_search); EXPORT_SYMBOL_GPL(interval_tree_iter_first); EXPORT_SYMBOL_GPL(interval_tree_iter_next); -- cgit v1.2.3 From 2551a1eedc09f5a86f94b038dc1bb16855c256f1 Mon Sep 17 00:00:00 2001 From: Florian Schmaus Date: Fri, 17 Oct 2025 11:28:14 +0200 Subject: kunit: test_dev_action: Correctly cast 'priv' pointer to long* The previous implementation incorrectly assumed the original type of 'priv' was void**, leading to an unnecessary and misleading cast. Correct the cast of the 'priv' pointer in test_dev_action() to its actual type, long*, removing an unnecessary cast. As an additional benefit, this fixes an out-of-bounds CHERI fault on hardware with architectural capabilities. The original implementation tried to store a capability-sized pointer using the priv pointer. However, the priv pointer's capability only granted access to the memory region of its original long type, leading to a bounds violation since the size of a long is smaller than the size of a capability. This change ensures that the pointer usage respects the capabilities' bounds. Link: https://lore.kernel.org/r/20251017092814.80022-1-florian.schmaus@codasip.com Fixes: d03c720e03bd ("kunit: Add APIs for managing devices") Reviewed-by: David Gow Signed-off-by: Florian Schmaus Signed-off-by: Shuah Khan --- lib/kunit/kunit-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kunit/kunit-test.c b/lib/kunit/kunit-test.c index 8c01eabd4eaf..63130a48e237 100644 --- a/lib/kunit/kunit-test.c +++ b/lib/kunit/kunit-test.c @@ -739,7 +739,7 @@ static struct kunit_case kunit_current_test_cases[] = { static void test_dev_action(void *priv) { - *(void **)priv = (void *)1; + *(long *)priv = 1; } static void kunit_device_test(struct kunit *test) -- cgit v1.2.3 From 1af424b15401d2be789c4dc2279889514e7c5c94 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 21 Oct 2025 20:34:05 -0700 Subject: lib/crypto: poly1305: Restore dependency of arch code on !KMSAN Restore the dependency of the architecture-optimized Poly1305 code on !KMSAN. It was dropped by commit b646b782e522 ("lib/crypto: poly1305: Consolidate into single module"). Unlike the other hash algorithms in lib/crypto/ (e.g., SHA-512), the way the architecture-optimized Poly1305 code is integrated results in assembly code initializing memory, for several different architectures. Thus, it generates false positive KMSAN warnings. These could be suppressed with kmsan_unpoison_memory(), but it would be needed in quite a few places. For now let's just restore the dependency on !KMSAN. Note: this should have been caught by running poly1305_kunit with CONFIG_KMSAN=y, which I did. However, due to an unrelated KMSAN bug (https://lore.kernel.org/r/20251022030213.GA35717@sol/), KMSAN currently isn't working reliably. Thus, the warning wasn't noticed until later. Fixes: b646b782e522 ("lib/crypto: poly1305: Consolidate into single module") Reported-by: syzbot+01fcd39a0d90cdb0e3df@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/68f6a48f.050a0220.91a22.0452.GAE@google.com/ Reported-by: Pei Xiao Closes: https://lore.kernel.org/r/751b3d80293a6f599bb07770afcef24f623c7da0.1761026343.git.xiaopei01@kylinos.cn/ Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251022033405.64761-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index eea17e36a22b..8886055e938f 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -97,7 +97,7 @@ config CRYPTO_LIB_POLY1305 config CRYPTO_LIB_POLY1305_ARCH bool - depends on CRYPTO_LIB_POLY1305 && !UML + depends on CRYPTO_LIB_POLY1305 && !UML && !KMSAN default y if ARM default y if ARM64 && KERNEL_MODE_NEON default y if MIPS -- cgit v1.2.3 From 8080c67dd57cb968150b668ecbd4a4e4afd56ad4 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Fri, 24 Oct 2025 19:01:00 +0000 Subject: kunit: prevent log overwrite in param_tests When running parameterized tests, each test case is initialized with kunit_init_test(). This function takes the test_case->log as a parameter but it clears it via string_stream_clear() on each iteration. This results in only the log from the last parameter being preserved in the test_case->log and the results from the previous parameters are lost from the debugfs entry. Fix this by manually setting the param_test.log to the test_case->log after it has been initialized. This prevents kunit_init_test() from clearing the log on each iteration. Link: https://lore.kernel.org/r/20251024190101.2091549-1-cmllamas@google.com Fixes: 4b59300ba4d2 ("kunit: Add parent kunit for parameterized test context") Signed-off-by: Carlos Llamas Reviewed-by: David Gow Signed-off-by: Shuah Khan --- lib/kunit/test.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kunit/test.c b/lib/kunit/test.c index bb66ea1a3eac..62eb529824c6 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -745,7 +745,8 @@ int kunit_run_tests(struct kunit_suite *suite) .param_index = ++test.param_index, .parent = &test, }; - kunit_init_test(¶m_test, test_case->name, test_case->log); + kunit_init_test(¶m_test, test_case->name, NULL); + param_test.log = test_case->log; kunit_run_case_catch_errors(suite, test_case, ¶m_test); if (param_desc[0] == '\0') { -- cgit v1.2.3 From cf20852500d2895d1db22b69d87281aca2e7b5d0 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 23 Oct 2025 21:01:29 +0200 Subject: KMSAN: Restore dynamic check for '-fsanitize=kernel-memory' Commit 5ff8c11775c7 ("KMSAN: Remove tautological checks") changed CONFIG_HAVE_KMSAN_COMPILER from a dynamic check for '-fsanitize=kernel-memory' to just being true for CONFIG_CC_IS_CLANG. This missed the fact that not all architectures supported '-fsanitize=kernel-memory' at the same time. For example, SystemZ / s390 gained support for KMSAN in clang-18 [1], so builds with clang-15 through clang-17 can select KMSAN but they error with: clang-16: error: unsupported option '-fsanitize=kernel-memory' for target 's390x-unknown-linux-gnu' Restore the cc-option check for '-fsanitize=kernel-memory' to make sure the compiler target properly supports '-fsanitize=kernel-memory'. The check for '-msan-disable-checks=1' does not need to be restored because all supported clang versions for building the kernel support it. Fixes: 5ff8c11775c7 ("KMSAN: Remove tautological checks") Link: https://github.com/llvm/llvm-project/commit/a3e56a8792ffaf3a3d3538736e1042b8db45ab89 [1] Reported-by: kernel test robot Closes: https://lore.kernel.org/r/202510220236.AVuXXCYy-lkp@intel.com/ Acked-by: Nicolas Schier Link: https://patch.msgid.link/20251023-fix-kmsan-check-s390-clang-v1-1-4e6df477a4cc@kernel.org Signed-off-by: Nathan Chancellor --- lib/Kconfig.kmsan | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.kmsan b/lib/Kconfig.kmsan index 7251b6b59e69..cae1ddcc18e1 100644 --- a/lib/Kconfig.kmsan +++ b/lib/Kconfig.kmsan @@ -3,7 +3,7 @@ config HAVE_ARCH_KMSAN bool config HAVE_KMSAN_COMPILER - def_bool CC_IS_CLANG + def_bool $(cc-option,-fsanitize=kernel-memory) config KMSAN bool "KMSAN: detector of uninitialized values use" -- cgit v1.2.3 From 2b81082ad37cc3f28355fb73a6a69b91ff7dbf20 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 3 Nov 2025 12:11:24 -0700 Subject: lib/crypto: curve25519-hacl64: Fix older clang KASAN workaround for GCC Commit 2f13daee2a72 ("lib/crypto/curve25519-hacl64: Disable KASAN with clang-17 and older") inadvertently disabled KASAN in curve25519-hacl64.o for GCC unconditionally because clang-min-version will always evaluate to nothing for GCC. Add a check for CONFIG_CC_IS_CLANG to avoid applying the workaround for GCC, which is only needed for clang-17 and older. Cc: stable@vger.kernel.org Fixes: 2f13daee2a72 ("lib/crypto/curve25519-hacl64: Disable KASAN with clang-17 and older") Signed-off-by: Nathan Chancellor Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251103-curve25519-hacl64-fix-kasan-workaround-v2-1-ab581cbd8035@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index bded351aeace..d2845b214585 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -90,7 +90,7 @@ else libcurve25519-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += curve25519-fiat32.o endif # clang versions prior to 18 may blow out the stack with KASAN -ifeq ($(call clang-min-version, 180000),) +ifeq ($(CONFIG_CC_IS_CLANG)_$(call clang-min-version, 180000),y_) KASAN_SANITIZE_curve25519-hacl64.o := n endif -- cgit v1.2.3 From 44e8241c51f762aafa50ed116da68fd6ecdcc954 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 3 Nov 2025 21:49:06 -0800 Subject: lib/crypto: arm/curve25519: Disable on CPU_BIG_ENDIAN On big endian arm kernels, the arm optimized Curve25519 code produces incorrect outputs and fails the Curve25519 test. This has been true ever since this code was added. It seems that hardly anyone (or even no one?) actually uses big endian arm kernels. But as long as they're ostensibly supported, we should disable this code on them so that it's not accidentally used. Note: for future-proofing, use !CPU_BIG_ENDIAN instead of CPU_LITTLE_ENDIAN. Both of these are arch-specific options that could get removed in the future if big endian support gets dropped. Fixes: d8f1308a025f ("crypto: arm/curve25519 - wire up NEON implementation") Cc: stable@vger.kernel.org Acked-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20251104054906.716914-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 8886055e938f..16859c6226dd 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -64,7 +64,7 @@ config CRYPTO_LIB_CURVE25519 config CRYPTO_LIB_CURVE25519_ARCH bool depends on CRYPTO_LIB_CURVE25519 && !UML && !KMSAN - default y if ARM && KERNEL_MODE_NEON + default y if ARM && KERNEL_MODE_NEON && !CPU_BIG_ENDIAN default y if PPC64 && CPU_LITTLE_ENDIAN default y if X86_64 -- cgit v1.2.3