From 640d31ea83c6f67133d47df9a0973f3281c91cf4 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 15:35:10 -0700 Subject: lib/crypto: sha256: Use underlying functions instead of crypto_simd_usable() Since sha256_kunit tests the fallback code paths without using crypto_simd_disabled_for_test, make the SHA-256 code just use the underlying may_use_simd() and irq_fpu_usable() functions directly instead of crypto_simd_usable(). This eliminates an unnecessary layer. While doing this, also add likely() annotations, and fix a minor inconsistency where the static keys in the sha256.h files were in a different place than in the corresponding sha1.h and sha512.h files. Link: https://lore.kernel.org/r/20250731223510.136650-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/sha256.h | 10 +++++----- lib/crypto/arm64/sha256.h | 10 +++++----- lib/crypto/riscv/sha256.h | 8 ++++---- lib/crypto/x86/sha256.h | 3 +-- 4 files changed, 15 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/sha256.h b/lib/crypto/arm/sha256.h index da75cbdc51d4..eab713e650f3 100644 --- a/lib/crypto/arm/sha256.h +++ b/lib/crypto/arm/sha256.h @@ -5,7 +5,10 @@ * Copyright 2025 Google LLC */ #include -#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); asmlinkage void sha256_block_data_order(struct sha256_block_state *state, const u8 *data, size_t nblocks); @@ -14,14 +17,11 @@ asmlinkage void sha256_block_data_order_neon(struct sha256_block_state *state, asmlinkage void sha256_ce_transform(struct sha256_block_state *state, const u8 *data, size_t nblocks); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); - static void sha256_blocks(struct sha256_block_state *state, const u8 *data, size_t nblocks) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && crypto_simd_usable()) { + static_branch_likely(&have_neon) && likely(may_use_simd())) { kernel_neon_begin(); if (static_branch_likely(&have_ce)) sha256_ce_transform(state, data, nblocks); diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h index a211966c124a..d95f1077c32b 100644 --- a/lib/crypto/arm64/sha256.h +++ b/lib/crypto/arm64/sha256.h @@ -5,9 +5,12 @@ * Copyright 2025 Google LLC */ #include -#include +#include #include +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); + asmlinkage void sha256_block_data_order(struct sha256_block_state *state, const u8 *data, size_t nblocks); asmlinkage void sha256_block_neon(struct sha256_block_state *state, @@ -15,14 +18,11 @@ asmlinkage void sha256_block_neon(struct sha256_block_state *state, asmlinkage size_t __sha256_ce_transform(struct sha256_block_state *state, const u8 *data, size_t nblocks); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_ce); - static void sha256_blocks(struct sha256_block_state *state, const u8 *data, size_t nblocks) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && crypto_simd_usable()) { + static_branch_likely(&have_neon) && likely(may_use_simd())) { if (static_branch_likely(&have_ce)) { do { size_t rem; diff --git a/lib/crypto/riscv/sha256.h b/lib/crypto/riscv/sha256.h index c0f79c18f119..f36f68d2e88c 100644 --- a/lib/crypto/riscv/sha256.h +++ b/lib/crypto/riscv/sha256.h @@ -9,19 +9,19 @@ * Author: Jerry 
Shih */ +#include #include -#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions); asmlinkage void sha256_transform_zvknha_or_zvknhb_zvkb(struct sha256_block_state *state, const u8 *data, size_t nblocks); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions); - static void sha256_blocks(struct sha256_block_state *state, const u8 *data, size_t nblocks) { - if (static_branch_likely(&have_extensions) && crypto_simd_usable()) { + if (static_branch_likely(&have_extensions) && likely(may_use_simd())) { kernel_vector_begin(); sha256_transform_zvknha_or_zvknhb_zvkb(state, data, nblocks); kernel_vector_end(); diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h index 669bc06538b6..c852396ef319 100644 --- a/lib/crypto/x86/sha256.h +++ b/lib/crypto/x86/sha256.h @@ -5,7 +5,6 @@ * Copyright 2025 Google LLC */ #include -#include #include DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic); @@ -16,7 +15,7 @@ DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic); static void c_fn(struct sha256_block_state *state, const u8 *data, \ size_t nblocks) \ { \ - if (likely(crypto_simd_usable())) { \ + if (likely(irq_fpu_usable())) { \ kernel_fpu_begin(); \ asm_fn(state, data, nblocks); \ kernel_fpu_end(); \ -- cgit v1.2.3 From bce5816672ec27085489f096ec27739a4a233b7b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 31 Jul 2025 15:36:51 -0700 Subject: lib/crypto: sha512: Use underlying functions instead of crypto_simd_usable() Since sha512_kunit tests the fallback code paths without using crypto_simd_disabled_for_test, make the SHA-512 code just use the underlying may_use_simd() and irq_fpu_usable() functions directly instead of crypto_simd_usable(). This eliminates an unnecessary layer. Link: https://lore.kernel.org/r/20250731223651.136939-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/sha512.h | 5 ++--- lib/crypto/arm64/sha512.h | 5 ++--- lib/crypto/riscv/sha512.h | 4 +--- lib/crypto/x86/sha512.h | 4 +--- 4 files changed, 6 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/sha512.h b/lib/crypto/arm/sha512.h index f147b6490d6c..cc2447acd562 100644 --- a/lib/crypto/arm/sha512.h +++ b/lib/crypto/arm/sha512.h @@ -4,9 +4,8 @@ * * Copyright 2025 Google LLC */ - #include -#include +#include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); @@ -19,7 +18,7 @@ static void sha512_blocks(struct sha512_block_state *state, const u8 *data, size_t nblocks) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && likely(crypto_simd_usable())) { + static_branch_likely(&have_neon) && likely(may_use_simd())) { kernel_neon_begin(); sha512_block_data_order_neon(state, data, nblocks); kernel_neon_end(); diff --git a/lib/crypto/arm64/sha512.h b/lib/crypto/arm64/sha512.h index 6abb40b467f2..7539ea3fef10 100644 --- a/lib/crypto/arm64/sha512.h +++ b/lib/crypto/arm64/sha512.h @@ -4,9 +4,8 @@ * * Copyright 2025 Google LLC */ - #include -#include +#include #include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha512_insns); @@ -21,7 +20,7 @@ static void sha512_blocks(struct sha512_block_state *state, { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && static_branch_likely(&have_sha512_insns) && - likely(crypto_simd_usable())) { + likely(may_use_simd())) { do { size_t rem; diff --git a/lib/crypto/riscv/sha512.h b/lib/crypto/riscv/sha512.h index 9d0abede322f..59dc0294a9a7 100644 --- a/lib/crypto/riscv/sha512.h +++ b/lib/crypto/riscv/sha512.h @@ -11,7 +11,6 @@ #include #include -#include static 
__ro_after_init DEFINE_STATIC_KEY_FALSE(have_extensions); @@ -21,8 +20,7 @@ asmlinkage void sha512_transform_zvknhb_zvkb(struct sha512_block_state *state, static void sha512_blocks(struct sha512_block_state *state, const u8 *data, size_t nblocks) { - if (static_branch_likely(&have_extensions) && - likely(crypto_simd_usable())) { + if (static_branch_likely(&have_extensions) && likely(may_use_simd())) { kernel_vector_begin(); sha512_transform_zvknhb_zvkb(state, data, nblocks); kernel_vector_end(); diff --git a/lib/crypto/x86/sha512.h b/lib/crypto/x86/sha512.h index c13503d9d57d..be2c8fc12246 100644 --- a/lib/crypto/x86/sha512.h +++ b/lib/crypto/x86/sha512.h @@ -4,9 +4,7 @@ * * Copyright 2025 Google LLC */ - #include -#include #include DEFINE_STATIC_CALL(sha512_blocks_x86, sha512_blocks_generic); @@ -17,7 +15,7 @@ DEFINE_STATIC_CALL(sha512_blocks_x86, sha512_blocks_generic); static void c_fn(struct sha512_block_state *state, const u8 *data, \ size_t nblocks) \ { \ - if (likely(crypto_simd_usable())) { \ + if (likely(irq_fpu_usable())) { \ kernel_fpu_begin(); \ asm_fn(state, data, nblocks); \ kernel_fpu_end(); \ -- cgit v1.2.3 From e164461349444ad27873e4ab2f492eb4465dbbb0 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:49 -0700 Subject: lib/crypto: md5: Add MD5 and HMAC-MD5 library functions Add library functions for MD5, including HMAC support. The MD5 implementation is derived from crypto/md5.c. This closely mirrors the corresponding SHA-1 and SHA-2 changes. Like SHA-1 and SHA-2, support for architecture-optimized MD5 implementations is included. I originally proposed dropping those, but unfortunately there is an AF_ALG user of the PowerPC MD5 code (https://lore.kernel.org/r/c4191597-341d-4fd7-bc3d-13daf7666c41@csgroup.eu/), and dropping that code would be viewed as a performance regression. We don't add new software algorithm implementations purely for AF_ALG, as escalating to kernel mode merely to do calculations that could be done in userspace is inefficient and is completely the wrong design. But since this one already existed, it gets grandfathered in for now. An objection was also raised to dropping the SPARC64 MD5 code because it utilizes the CPU's direct support for MD5, although it remains unclear that anyone is using that. Regardless, we'll keep these around for now. Note that while MD5 is a legacy algorithm that is vulnerable to practical collision attacks, it still has various in-kernel users that implement legacy protocols. Switching to a simple library API, which is the way the code should have been organized originally, will greatly simplify their code. For example: MD5: drivers/md/dm-crypt.c (for lmk IV generation) fs/nfsd/nfs4recover.c fs/ecryptfs/ fs/smb/client/ net/{ipv4,ipv6}/ (for TCP-MD5 signatures) HMAC-MD5: fs/smb/client/ fs/smb/server/ (Also net/sctp/ if it continues using HMAC-MD5 for cookie generation. However, that use case has the flexibility to upgrade to a more modern algorithm, which I'll be proposing instead.) As usual, the "md5" and "hmac(md5)" crypto_shash algorithms will also be reimplemented on top of these library functions. For "hmac(md5)" this will provide a faster, more streamlined implementation. 
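For illustration only (not part of this patch), a minimal sketch of how a caller might use the new MD5 library API added here; the buffer and length names are hypothetical:

    u8 digest[MD5_DIGEST_SIZE];
    struct md5_ctx ctx;

    /* One-shot hash of a contiguous buffer. */
    md5(data, data_len, digest);

    /* Incremental hashing of scattered data. */
    md5_init(&ctx);
    md5_update(&ctx, hdr, hdr_len);
    md5_update(&ctx, payload, payload_len);
    md5_final(&ctx, digest);

    /* One-shot HMAC-MD5 keyed directly with a raw key. */
    hmac_md5_usingrawkey(raw_key, raw_key_len, data, data_len, digest);
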
Link: https://lore.kernel.org/r/20250805222855.10362-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 10 ++ lib/crypto/Makefile | 10 ++ lib/crypto/md5.c | 322 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 342 insertions(+) create mode 100644 lib/crypto/md5.c (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 1e6b008f8fca..e38e8ed779d8 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -101,6 +101,16 @@ config CRYPTO_LIB_CURVE25519 config CRYPTO_LIB_DES tristate +config CRYPTO_LIB_MD5 + tristate + help + The MD5 and HMAC-MD5 library functions. Select this if your module + uses any of the functions from . + +config CRYPTO_LIB_MD5_ARCH + bool + depends on CRYPTO_LIB_MD5 && !UML + config CRYPTO_LIB_POLY1305_RSIZE int default 2 if MIPS diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 539d5d59a50e..429573e8f8b3 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -59,6 +59,16 @@ libcurve25519-$(CONFIG_CRYPTO_SELFTESTS) += curve25519-selftest.o obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o libdes-y := des.o +################################################################################ + +obj-$(CONFIG_CRYPTO_LIB_MD5) += libmd5.o +libmd5-y := md5.o +ifeq ($(CONFIG_CRYPTO_LIB_MD5_ARCH),y) +CFLAGS_md5.o += -I$(src)/$(SRCARCH) +endif # CONFIG_CRYPTO_LIB_MD5_ARCH + +################################################################################ + obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o libpoly1305-y += poly1305.o diff --git a/lib/crypto/md5.c b/lib/crypto/md5.c new file mode 100644 index 000000000000..c0610ea1370e --- /dev/null +++ b/lib/crypto/md5.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * MD5 and HMAC-MD5 library functions + * + * md5_block_generic() is derived from cryptoapi implementation, originally + * based on the public domain implementation written by Colin Plumb in 1993. + * + * Copyright (c) Cryptoapi developers. 
+ * Copyright (c) 2002 James Morris + * Copyright 2025 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct md5_block_state md5_iv = { + .h = { MD5_H0, MD5_H1, MD5_H2, MD5_H3 }, +}; + +#define F1(x, y, z) (z ^ (x & (y ^ z))) +#define F2(x, y, z) F1(z, x, y) +#define F3(x, y, z) (x ^ y ^ z) +#define F4(x, y, z) (y ^ (x | ~z)) + +#define MD5STEP(f, w, x, y, z, in, s) \ + (w += f(x, y, z) + in, w = (w << s | w >> (32 - s)) + x) + +static void md5_block_generic(struct md5_block_state *state, + const u8 data[MD5_BLOCK_SIZE]) +{ + u32 in[MD5_BLOCK_WORDS]; + u32 a, b, c, d; + + memcpy(in, data, MD5_BLOCK_SIZE); + le32_to_cpu_array(in, ARRAY_SIZE(in)); + + a = state->h[0]; + b = state->h[1]; + c = state->h[2]; + d = state->h[3]; + + MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); + MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); + MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); + MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); + MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); + MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); + MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); + MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); + MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); + MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); + MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); + MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); + MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); + MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); + MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); + MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); + + MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); + MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); + MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); + MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); + MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); + MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); + MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); + MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); + MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); + MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); + MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); + MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); + MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); + MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); + MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); + MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); + + MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); + MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); + MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); + MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); + MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); + MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); + MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); + MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); + MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); + MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); + MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); + MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); + MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); + MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); + MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); + MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); + + MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); + MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); + MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); + MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); + MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); + MD5STEP(F4, d, a, b, c, in[3] + 
0x8f0ccc92, 10); + MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); + MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); + MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); + MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); + MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); + MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); + MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); + MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); + MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); + MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); + + state->h[0] += a; + state->h[1] += b; + state->h[2] += c; + state->h[3] += d; +} + +static void __maybe_unused md5_blocks_generic(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + do { + md5_block_generic(state, data); + data += MD5_BLOCK_SIZE; + } while (--nblocks); +} + +#ifdef CONFIG_CRYPTO_LIB_MD5_ARCH +#include "md5.h" /* $(SRCARCH)/md5.h */ +#else +#define md5_blocks md5_blocks_generic +#endif + +void md5_init(struct md5_ctx *ctx) +{ + ctx->state = md5_iv; + ctx->bytecount = 0; +} +EXPORT_SYMBOL_GPL(md5_init); + +void md5_update(struct md5_ctx *ctx, const u8 *data, size_t len) +{ + size_t partial = ctx->bytecount % MD5_BLOCK_SIZE; + + ctx->bytecount += len; + + if (partial + len >= MD5_BLOCK_SIZE) { + size_t nblocks; + + if (partial) { + size_t l = MD5_BLOCK_SIZE - partial; + + memcpy(&ctx->buf[partial], data, l); + data += l; + len -= l; + + md5_blocks(&ctx->state, ctx->buf, 1); + } + + nblocks = len / MD5_BLOCK_SIZE; + len %= MD5_BLOCK_SIZE; + + if (nblocks) { + md5_blocks(&ctx->state, data, nblocks); + data += nblocks * MD5_BLOCK_SIZE; + } + partial = 0; + } + if (len) + memcpy(&ctx->buf[partial], data, len); +} +EXPORT_SYMBOL_GPL(md5_update); + +static void __md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) +{ + u64 bitcount = ctx->bytecount << 3; + size_t partial = ctx->bytecount % MD5_BLOCK_SIZE; + + ctx->buf[partial++] = 0x80; + if (partial > MD5_BLOCK_SIZE - 8) { + memset(&ctx->buf[partial], 0, MD5_BLOCK_SIZE - partial); + md5_blocks(&ctx->state, ctx->buf, 1); + partial = 0; + } + memset(&ctx->buf[partial], 0, MD5_BLOCK_SIZE - 8 - partial); + *(__le64 *)&ctx->buf[MD5_BLOCK_SIZE - 8] = cpu_to_le64(bitcount); + md5_blocks(&ctx->state, ctx->buf, 1); + + cpu_to_le32_array(ctx->state.h, ARRAY_SIZE(ctx->state.h)); + memcpy(out, ctx->state.h, MD5_DIGEST_SIZE); +} + +void md5_final(struct md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) +{ + __md5_final(ctx, out); + memzero_explicit(ctx, sizeof(*ctx)); +} +EXPORT_SYMBOL_GPL(md5_final); + +void md5(const u8 *data, size_t len, u8 out[MD5_DIGEST_SIZE]) +{ + struct md5_ctx ctx; + + md5_init(&ctx); + md5_update(&ctx, data, len); + md5_final(&ctx, out); +} +EXPORT_SYMBOL_GPL(md5); + +static void __hmac_md5_preparekey(struct md5_block_state *istate, + struct md5_block_state *ostate, + const u8 *raw_key, size_t raw_key_len) +{ + union { + u8 b[MD5_BLOCK_SIZE]; + unsigned long w[MD5_BLOCK_SIZE / sizeof(unsigned long)]; + } derived_key = { 0 }; + + if (unlikely(raw_key_len > MD5_BLOCK_SIZE)) + md5(raw_key, raw_key_len, derived_key.b); + else + memcpy(derived_key.b, raw_key, raw_key_len); + + for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++) + derived_key.w[i] ^= REPEAT_BYTE(HMAC_IPAD_VALUE); + *istate = md5_iv; + md5_blocks(istate, derived_key.b, 1); + + for (size_t i = 0; i < ARRAY_SIZE(derived_key.w); i++) + derived_key.w[i] ^= REPEAT_BYTE(HMAC_OPAD_VALUE ^ + HMAC_IPAD_VALUE); + *ostate = md5_iv; + md5_blocks(ostate, derived_key.b, 1); + + memzero_explicit(&derived_key, sizeof(derived_key)); +} + 
+void hmac_md5_preparekey(struct hmac_md5_key *key, + const u8 *raw_key, size_t raw_key_len) +{ + __hmac_md5_preparekey(&key->istate, &key->ostate, raw_key, raw_key_len); +} +EXPORT_SYMBOL_GPL(hmac_md5_preparekey); + +void hmac_md5_init(struct hmac_md5_ctx *ctx, const struct hmac_md5_key *key) +{ + ctx->hash_ctx.state = key->istate; + ctx->hash_ctx.bytecount = MD5_BLOCK_SIZE; + ctx->ostate = key->ostate; +} +EXPORT_SYMBOL_GPL(hmac_md5_init); + +void hmac_md5_init_usingrawkey(struct hmac_md5_ctx *ctx, + const u8 *raw_key, size_t raw_key_len) +{ + __hmac_md5_preparekey(&ctx->hash_ctx.state, &ctx->ostate, + raw_key, raw_key_len); + ctx->hash_ctx.bytecount = MD5_BLOCK_SIZE; +} +EXPORT_SYMBOL_GPL(hmac_md5_init_usingrawkey); + +void hmac_md5_final(struct hmac_md5_ctx *ctx, u8 out[MD5_DIGEST_SIZE]) +{ + /* Generate the padded input for the outer hash in ctx->hash_ctx.buf. */ + __md5_final(&ctx->hash_ctx, ctx->hash_ctx.buf); + memset(&ctx->hash_ctx.buf[MD5_DIGEST_SIZE], 0, + MD5_BLOCK_SIZE - MD5_DIGEST_SIZE); + ctx->hash_ctx.buf[MD5_DIGEST_SIZE] = 0x80; + *(__le64 *)&ctx->hash_ctx.buf[MD5_BLOCK_SIZE - 8] = + cpu_to_le64(8 * (MD5_BLOCK_SIZE + MD5_DIGEST_SIZE)); + + /* Compute the outer hash, which gives the HMAC value. */ + md5_blocks(&ctx->ostate, ctx->hash_ctx.buf, 1); + cpu_to_le32_array(ctx->ostate.h, ARRAY_SIZE(ctx->ostate.h)); + memcpy(out, ctx->ostate.h, MD5_DIGEST_SIZE); + + memzero_explicit(ctx, sizeof(*ctx)); +} +EXPORT_SYMBOL_GPL(hmac_md5_final); + +void hmac_md5(const struct hmac_md5_key *key, + const u8 *data, size_t data_len, u8 out[MD5_DIGEST_SIZE]) +{ + struct hmac_md5_ctx ctx; + + hmac_md5_init(&ctx, key); + hmac_md5_update(&ctx, data, data_len); + hmac_md5_final(&ctx, out); +} +EXPORT_SYMBOL_GPL(hmac_md5); + +void hmac_md5_usingrawkey(const u8 *raw_key, size_t raw_key_len, + const u8 *data, size_t data_len, + u8 out[MD5_DIGEST_SIZE]) +{ + struct hmac_md5_ctx ctx; + + hmac_md5_init_usingrawkey(&ctx, raw_key, raw_key_len); + hmac_md5_update(&ctx, data, data_len); + hmac_md5_final(&ctx, out); +} +EXPORT_SYMBOL_GPL(hmac_md5_usingrawkey); + +#ifdef md5_mod_init_arch +static int __init md5_mod_init(void) +{ + md5_mod_init_arch(); + return 0; +} +subsys_initcall(md5_mod_init); + +static void __exit md5_mod_exit(void) +{ +} +module_exit(md5_mod_exit); +#endif + +MODULE_DESCRIPTION("MD5 and HMAC-MD5 library functions"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From c9e5ac0ab9d1235f1a91852ec3d3c5c5f3e8ba0e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:50 -0700 Subject: lib/crypto: mips/md5: Migrate optimized code into library Instead of exposing the mips-optimized MD5 code via mips-specific crypto_shash algorithms, instead just implement the md5_blocks() library function. This is much simpler, it makes the MD5 library functions be mips-optimized, and it fixes the longstanding issue where the mips-optimized MD5 code was disabled by default. MD5 still remains available through crypto_shash, but individual architectures no longer need to handle it. Note: to see the diff from arch/mips/cavium-octeon/crypto/octeon-md5.c to lib/crypto/mips/md5.h, view this commit with 'git show -M10'. 
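For context, an illustrative sketch (not taken from this patch) of the contract an architecture now satisfies: when CONFIG_CRYPTO_LIB_MD5_ARCH=y, lib/crypto/md5.c includes lib/crypto/$(SRCARCH)/md5.h, which must define a static md5_blocks(); anything it cannot accelerate can fall back to md5_blocks_generic(). The feature check and transform function below are hypothetical placeholders:

    static void md5_blocks(struct md5_block_state *state,
                           const u8 *data, size_t nblocks)
    {
            /* hypothetical CPU-feature check */
            if (!arch_has_md5_accel())
                    return md5_blocks_generic(state, data, nblocks);
            /* hypothetical arch-specific block function */
            arch_md5_transform(state, data, nblocks);
    }
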
Link: https://lore.kernel.org/r/20250805222855.10362-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 1 + lib/crypto/mips/md5.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 lib/crypto/mips/md5.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index e38e8ed779d8..7b4e47ce37bb 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -110,6 +110,7 @@ config CRYPTO_LIB_MD5 config CRYPTO_LIB_MD5_ARCH bool depends on CRYPTO_LIB_MD5 && !UML + default y if MIPS && CPU_CAVIUM_OCTEON config CRYPTO_LIB_POLY1305_RSIZE int diff --git a/lib/crypto/mips/md5.h b/lib/crypto/mips/md5.h new file mode 100644 index 000000000000..e08e28aeffa4 --- /dev/null +++ b/lib/crypto/mips/md5.h @@ -0,0 +1,65 @@ +/* + * Cryptographic API. + * + * MD5 Message Digest Algorithm (RFC1321). + * + * Adapted for OCTEON by Aaro Koskinen . + * + * Based on crypto/md5.c, which is: + * + * Derived from cryptoapi implementation, originally based on the + * public domain implementation written by Colin Plumb in 1993. + * + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include +#include + +/* + * We pass everything as 64-bit. OCTEON can handle misaligned data. + */ + +static void md5_blocks(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + struct octeon_cop2_state cop2_state; + u64 *state64 = (u64 *)state; + unsigned long flags; + + if (!octeon_has_crypto()) + return md5_blocks_generic(state, data, nblocks); + + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + + flags = octeon_crypto_enable(&cop2_state); + write_octeon_64bit_hash_dword(state64[0], 0); + write_octeon_64bit_hash_dword(state64[1], 1); + + do { + const u64 *block = (const u64 *)data; + + write_octeon_64bit_block_dword(block[0], 0); + write_octeon_64bit_block_dword(block[1], 1); + write_octeon_64bit_block_dword(block[2], 2); + write_octeon_64bit_block_dword(block[3], 3); + write_octeon_64bit_block_dword(block[4], 4); + write_octeon_64bit_block_dword(block[5], 5); + write_octeon_64bit_block_dword(block[6], 6); + octeon_md5_start(block[7]); + + data += MD5_BLOCK_SIZE; + } while (--nblocks); + + state64[0] = read_octeon_64bit_hash_dword(0); + state64[1] = read_octeon_64bit_hash_dword(1); + octeon_crypto_disable(&cop2_state, flags); + + le32_to_cpu_array(state->h, ARRAY_SIZE(state->h)); +} -- cgit v1.2.3 From 09371e1349c9bb34ac030973c7867016a8a8914d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:52 -0700 Subject: lib/crypto: powerpc/md5: Migrate optimized code into library Instead of exposing the powerpc-optimized MD5 code via powerpc-specific crypto_shash algorithms, instead just implement the md5_blocks() library function. This is much simpler, it makes the MD5 library functions be powerpc-optimized, and it fixes the longstanding issue where the powerpc-optimized MD5 code was disabled by default. MD5 still remains available through crypto_shash, but individual architectures no longer need to handle it. 
Link: https://lore.kernel.org/r/20250805222855.10362-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 1 + lib/crypto/Makefile | 1 + lib/crypto/powerpc/md5-asm.S | 235 +++++++++++++++++++++++++++++++++++++++++++ lib/crypto/powerpc/md5.h | 12 +++ 4 files changed, 249 insertions(+) create mode 100644 lib/crypto/powerpc/md5-asm.S create mode 100644 lib/crypto/powerpc/md5.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 7b4e47ce37bb..7334ddc70e59 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -111,6 +111,7 @@ config CRYPTO_LIB_MD5_ARCH bool depends on CRYPTO_LIB_MD5 && !UML default y if MIPS && CPU_CAVIUM_OCTEON + default y if PPC config CRYPTO_LIB_POLY1305_RSIZE int diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 429573e8f8b3..b3986dde676b 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_CRYPTO_LIB_MD5) += libmd5.o libmd5-y := md5.o ifeq ($(CONFIG_CRYPTO_LIB_MD5_ARCH),y) CFLAGS_md5.o += -I$(src)/$(SRCARCH) +libmd5-$(CONFIG_PPC) += powerpc/md5-asm.o endif # CONFIG_CRYPTO_LIB_MD5_ARCH ################################################################################ diff --git a/lib/crypto/powerpc/md5-asm.S b/lib/crypto/powerpc/md5-asm.S new file mode 100644 index 000000000000..fa6bc440cf4a --- /dev/null +++ b/lib/crypto/powerpc/md5-asm.S @@ -0,0 +1,235 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Fast MD5 implementation for PPC + * + * Copyright (c) 2015 Markus Stockhausen + */ +#include +#include +#include + +#define rHP r3 +#define rWP r4 + +#define rH0 r0 +#define rH1 r6 +#define rH2 r7 +#define rH3 r5 + +#define rW00 r8 +#define rW01 r9 +#define rW02 r10 +#define rW03 r11 +#define rW04 r12 +#define rW05 r14 +#define rW06 r15 +#define rW07 r16 +#define rW08 r17 +#define rW09 r18 +#define rW10 r19 +#define rW11 r20 +#define rW12 r21 +#define rW13 r22 +#define rW14 r23 +#define rW15 r24 + +#define rT0 r25 +#define rT1 r26 + +#define INITIALIZE \ + PPC_STLU r1,-INT_FRAME_SIZE(r1); \ + SAVE_GPRS(14, 26, r1) /* push registers onto stack */ + +#define FINALIZE \ + REST_GPRS(14, 26, r1); /* pop registers from stack */ \ + addi r1,r1,INT_FRAME_SIZE + +#ifdef __BIG_ENDIAN__ +#define LOAD_DATA(reg, off) \ + lwbrx reg,0,rWP; /* load data */ +#define INC_PTR \ + addi rWP,rWP,4; /* increment per word */ +#define NEXT_BLOCK /* nothing to do */ +#else +#define LOAD_DATA(reg, off) \ + lwz reg,off(rWP); /* load data */ +#define INC_PTR /* nothing to do */ +#define NEXT_BLOCK \ + addi rWP,rWP,64; /* increment per block */ +#endif + +#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \ + LOAD_DATA(w0, off) /* W */ \ + and rT0,b,c; /* 1: f = b and c */ \ + INC_PTR /* ptr++ */ \ + andc rT1,d,b; /* 1: f' = ~b and d */ \ + LOAD_DATA(w1, off+4) /* W */ \ + or rT0,rT0,rT1; /* 1: f = f or f' */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + addis w1,w1,k1h; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addi w1,w1,k1l; /* 2: wk = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add d,d,w1; /* 2: a = a + wk */ \ + add a,a,b; /* 1: a = a + b */ \ + and rT0,a,b; /* 2: f = b and c */ \ + andc rT1,c,a; /* 2: f' = ~b and d */ \ + or rT0,rT0,rT1; /* 2: f = f or f' */ \ + add d,d,rT0; /* 2: a = a + f */ \ + INC_PTR /* ptr++ */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + andc rT0,c,d; /* 
1: f = c and ~d */ \ + and rT1,b,d; /* 1: f' = b and d */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + or rT0,rT0,rT1; /* 1: f = f or f' */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addi w1,w1,k1l; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addis w1,w1,k1h; /* 2: wk = w + k' */ \ + andc rT0,b,c; /* 2: f = c and ~d */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add a,a,b; /* 1: a = a + b */ \ + add d,d,w1; /* 2: a = a + wk */ \ + and rT1,a,c; /* 2: f' = b and d */ \ + or rT0,rT0,rT1; /* 2: f = f or f' */ \ + add d,d,rT0; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a +b */ + +#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + xor rT0,b,c; /* 1: f' = b xor c */ \ + addi w0,w0,k0l; /* 1: wk = w + k */ \ + xor rT1,rT0,d; /* 1: f = f xor f' */ \ + addis w0,w0,k0h; /* 1: wk = w + k' */ \ + add a,a,rT1; /* 1: a = a + f */ \ + addi w1,w1,k1l; /* 2: wk = w + k */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addis w1,w1,k1h; /* 2: wk = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add d,d,w1; /* 2: a = a + wk */ \ + add a,a,b; /* 1: a = a + b */ \ + xor rT1,rT0,a; /* 2: f = b xor f' */ \ + add d,d,rT1; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ + addi w0,w0,k0l; /* 1: w = w + k */ \ + orc rT0,b,d; /* 1: f = b or ~d */ \ + addis w0,w0,k0h; /* 1: w = w + k' */ \ + xor rT0,rT0,c; /* 1: f = f xor c */ \ + add a,a,w0; /* 1: a = a + wk */ \ + addi w1,w1,k1l; /* 2: w = w + k */ \ + add a,a,rT0; /* 1: a = a + f */ \ + addis w1,w1,k1h; /* 2: w = w + k' */ \ + rotrwi a,a,p; /* 1: a = a rotl x */ \ + add a,a,b; /* 1: a = a + b */ \ + orc rT0,a,c; /* 2: f = b or ~d */ \ + add d,d,w1; /* 2: a = a + wk */ \ + xor rT0,rT0,b; /* 2: f = f xor c */ \ + add d,d,rT0; /* 2: a = a + f */ \ + rotrwi d,d,q; /* 2: a = a rotl x */ \ + add d,d,a; /* 2: a = a + b */ + +_GLOBAL(ppc_md5_transform) + INITIALIZE + + mtctr r5 + lwz rH0,0(rHP) + lwz rH1,4(rHP) + lwz rH2,8(rHP) + lwz rH3,12(rHP) + +ppc_md5_main: + R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0, + 0xd76b, -23432, 0xe8c8, -18602) + R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8, + 0x2420, 0x70db, 0xc1be, -12562) + R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16, + 0xf57c, 0x0faf, 0x4788, -14806) + R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24, + 0xa830, 0x4613, 0xfd47, -27391) + R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32, + 0x6981, -26408, 0x8b45, -2129) + R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40, + 0xffff, 0x5bb1, 0x895d, -10306) + R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48, + 0x6b90, 0x1122, 0xfd98, 0x7193) + R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56, + 0xa679, 0x438e, 0x49b4, 0x0821) + + R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23, + 0x0d56, 0x6e0c, 0x1810, 0x6d2d) + R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12, + 0x9d02, -32109, 0x124c, 0x2332) + R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23, + 0x8ea7, 0x4a33, 0x0245, -18270) + R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12, + 0x8eee, -8608, 0xf258, -5095) + R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23, + 0x969d, -10697, 0x1cbe, -15288) + R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12, + 0x3317, 0x3e99, 0xdbd9, 0x7c15) + R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23, + 0xac4b, 0x7772, 0xd8cf, 0x331d) + R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12, + 0x6a28, 0x6dd8, 0x219a, 0x3b68) + + R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 
21, + 0x29cb, 0x28e5, 0x4218, -7788) + R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9, + 0x473f, 0x06d1, 0x3aae, 0x3036) + R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21, + 0xaea1, -15134, 0x640b, -11295) + R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9, + 0x8f4c, 0x4887, 0xbc7c, -22499) + R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21, + 0x7eb8, -27199, 0x00ea, 0x6050) + R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9, + 0xe01a, 0x22fe, 0x4447, 0x69c5) + R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21, + 0xb7f3, 0x0253, 0x59b1, 0x4d5b) + R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9, + 0x4701, -27017, 0xc7bd, -19859) + + R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22, + 0x0988, -1462, 0x4c70, -19401) + R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11, + 0xadaf, -5221, 0xfc99, 0x66f7) + R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22, + 0x7e80, -16418, 0xba1e, -25587) + R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11, + 0x4130, 0x380d, 0xe0c5, 0x738d) + lwz rW00,0(rHP) + R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22, + 0xe837, -30770, 0xde8a, 0x69e8) + lwz rW14,4(rHP) + R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11, + 0x9e79, 0x260f, 0x256d, -27941) + lwz rW12,8(rHP) + R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22, + 0xab75, -20775, 0x4f9e, -28397) + lwz rW10,12(rHP) + R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11, + 0x662b, 0x7c56, 0x11b2, 0x0358) + + add rH0,rH0,rW00 + stw rH0,0(rHP) + add rH1,rH1,rW14 + stw rH1,4(rHP) + add rH2,rH2,rW12 + stw rH2,8(rHP) + add rH3,rH3,rW10 + stw rH3,12(rHP) + NEXT_BLOCK + + bdnz ppc_md5_main + + FINALIZE + blr diff --git a/lib/crypto/powerpc/md5.h b/lib/crypto/powerpc/md5.h new file mode 100644 index 000000000000..540b08e34d1d --- /dev/null +++ b/lib/crypto/powerpc/md5.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * MD5 optimized for PowerPC + */ + +void ppc_md5_transform(u32 *state, const u8 *data, size_t nblocks); + +static void md5_blocks(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + ppc_md5_transform(state->h, data, nblocks); +} -- cgit v1.2.3 From a1848f6e382145e0200843549f24f5af4b5c8136 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:53 -0700 Subject: lib/crypto: sparc/md5: Migrate optimized code into library Instead of exposing the sparc-optimized MD5 code via sparc-specific crypto_shash algorithms, instead just implement the md5_blocks() library function. This is much simpler, it makes the MD5 library functions be sparc-optimized, and it fixes the longstanding issue where the sparc-optimized MD5 code was disabled by default. MD5 still remains available through crypto_shash, but individual architectures no longer need to handle it. Note: to see the diff from arch/sparc/crypto/md5_glue.c to lib/crypto/sparc/md5.h, view this commit with 'git show -M10'. 
Link: https://lore.kernel.org/r/20250805222855.10362-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 1 + lib/crypto/Makefile | 1 + lib/crypto/sparc/md5.h | 48 +++++++++++++++++++++++++++++++ lib/crypto/sparc/md5_asm.S | 70 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) create mode 100644 lib/crypto/sparc/md5.h create mode 100644 lib/crypto/sparc/md5_asm.S (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 7334ddc70e59..79b848448e07 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -112,6 +112,7 @@ config CRYPTO_LIB_MD5_ARCH depends on CRYPTO_LIB_MD5 && !UML default y if MIPS && CPU_CAVIUM_OCTEON default y if PPC + default y if SPARC64 config CRYPTO_LIB_POLY1305_RSIZE int diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index b3986dde676b..d362636a22d3 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -66,6 +66,7 @@ libmd5-y := md5.o ifeq ($(CONFIG_CRYPTO_LIB_MD5_ARCH),y) CFLAGS_md5.o += -I$(src)/$(SRCARCH) libmd5-$(CONFIG_PPC) += powerpc/md5-asm.o +libmd5-$(CONFIG_SPARC) += sparc/md5_asm.o endif # CONFIG_CRYPTO_LIB_MD5_ARCH ################################################################################ diff --git a/lib/crypto/sparc/md5.h b/lib/crypto/sparc/md5.h new file mode 100644 index 000000000000..3f1b0ed8c0b3 --- /dev/null +++ b/lib/crypto/sparc/md5.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * MD5 accelerated using the sparc64 crypto opcodes + * + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald + * Copyright (c) Jean-Francois Dive + * Copyright (c) Mathias Krause + * Copyright (c) Cryptoapi developers. + * Copyright (c) 2002 James Morris + */ + +#include +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_md5_opcodes); + +asmlinkage void md5_sparc64_transform(struct md5_block_state *state, + const u8 *data, size_t nblocks); + +static void md5_blocks(struct md5_block_state *state, + const u8 *data, size_t nblocks) +{ + if (static_branch_likely(&have_md5_opcodes)) { + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + md5_sparc64_transform(state, data, nblocks); + le32_to_cpu_array(state->h, ARRAY_SIZE(state->h)); + } else { + md5_blocks_generic(state, data, nblocks); + } +} + +#define md5_mod_init_arch md5_mod_init_arch +static inline void md5_mod_init_arch(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return; + + __asm__ __volatile__("rd %%asr26, %0" : "=r" (cfr)); + if (!(cfr & CFR_MD5)) + return; + + static_branch_enable(&have_md5_opcodes); + pr_info("Using sparc64 md5 opcode optimized MD5 implementation\n"); +} diff --git a/lib/crypto/sparc/md5_asm.S b/lib/crypto/sparc/md5_asm.S new file mode 100644 index 000000000000..60b544e4d205 --- /dev/null +++ b/lib/crypto/sparc/md5_asm.S @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include + +ENTRY(md5_sparc64_transform) + /* %o0 = digest, %o1 = data, %o2 = rounds */ + VISEntryHalf + ld [%o0 + 0x00], %f0 + ld [%o0 + 0x04], %f1 + andcc %o1, 0x7, %g0 + ld [%o0 + 0x08], %f2 + bne,pn %xcc, 10f + ld [%o0 + 0x0c], %f3 + +1: + ldd [%o1 + 0x00], %f8 + ldd [%o1 + 0x08], %f10 + ldd [%o1 + 0x10], %f12 + ldd [%o1 + 0x18], %f14 + ldd [%o1 + 0x20], %f16 + ldd [%o1 + 0x28], %f18 + ldd [%o1 + 0x30], %f20 + ldd [%o1 + 0x38], %f22 + + MD5 + + subcc %o2, 1, %o2 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + +5: + st %f0, [%o0 + 0x00] + st %f1, [%o0 + 0x04] + st %f2, [%o0 + 0x08] + st %f3, [%o0 + 0x0c] + retl + 
VISExitHalf +10: + alignaddr %o1, %g0, %o1 + + ldd [%o1 + 0x00], %f10 +1: + ldd [%o1 + 0x08], %f12 + ldd [%o1 + 0x10], %f14 + ldd [%o1 + 0x18], %f16 + ldd [%o1 + 0x20], %f18 + ldd [%o1 + 0x28], %f20 + ldd [%o1 + 0x30], %f22 + ldd [%o1 + 0x38], %f24 + ldd [%o1 + 0x40], %f26 + + faligndata %f10, %f12, %f8 + faligndata %f12, %f14, %f10 + faligndata %f14, %f16, %f12 + faligndata %f16, %f18, %f14 + faligndata %f18, %f20, %f16 + faligndata %f20, %f22, %f18 + faligndata %f22, %f24, %f20 + faligndata %f24, %f26, %f22 + + MD5 + + subcc %o2, 1, %o2 + fsrc2 %f26, %f10 + bne,pt %xcc, 1b + add %o1, 0x40, %o1 + + ba,a,pt %xcc, 5b +ENDPROC(md5_sparc64_transform) -- cgit v1.2.3 From d6b6aac0cdb4b4f81cccc531ed76211d56c17444 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 5 Aug 2025 15:28:55 -0700 Subject: lib/crypto: tests: Add KUnit tests for MD5 and HMAC-MD5 Add a KUnit test suite for the MD5 library functions, including the corresponding HMAC support. The core test logic is in the previously-added hash-test-template.h. This commit just adds the actual KUnit suite, and it adds the generated test vectors to the tree so that gen-hash-testvecs.py won't have to be run at build time. Link: https://lore.kernel.org/r/20250805222855.10362-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/Kconfig | 10 +++ lib/crypto/tests/Makefile | 1 + lib/crypto/tests/md5-testvecs.h | 186 ++++++++++++++++++++++++++++++++++++++++ lib/crypto/tests/md5_kunit.c | 39 +++++++++ 4 files changed, 236 insertions(+) create mode 100644 lib/crypto/tests/md5-testvecs.h create mode 100644 lib/crypto/tests/md5_kunit.c (limited to 'lib') diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index de7e8babb6af..c21d53fd4b0c 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -1,5 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later +config CRYPTO_LIB_MD5_KUNIT_TEST + tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS + select CRYPTO_LIB_BENCHMARK_VISIBLE + select CRYPTO_LIB_MD5 + help + KUnit tests for the MD5 cryptographic hash function and its + corresponding HMAC. 
+ config CRYPTO_LIB_POLY1305_KUNIT_TEST tristate "KUnit tests for Poly1305" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index 8601dccd6fdd..f6f82c6f9cb5 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA256_KUNIT_TEST) += sha224_kunit.o sha256_kunit.o diff --git a/lib/crypto/tests/md5-testvecs.h b/lib/crypto/tests/md5-testvecs.h new file mode 100644 index 000000000000..be6727feb296 --- /dev/null +++ b/lib/crypto/tests/md5-testvecs.h @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py md5 */ + +static const struct { + size_t data_len; + u8 digest[MD5_DIGEST_SIZE]; +} hash_testvecs[] = { + { + .data_len = 0, + .digest = { + 0xd4, 0x1d, 0x8c, 0xd9, 0x8f, 0x00, 0xb2, 0x04, + 0xe9, 0x80, 0x09, 0x98, 0xec, 0xf8, 0x42, 0x7e, + }, + }, + { + .data_len = 1, + .digest = { + 0x16, 0x7b, 0x86, 0xf2, 0x1d, 0xf3, 0x76, 0xc9, + 0x6f, 0x10, 0xa0, 0x61, 0x5b, 0x14, 0x20, 0x0b, + }, + }, + { + .data_len = 2, + .digest = { + 0x2d, 0x30, 0x96, 0xc7, 0x43, 0x40, 0xed, 0xb2, + 0xfb, 0x84, 0x63, 0x9a, 0xec, 0xc7, 0x3c, 0x3c, + }, + }, + { + .data_len = 3, + .digest = { + 0xe5, 0x0f, 0xce, 0xe0, 0xc8, 0xff, 0x4e, 0x08, + 0x5e, 0x19, 0xe5, 0xf2, 0x08, 0x11, 0x19, 0x16, + }, + }, + { + .data_len = 16, + .digest = { + 0xe8, 0xca, 0x29, 0x05, 0x2f, 0xd1, 0xf3, 0x99, + 0x40, 0x71, 0xf5, 0xc2, 0xf7, 0xf8, 0x17, 0x3e, + }, + }, + { + .data_len = 32, + .digest = { + 0xe3, 0x20, 0xc1, 0xd8, 0x21, 0x14, 0x44, 0x59, + 0x1a, 0xf5, 0x91, 0xaf, 0x69, 0xbe, 0x93, 0x9d, + }, + }, + { + .data_len = 48, + .digest = { + 0xfb, 0x06, 0xb0, 0xf0, 0x00, 0x10, 0x4b, 0x68, + 0x3d, 0x75, 0xf9, 0x70, 0xde, 0xbb, 0x32, 0x16, + }, + }, + { + .data_len = 49, + .digest = { + 0x52, 0x86, 0x48, 0x8b, 0xae, 0x91, 0x7c, 0x4e, + 0xc2, 0x2a, 0x69, 0x07, 0x35, 0xcc, 0xb2, 0x88, + }, + }, + { + .data_len = 63, + .digest = { + 0xfa, 0xd3, 0xf6, 0xe6, 0x7b, 0x1a, 0xc6, 0x05, + 0x73, 0x35, 0x02, 0xab, 0xc7, 0xb3, 0x47, 0xcb, + }, + }, + { + .data_len = 64, + .digest = { + 0xc5, 0x59, 0x29, 0xe9, 0x0a, 0x4a, 0x86, 0x43, + 0x7c, 0xaf, 0xdf, 0x83, 0xd3, 0xb8, 0x33, 0x5f, + }, + }, + { + .data_len = 65, + .digest = { + 0x80, 0x05, 0x75, 0x39, 0xec, 0x44, 0x8a, 0x81, + 0xe7, 0x6e, 0x8d, 0xd1, 0xc6, 0xeb, 0xc2, 0xf0, + }, + }, + { + .data_len = 127, + .digest = { + 0x3f, 0x02, 0xe8, 0xc6, 0xb8, 0x6a, 0x39, 0xc3, + 0xa4, 0x1c, 0xd9, 0x8f, 0x4a, 0x71, 0x40, 0x30, + }, + }, + { + .data_len = 128, + .digest = { + 0x89, 0x4f, 0x79, 0x3e, 0xff, 0x0c, 0x22, 0x60, + 0xa2, 0xdc, 0x10, 0x5f, 0x23, 0x0a, 0xe7, 0xc6, + }, + }, + { + .data_len = 129, + .digest = { + 0x06, 0x56, 0x61, 0xb8, 0x8a, 0x82, 0x77, 0x1b, + 0x2c, 0x35, 0xb8, 0x9f, 0xd6, 0xf7, 0xbd, 0x5a, + }, + }, + { + .data_len = 256, + .digest = { + 0x5d, 0xdf, 0x7d, 0xc8, 0x43, 0x96, 0x3b, 0xdb, + 0xc7, 0x0e, 0x44, 0x42, 0x23, 0xf7, 0xed, 0xdf, + }, + }, + { + .data_len = 511, + .digest = { + 0xf6, 0x5f, 0x26, 0x51, 0x8a, 0x5a, 0x46, 0x8f, + 0x48, 0x72, 0x90, 0x74, 0x9d, 0x87, 0xbd, 0xdf, + }, + }, + { + .data_len = 513, + .digest = { + 0xd8, 0x2c, 0xc9, 0x76, 0xfa, 0x67, 0x2e, 0xa6, + 0xc8, 0x12, 0x4a, 0x64, 0xaa, 0x0b, 0x3d, 0xbd, + }, + }, + { + .data_len = 1000, + .digest = { + 0xe2, 
0x7e, 0xb4, 0x5f, 0xe1, 0x74, 0x51, 0xfc, + 0xe0, 0xc8, 0xd5, 0xe6, 0x8b, 0x40, 0xd2, 0x0e, + }, + }, + { + .data_len = 3333, + .digest = { + 0xcd, 0x7d, 0x56, 0xa9, 0x4c, 0x47, 0xea, 0xc2, + 0x34, 0x0b, 0x84, 0x05, 0xf9, 0xad, 0xbb, 0x46, + }, + }, + { + .data_len = 4096, + .digest = { + 0x63, 0x6e, 0x58, 0xb3, 0x94, 0x6b, 0x83, 0x5f, + 0x1f, 0x0e, 0xd3, 0x66, 0x78, 0x71, 0x98, 0x42, + }, + }, + { + .data_len = 4128, + .digest = { + 0x9d, 0x68, 0xfc, 0x26, 0x8b, 0x4c, 0xa8, 0xe7, + 0x30, 0x0b, 0x19, 0x52, 0x6e, 0xa5, 0x65, 0x1c, + }, + }, + { + .data_len = 4160, + .digest = { + 0x1c, 0xaa, 0x7d, 0xee, 0x91, 0x01, 0xe2, 0x5a, + 0xec, 0xe9, 0xde, 0x57, 0x0a, 0xb6, 0x4c, 0x2f, + }, + }, + { + .data_len = 4224, + .digest = { + 0x1b, 0x31, 0xe3, 0x14, 0x07, 0x16, 0x17, 0xc6, + 0x98, 0x79, 0x88, 0x23, 0xb6, 0x3b, 0x25, 0xc4, + }, + }, + { + .data_len = 16384, + .digest = { + 0xc6, 0x3d, 0x56, 0x90, 0xf0, 0xf6, 0xe6, 0x50, + 0xf4, 0x76, 0x78, 0x67, 0xa3, 0xdd, 0x62, 0x7b, + }, + }, +}; + +static const u8 hash_testvec_consolidated[MD5_DIGEST_SIZE] = { + 0x70, 0x86, 0x9e, 0x6c, 0xa4, 0xc6, 0x71, 0x43, + 0x26, 0x02, 0x1b, 0x3f, 0xfd, 0x56, 0x9f, 0xa6, +}; + +static const u8 hmac_testvec_consolidated[MD5_DIGEST_SIZE] = { + 0x10, 0x02, 0x74, 0xf6, 0x4d, 0xb3, 0x3c, 0xc7, + 0xa1, 0xf7, 0xe6, 0xd4, 0x32, 0x64, 0xfa, 0x6d, +}; diff --git a/lib/crypto/tests/md5_kunit.c b/lib/crypto/tests/md5_kunit.c new file mode 100644 index 000000000000..38bd52c25ae3 --- /dev/null +++ b/lib/crypto/tests/md5_kunit.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2025 Google LLC + */ +#include +#include "md5-testvecs.h" + +#define HASH md5 +#define HASH_CTX md5_ctx +#define HASH_SIZE MD5_DIGEST_SIZE +#define HASH_INIT md5_init +#define HASH_UPDATE md5_update +#define HASH_FINAL md5_final +#define HMAC_KEY hmac_md5_key +#define HMAC_CTX hmac_md5_ctx +#define HMAC_PREPAREKEY hmac_md5_preparekey +#define HMAC_INIT hmac_md5_init +#define HMAC_UPDATE hmac_md5_update +#define HMAC_FINAL hmac_md5_final +#define HMAC hmac_md5 +#define HMAC_USINGRAWKEY hmac_md5_usingrawkey +#include "hash-test-template.h" + +static struct kunit_case hash_test_cases[] = { + HASH_KUNIT_CASES, + KUNIT_CASE(benchmark_hash), + {}, +}; + +static struct kunit_suite hash_test_suite = { + .name = "md5", + .test_cases = hash_test_cases, + .suite_init = hash_suite_init, + .suite_exit = hash_suite_exit, +}; +kunit_test_suite(hash_test_suite); + +MODULE_DESCRIPTION("KUnit tests and benchmark for MD5 and HMAC-MD5"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 5012bd2dc6ab0c4499923b3b6c6113def9b0c88b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 15 Aug 2025 19:04:57 -0700 Subject: lib/crypto: Drop inline from all *_mod_init_arch() functions Drop 'inline' from all the *_mod_init_arch() functions so that the compiler will warn about any bugs where they are unused due to not being wired up properly. (There are no such bugs currently, so this just establishes a more robust convention for the future. Of course, these functions also tend to get inlined anyway, regardless of the keyword.) 
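As a purely illustrative example (not from the kernel tree) of the compiler behavior this relies on: gcc and clang suppress -Wunused-function for an unused 'static inline' function but emit it for a plain 'static' one, so dropping 'inline' makes a mis-wired *_mod_init_arch() visible at build time.

    static inline void unused_helper_inline(void) { }  /* no warning if unused */
    static void unused_helper(void) { }  /* warning: defined but not used */
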
Link: https://lore.kernel.org/r/20250816020457.432040-1-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/sha1.h | 2 +- lib/crypto/arm/sha256.h | 2 +- lib/crypto/arm/sha512.h | 2 +- lib/crypto/arm64/sha1.h | 2 +- lib/crypto/arm64/sha256.h | 2 +- lib/crypto/arm64/sha512.h | 2 +- lib/crypto/riscv/sha256.h | 2 +- lib/crypto/riscv/sha512.h | 2 +- lib/crypto/s390/sha1.h | 2 +- lib/crypto/s390/sha256.h | 2 +- lib/crypto/s390/sha512.h | 2 +- lib/crypto/sparc/md5.h | 2 +- lib/crypto/sparc/sha1.h | 2 +- lib/crypto/sparc/sha256.h | 2 +- lib/crypto/sparc/sha512.h | 2 +- lib/crypto/x86/sha1.h | 2 +- lib/crypto/x86/sha256.h | 2 +- lib/crypto/x86/sha512.h | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/sha1.h b/lib/crypto/arm/sha1.h index fa1e92419000..29f8bcad0447 100644 --- a/lib/crypto/arm/sha1.h +++ b/lib/crypto/arm/sha1.h @@ -35,7 +35,7 @@ static void sha1_blocks(struct sha1_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); diff --git a/lib/crypto/arm/sha256.h b/lib/crypto/arm/sha256.h index eab713e650f3..7556457b3094 100644 --- a/lib/crypto/arm/sha256.h +++ b/lib/crypto/arm/sha256.h @@ -35,7 +35,7 @@ static void sha256_blocks(struct sha256_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { if (elf_hwcap & HWCAP_NEON) { static_branch_enable(&have_neon); diff --git a/lib/crypto/arm/sha512.h b/lib/crypto/arm/sha512.h index cc2447acd562..d1b485dd275d 100644 --- a/lib/crypto/arm/sha512.h +++ b/lib/crypto/arm/sha512.h @@ -29,7 +29,7 @@ static void sha512_blocks(struct sha512_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (cpu_has_neon()) static_branch_enable(&have_neon); diff --git a/lib/crypto/arm64/sha1.h b/lib/crypto/arm64/sha1.h index f822563538cc..aaef4ebfc5e3 100644 --- a/lib/crypto/arm64/sha1.h +++ b/lib/crypto/arm64/sha1.h @@ -32,7 +32,7 @@ static void sha1_blocks(struct sha1_block_state *state, } #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { if (cpu_have_named_feature(SHA1)) static_branch_enable(&have_ce); diff --git a/lib/crypto/arm64/sha256.h b/lib/crypto/arm64/sha256.h index d95f1077c32b..be4aeda9d0e6 100644 --- a/lib/crypto/arm64/sha256.h +++ b/lib/crypto/arm64/sha256.h @@ -46,7 +46,7 @@ static void sha256_blocks(struct sha256_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { if (cpu_have_named_feature(ASIMD)) { static_branch_enable(&have_neon); diff --git a/lib/crypto/arm64/sha512.h b/lib/crypto/arm64/sha512.h index 7539ea3fef10..ddb0d256f73a 100644 --- a/lib/crypto/arm64/sha512.h +++ b/lib/crypto/arm64/sha512.h @@ -37,7 +37,7 @@ static void sha512_blocks(struct sha512_block_state *state, #ifdef CONFIG_KERNEL_MODE_NEON #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (cpu_have_named_feature(SHA512)) 
static_branch_enable(&have_sha512_insns); diff --git a/lib/crypto/riscv/sha256.h b/lib/crypto/riscv/sha256.h index f36f68d2e88c..1def18b0a4fb 100644 --- a/lib/crypto/riscv/sha256.h +++ b/lib/crypto/riscv/sha256.h @@ -31,7 +31,7 @@ static void sha256_blocks(struct sha256_block_state *state, } #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { /* Both zvknha and zvknhb provide the SHA-256 instructions. */ if ((riscv_isa_extension_available(NULL, ZVKNHA) || diff --git a/lib/crypto/riscv/sha512.h b/lib/crypto/riscv/sha512.h index 59dc0294a9a7..145bdab1214e 100644 --- a/lib/crypto/riscv/sha512.h +++ b/lib/crypto/riscv/sha512.h @@ -30,7 +30,7 @@ static void sha512_blocks(struct sha512_block_state *state, } #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (riscv_isa_extension_available(NULL, ZVKNHB) && riscv_isa_extension_available(NULL, ZVKB) && diff --git a/lib/crypto/s390/sha1.h b/lib/crypto/s390/sha1.h index 08bd138e881c..73d94476a157 100644 --- a/lib/crypto/s390/sha1.h +++ b/lib/crypto/s390/sha1.h @@ -20,7 +20,7 @@ static void sha1_blocks(struct sha1_block_state *state, } #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { if (cpu_have_feature(S390_CPU_FEATURE_MSA) && cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1)) diff --git a/lib/crypto/s390/sha256.h b/lib/crypto/s390/sha256.h index 70a81cbc06b2..acd483508789 100644 --- a/lib/crypto/s390/sha256.h +++ b/lib/crypto/s390/sha256.h @@ -20,7 +20,7 @@ static void sha256_blocks(struct sha256_block_state *state, } #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { if (cpu_have_feature(S390_CPU_FEATURE_MSA) && cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256)) diff --git a/lib/crypto/s390/sha512.h b/lib/crypto/s390/sha512.h index 24744651550c..46699d43df7e 100644 --- a/lib/crypto/s390/sha512.h +++ b/lib/crypto/s390/sha512.h @@ -20,7 +20,7 @@ static void sha512_blocks(struct sha512_block_state *state, } #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (cpu_have_feature(S390_CPU_FEATURE_MSA) && cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512)) diff --git a/lib/crypto/sparc/md5.h b/lib/crypto/sparc/md5.h index 3f1b0ed8c0b3..3995f3e075eb 100644 --- a/lib/crypto/sparc/md5.h +++ b/lib/crypto/sparc/md5.h @@ -32,7 +32,7 @@ static void md5_blocks(struct md5_block_state *state, } #define md5_mod_init_arch md5_mod_init_arch -static inline void md5_mod_init_arch(void) +static void md5_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crypto/sparc/sha1.h b/lib/crypto/sparc/sha1.h index 5015f93584b7..bdf771fcc1f7 100644 --- a/lib/crypto/sparc/sha1.h +++ b/lib/crypto/sparc/sha1.h @@ -27,7 +27,7 @@ static void sha1_blocks(struct sha1_block_state *state, } #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crypto/sparc/sha256.h b/lib/crypto/sparc/sha256.h index 1d10108eb195..b2f4419ec778 100644 --- a/lib/crypto/sparc/sha256.h +++ b/lib/crypto/sparc/sha256.h @@ -27,7 +27,7 @@ static void sha256_blocks(struct sha256_block_state *state, } #define sha256_mod_init_arch sha256_mod_init_arch -static inline void 
sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crypto/sparc/sha512.h b/lib/crypto/sparc/sha512.h index 55303ab6b15f..a8c37a7d4c39 100644 --- a/lib/crypto/sparc/sha512.h +++ b/lib/crypto/sparc/sha512.h @@ -26,7 +26,7 @@ static void sha512_blocks(struct sha512_block_state *state, } #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { unsigned long cfr; diff --git a/lib/crypto/x86/sha1.h b/lib/crypto/x86/sha1.h index e308379d89bc..c48a0131fd12 100644 --- a/lib/crypto/x86/sha1.h +++ b/lib/crypto/x86/sha1.h @@ -55,7 +55,7 @@ static void sha1_blocks(struct sha1_block_state *state, } #define sha1_mod_init_arch sha1_mod_init_arch -static inline void sha1_mod_init_arch(void) +static void sha1_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) { static_call_update(sha1_blocks_x86, sha1_blocks_ni); diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h index c852396ef319..41fa95fbc3bf 100644 --- a/lib/crypto/x86/sha256.h +++ b/lib/crypto/x86/sha256.h @@ -36,7 +36,7 @@ static void sha256_blocks(struct sha256_block_state *state, } #define sha256_mod_init_arch sha256_mod_init_arch -static inline void sha256_mod_init_arch(void) +static void sha256_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) { static_call_update(sha256_blocks_x86, sha256_blocks_ni); diff --git a/lib/crypto/x86/sha512.h b/lib/crypto/x86/sha512.h index be2c8fc12246..0213c70cedd0 100644 --- a/lib/crypto/x86/sha512.h +++ b/lib/crypto/x86/sha512.h @@ -35,7 +35,7 @@ static void sha512_blocks(struct sha512_block_state *state, } #define sha512_mod_init_arch sha512_mod_init_arch -static inline void sha512_mod_init_arch(void) +static void sha512_mod_init_arch(void) { if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL) && boot_cpu_has(X86_FEATURE_AVX)) { -- cgit v1.2.3 From df220cc5e689213c34a0eec7ef26d25f503c77ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Aug 2025 08:25:11 -0700 Subject: lib/crypto: poly1305: Remove unused function poly1305_is_arch_optimized() poly1305_is_arch_optimized() is unused, so remove it. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829152513.92459-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/poly1305-glue.c | 7 ------- lib/crypto/arm64/poly1305-glue.c | 7 ------- lib/crypto/mips/poly1305-glue.c | 6 ------ lib/crypto/powerpc/poly1305-p10-glue.c | 6 ------ lib/crypto/x86/poly1305_glue.c | 6 ------ 5 files changed, 32 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/poly1305-glue.c b/lib/crypto/arm/poly1305-glue.c index 2d86c78af883..9e513e319e37 100644 --- a/lib/crypto/arm/poly1305-glue.c +++ b/lib/crypto/arm/poly1305-glue.c @@ -51,13 +51,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -bool poly1305_is_arch_optimized(void) -{ - /* We always can use at least the ARM scalar implementation. 
*/ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init arm_poly1305_mod_init(void) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c index 31aea21ce42f..d4a522e7d25a 100644 --- a/lib/crypto/arm64/poly1305-glue.c +++ b/lib/crypto/arm64/poly1305-glue.c @@ -50,13 +50,6 @@ void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -bool poly1305_is_arch_optimized(void) -{ - /* We always can use at least the ARM64 scalar implementation. */ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init neon_poly1305_mod_init(void) { if (cpu_have_named_feature(ASIMD)) diff --git a/lib/crypto/mips/poly1305-glue.c b/lib/crypto/mips/poly1305-glue.c index 764a38a65200..002f50f710ab 100644 --- a/lib/crypto/mips/poly1305-glue.c +++ b/lib/crypto/mips/poly1305-glue.c @@ -23,11 +23,5 @@ asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, const u32 nonce[4]); EXPORT_SYMBOL_GPL(poly1305_emit_arch); -bool poly1305_is_arch_optimized(void) -{ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated"); MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/powerpc/poly1305-p10-glue.c b/lib/crypto/powerpc/poly1305-p10-glue.c index 3f1664a724b6..184a71f9c1de 100644 --- a/lib/crypto/powerpc/poly1305-p10-glue.c +++ b/lib/crypto/powerpc/poly1305-p10-glue.c @@ -72,12 +72,6 @@ void poly1305_emit_arch(const struct poly1305_state *state, } EXPORT_SYMBOL_GPL(poly1305_emit_arch); -bool poly1305_is_arch_optimized(void) -{ - return static_key_enabled(&have_p10); -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init poly1305_p10_init(void) { if (cpu_has_feature(CPU_FTR_ARCH_31)) diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305_glue.c index 856d48fd422b..deb5841cb0ad 100644 --- a/lib/crypto/x86/poly1305_glue.c +++ b/lib/crypto/x86/poly1305_glue.c @@ -141,12 +141,6 @@ void poly1305_emit_arch(const struct poly1305_state *ctx, } EXPORT_SYMBOL_GPL(poly1305_emit_arch); -bool poly1305_is_arch_optimized(void) -{ - return static_key_enabled(&poly1305_use_avx); -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init poly1305_simd_mod_init(void) { if (boot_cpu_has(X86_FEATURE_AVX) && -- cgit v1.2.3 From b646b782e522da3509e61f971e5502fccb3a3723 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 29 Aug 2025 08:25:12 -0700 Subject: lib/crypto: poly1305: Consolidate into single module Consolidate the Poly1305 code into a single module, similar to various other algorithms (SHA-1, SHA-256, SHA-512, etc.): - Each arch now provides a header file lib/crypto/$(SRCARCH)/poly1305.h, replacing lib/crypto/$(SRCARCH)/poly1305*.c. The header defines poly1305_block_init(), poly1305_blocks(), poly1305_emit(), and optionally poly1305_mod_init_arch(). It is included by lib/crypto/poly1305.c, and thus the code gets built into the single libpoly1305 module, with improved inlining in some cases. - Whether arch-optimized Poly1305 is buildable is now controlled centrally by lib/crypto/Kconfig instead of by lib/crypto/$(SRCARCH)/Kconfig. The conditions for enabling it remain the same as before, and it remains enabled by default. (The PPC64 one remains unconditionally disabled due to 'depends on BROKEN'.) 
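
For illustration only, here is a condensed sketch of the dispatch described in the bullets above, mirroring the lib/crypto/poly1305.c hunk later in this patch; the bodies are trimmed and the comments are editorial, so treat it as a reading aid rather than the actual file contents:

/*
 * Shape of lib/crypto/poly1305.c after this patch: the arch backend is
 * selected at build time; otherwise the entry points alias the generic code.
 */
#ifdef CONFIG_CRYPTO_LIB_POLY1305_ARCH
#include "poly1305.h"		/* resolves to lib/crypto/$(SRCARCH)/poly1305.h */
#else
#define poly1305_block_init	poly1305_block_init_generic
#define poly1305_blocks		poly1305_blocks_generic
#define poly1305_emit		poly1305_emit_generic
#endif

/* Emitted only when the arch header provides a CPU-feature setup hook. */
#ifdef poly1305_mod_init_arch
static int __init poly1305_mod_init(void)
{
	poly1305_mod_init_arch();	/* e.g. enable static keys for NEON/AVX/P10 */
	return 0;
}
subsys_initcall(poly1305_mod_init);
#endif

Because the arch hooks become static functions pulled in via the header, the EXPORT_SYMBOL boundary between the library and the arch glue goes away and the compiler can inline them into poly1305.c, which is the "improved inlining" the changelog refers to.
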
- Any additional arch-specific translation units for the optimized Poly1305 code, such as assembly files, are now compiled by lib/crypto/Makefile instead of lib/crypto/$(SRCARCH)/Makefile. A special consideration is needed because the Adiantum code uses the poly1305_core_*() functions directly. For now, just carry forward that approach. This means retaining the CRYPTO_LIB_POLY1305_GENERIC kconfig symbol, and keeping the poly1305_core_*() functions in separate translation units. So it's not quite as streamlined I've done with the other hash functions, but we still get a single libpoly1305 module. Note: to see the diff from the arm, arm64, and x86 .c files to the new .h files, view this commit with 'git show -M10'. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829152513.92459-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 50 ++++---- lib/crypto/Makefile | 59 +++++++++- lib/crypto/arm/Kconfig | 5 - lib/crypto/arm/Makefile | 18 --- lib/crypto/arm/poly1305-armv4.pl | 3 +- lib/crypto/arm/poly1305-glue.c | 69 ----------- lib/crypto/arm/poly1305.h | 53 +++++++++ lib/crypto/arm64/Kconfig | 6 - lib/crypto/arm64/Makefile | 13 --- lib/crypto/arm64/poly1305-armv8.pl | 3 + lib/crypto/arm64/poly1305-glue.c | 67 ----------- lib/crypto/arm64/poly1305.h | 50 ++++++++ lib/crypto/mips/Kconfig | 5 - lib/crypto/mips/Makefile | 14 --- lib/crypto/mips/poly1305-glue.c | 27 ----- lib/crypto/mips/poly1305-mips.pl | 8 +- lib/crypto/mips/poly1305.h | 14 +++ lib/crypto/poly1305-generic.c | 25 ---- lib/crypto/poly1305.c | 81 ++++++++----- lib/crypto/powerpc/Kconfig | 8 -- lib/crypto/powerpc/Makefile | 3 - lib/crypto/powerpc/poly1305-p10-glue.c | 90 -------------- lib/crypto/powerpc/poly1305.h | 74 ++++++++++++ lib/crypto/x86/Kconfig | 6 - lib/crypto/x86/Makefile | 10 -- lib/crypto/x86/poly1305-x86_64-cryptogams.pl | 33 ++---- lib/crypto/x86/poly1305.h | 158 +++++++++++++++++++++++++ lib/crypto/x86/poly1305_glue.c | 169 --------------------------- 28 files changed, 498 insertions(+), 623 deletions(-) delete mode 100644 lib/crypto/arm/poly1305-glue.c create mode 100644 lib/crypto/arm/poly1305.h delete mode 100644 lib/crypto/arm64/poly1305-glue.c create mode 100644 lib/crypto/arm64/poly1305.h delete mode 100644 lib/crypto/mips/poly1305-glue.c create mode 100644 lib/crypto/mips/poly1305.h delete mode 100644 lib/crypto/poly1305-generic.c delete mode 100644 lib/crypto/powerpc/poly1305-p10-glue.c create mode 100644 lib/crypto/powerpc/poly1305.h create mode 100644 lib/crypto/x86/poly1305.h delete mode 100644 lib/crypto/x86/poly1305_glue.c (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 79b848448e07..9991118c41a9 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -114,6 +114,33 @@ config CRYPTO_LIB_MD5_ARCH default y if PPC default y if SPARC64 +config CRYPTO_LIB_POLY1305 + tristate + help + The Poly1305 library functions. Select this if your module uses any + of the functions from . + +config CRYPTO_LIB_POLY1305_ARCH + bool + depends on CRYPTO_LIB_POLY1305 && !UML + default y if ARM + default y if ARM64 && KERNEL_MODE_NEON + default y if MIPS + # The PPC64 code needs to be fixed to work in softirq context. + default y if PPC64 && CPU_LITTLE_ENDIAN && VSX && BROKEN + default y if X86_64 + +# This symbol controls the inclusion of the Poly1305 generic code. This differs +# from most of the other algorithms, which handle the generic code +# "automatically" via __maybe_unused. 
This is needed so that the Adiantum code, +# which calls the poly1305_core_*() functions directly, can enable them. +config CRYPTO_LIB_POLY1305_GENERIC + bool + depends on CRYPTO_LIB_POLY1305 + # Enable if there's no arch impl or the arch impl requires the generic + # impl as a fallback. (Or if selected explicitly.) + default y if !CRYPTO_LIB_POLY1305_ARCH || PPC64 + config CRYPTO_LIB_POLY1305_RSIZE int default 2 if MIPS @@ -121,29 +148,6 @@ config CRYPTO_LIB_POLY1305_RSIZE default 9 if ARM || ARM64 default 1 -config CRYPTO_ARCH_HAVE_LIB_POLY1305 - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the Poly1305 library interface, - either builtin or as a module. - -config CRYPTO_LIB_POLY1305_GENERIC - tristate - default CRYPTO_LIB_POLY1305 if !CRYPTO_ARCH_HAVE_LIB_POLY1305 - help - This symbol can be selected by arch implementations of the Poly1305 - library interface that require the generic code as a fallback, e.g., - for SIMD implementations. If no arch specific implementation is - enabled, this implementation serves the users of CRYPTO_LIB_POLY1305. - -config CRYPTO_LIB_POLY1305 - tristate - help - Enable the Poly1305 library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. - config CRYPTO_LIB_CHACHA20POLY1305 tristate select CRYPTO_LIB_CHACHA diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index d362636a22d3..e0536e3b3a04 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -71,13 +71,60 @@ endif # CONFIG_CRYPTO_LIB_MD5_ARCH ################################################################################ -obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o -libpoly1305-y += poly1305.o +obj-$(CONFIG_CRYPTO_LIB_POLY1305) += libpoly1305.o +libpoly1305-y := poly1305.o +ifeq ($(CONFIG_ARCH_SUPPORTS_INT128),y) +libpoly1305-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += poly1305-donna64.o +else +libpoly1305-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += poly1305-donna32.o +endif + +ifeq ($(CONFIG_CRYPTO_LIB_POLY1305_ARCH),y) +CFLAGS_poly1305.o += -I$(src)/$(SRCARCH) + +ifeq ($(CONFIG_ARM),y) +libpoly1305-y += arm/poly1305-core.o +$(obj)/arm/poly1305-core.S: $(src)/arm/poly1305-armv4.pl + $(call cmd,perlasm) +# massage the perlasm code a bit so we only get the NEON routine if we need it +poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 +poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 +AFLAGS_arm/poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y) +endif + +ifeq ($(CONFIG_ARM64),y) +libpoly1305-y += arm64/poly1305-core.o +$(obj)/arm64/poly1305-core.S: $(src)/arm64/poly1305-armv8.pl + $(call cmd,perlasm_with_args) +endif + +ifeq ($(CONFIG_MIPS),y) +libpoly1305-y += mips/poly1305-core.o +poly1305-perlasm-flavour-$(CONFIG_32BIT) := o32 +poly1305-perlasm-flavour-$(CONFIG_64BIT) := 64 +quiet_cmd_perlasm_poly1305 = PERLASM $@ + cmd_perlasm_poly1305 = $(PERL) $< $(poly1305-perlasm-flavour-y) $@ +# Use if_changed instead of cmd, in case the flavour changed. 
+$(obj)/mips/poly1305-core.S: $(src)/mips/poly1305-mips.pl FORCE + $(call if_changed,perlasm_poly1305) +targets += mips/poly1305-core.S +endif -obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) += libpoly1305-generic.o -libpoly1305-generic-y := poly1305-donna32.o -libpoly1305-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := poly1305-donna64.o -libpoly1305-generic-y += poly1305-generic.o +libpoly1305-$(CONFIG_PPC) += powerpc/poly1305-p10le_64.o + +ifeq ($(CONFIG_X86),y) +libpoly1305-y += x86/poly1305-x86_64-cryptogams.o +$(obj)/x86/poly1305-x86_64-cryptogams.S: $(src)/x86/poly1305-x86_64-cryptogams.pl + $(call cmd,perlasm) +endif + +endif # CONFIG_CRYPTO_LIB_POLY1305_ARCH + +# clean-files must be defined unconditionally +clean-files += arm/poly1305-core.S \ + arm64/poly1305-core.S \ + mips/poly1305-core.S \ + x86/poly1305-x86_64-cryptogams.S ################################################################################ diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig index e8444fd0aae3..0d821e282c64 100644 --- a/lib/crypto/arm/Kconfig +++ b/lib/crypto/arm/Kconfig @@ -17,8 +17,3 @@ config CRYPTO_CHACHA20_NEON tristate default CRYPTO_LIB_CHACHA select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_ARM - tristate - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/arm/Makefile b/lib/crypto/arm/Makefile index 4c042a4c77ed..9f70e61d419e 100644 --- a/lib/crypto/arm/Makefile +++ b/lib/crypto/arm/Makefile @@ -6,21 +6,3 @@ libblake2s-arm-y := blake2s-core.o blake2s-glue.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o chacha-neon-y := chacha-scalar-core.o chacha-glue.o chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o - -obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o -poly1305-arm-y := poly1305-core.o poly1305-glue.o - -quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $(<) > $(@) - -$(obj)/%-core.S: $(src)/%-armv4.pl - $(call cmd,perl) - -clean-files += poly1305-core.S - -aflags-thumb2-$(CONFIG_THUMB2_KERNEL) := -U__thumb2__ -D__thumb2__=1 - -# massage the perlasm code a bit so we only get the NEON routine if we need it -poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5 -poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7 -AFLAGS_poly1305-core.o += $(poly1305-aflags-y) $(aflags-thumb2-y) diff --git a/lib/crypto/arm/poly1305-armv4.pl b/lib/crypto/arm/poly1305-armv4.pl index dd7a996361a7..34c11b7b44bd 100644 --- a/lib/crypto/arm/poly1305-armv4.pl +++ b/lib/crypto/arm/poly1305-armv4.pl @@ -43,9 +43,8 @@ $code.=<<___; #else # define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ -# define poly1305_init poly1305_block_init_arch +# define poly1305_init poly1305_block_init # define poly1305_blocks poly1305_blocks_arm -# define poly1305_emit poly1305_emit_arch #endif #if defined(__thumb2__) diff --git a/lib/crypto/arm/poly1305-glue.c b/lib/crypto/arm/poly1305-glue.c deleted file mode 100644 index 9e513e319e37..000000000000 --- a/lib/crypto/arm/poly1305-glue.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM - * - * Copyright (C) 2019 Linaro Ltd. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, - unsigned int len, u32 padbit) -{ - len = round_down(len, POLY1305_BLOCK_SIZE); - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && likely(may_use_simd())) { - do { - unsigned int todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, padbit); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else - poly1305_blocks_arm(state, src, len, padbit); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -static int __init arm_poly1305_mod_init(void) -{ - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - (elf_hwcap & HWCAP_NEON)) - static_branch_enable(&have_neon); - return 0; -} -subsys_initcall(arm_poly1305_mod_init); - -static void __exit arm_poly1305_mod_exit(void) -{ -} -module_exit(arm_poly1305_mod_exit); - -MODULE_DESCRIPTION("Accelerated Poly1305 transform for ARM"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm/poly1305.h b/lib/crypto/arm/poly1305.h new file mode 100644 index 000000000000..0021cf368307 --- /dev/null +++ b/lib/crypto/arm/poly1305.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM + * + * Copyright (C) 2019 Linaro Ltd. 
+ */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon) && likely(may_use_simd())) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks_arm(state, src, len, padbit); +} + +#ifdef CONFIG_KERNEL_MODE_NEON +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (elf_hwcap & HWCAP_NEON) + static_branch_enable(&have_neon); +} +#endif /* CONFIG_KERNEL_MODE_NEON */ diff --git a/lib/crypto/arm64/Kconfig b/lib/crypto/arm64/Kconfig index 0b903ef524d8..07c8a4f0ab03 100644 --- a/lib/crypto/arm64/Kconfig +++ b/lib/crypto/arm64/Kconfig @@ -6,9 +6,3 @@ config CRYPTO_CHACHA20_NEON default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_NEON - tristate - depends on KERNEL_MODE_NEON - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/arm64/Makefile b/lib/crypto/arm64/Makefile index 6207088397a7..d49cceca3d1c 100644 --- a/lib/crypto/arm64/Makefile +++ b/lib/crypto/arm64/Makefile @@ -2,16 +2,3 @@ obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o - -obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o -poly1305-neon-y := poly1305-core.o poly1305-glue.o -AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_block_init_arch -AFLAGS_poly1305-core.o += -Dpoly1305_emit=poly1305_emit_arch - -quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $(<) void $(@) - -$(obj)/%-core.S: $(src)/%-armv8.pl - $(call cmd,perlasm) - -clean-files += poly1305-core.S diff --git a/lib/crypto/arm64/poly1305-armv8.pl b/lib/crypto/arm64/poly1305-armv8.pl index 22c9069c0650..f1930c6b55ce 100644 --- a/lib/crypto/arm64/poly1305-armv8.pl +++ b/lib/crypto/arm64/poly1305-armv8.pl @@ -50,6 +50,9 @@ $code.=<<___; #ifndef __KERNEL__ # include "arm_arch.h" .extern OPENSSL_armcap_P +#else +# define poly1305_init poly1305_block_init +# define poly1305_blocks poly1305_blocks_arm64 #endif .text diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-glue.c deleted file mode 100644 index d4a522e7d25a..000000000000 --- a/lib/crypto/arm64/poly1305-glue.c +++ /dev/null @@ -1,67 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 - * - * Copyright (C) 2019 Linaro Ltd. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, - unsigned int len, u32 padbit) -{ - len = round_down(len, POLY1305_BLOCK_SIZE); - if (static_branch_likely(&have_neon) && likely(may_use_simd())) { - do { - unsigned int todo = min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, padbit); - kernel_neon_end(); - - len -= todo; - src += todo; - } while (len); - } else - poly1305_blocks(state, src, len, padbit); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -static int __init neon_poly1305_mod_init(void) -{ - if (cpu_have_named_feature(ASIMD)) - static_branch_enable(&have_neon); - return 0; -} -subsys_initcall(neon_poly1305_mod_init); - -static void __exit neon_poly1305_mod_exit(void) -{ -} -module_exit(neon_poly1305_mod_exit); - -MODULE_DESCRIPTION("Poly1305 authenticator (ARM64 optimized)"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm64/poly1305.h b/lib/crypto/arm64/poly1305.h new file mode 100644 index 000000000000..aed5921ccd9a --- /dev/null +++ b/lib/crypto/arm64/poly1305.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 + * + * Copyright (C) 2019 Linaro Ltd. 
+ */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_arm64(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + if (static_branch_likely(&have_neon) && likely(may_use_simd())) { + do { + unsigned int todo = min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -= todo; + src += todo; + } while (len); + } else + poly1305_blocks_arm64(state, src, len, padbit); +} + +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (cpu_have_named_feature(ASIMD)) + static_branch_enable(&have_neon); +} diff --git a/lib/crypto/mips/Kconfig b/lib/crypto/mips/Kconfig index 0670a170c1be..94c1a0892c20 100644 --- a/lib/crypto/mips/Kconfig +++ b/lib/crypto/mips/Kconfig @@ -5,8 +5,3 @@ config CRYPTO_CHACHA_MIPS depends on CPU_MIPS32_R2 default CRYPTO_LIB_CHACHA select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_MIPS - tristate - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/mips/Makefile b/lib/crypto/mips/Makefile index 804488c7aded..b5ea0e25c21e 100644 --- a/lib/crypto/mips/Makefile +++ b/lib/crypto/mips/Makefile @@ -3,17 +3,3 @@ obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o chacha-mips-y := chacha-core.o chacha-glue.o AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots - -obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o -poly1305-mips-y := poly1305-core.o poly1305-glue.o - -perlasm-flavour-$(CONFIG_32BIT) := o32 -perlasm-flavour-$(CONFIG_64BIT) := 64 - -quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) - -$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE - $(call if_changed,perlasm) - -targets += poly1305-core.S diff --git a/lib/crypto/mips/poly1305-glue.c b/lib/crypto/mips/poly1305-glue.c deleted file mode 100644 index 002f50f710ab..000000000000 --- a/lib/crypto/mips/poly1305-glue.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS - * - * Copyright (C) 2019 Linaro Ltd. 
- */ - -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_arch(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/mips/poly1305-mips.pl b/lib/crypto/mips/poly1305-mips.pl index 399f10c3e385..71347f34f4f9 100644 --- a/lib/crypto/mips/poly1305-mips.pl +++ b/lib/crypto/mips/poly1305-mips.pl @@ -93,9 +93,7 @@ $code.=<<___; #endif #ifdef __KERNEL__ -# define poly1305_init poly1305_block_init_arch -# define poly1305_blocks poly1305_blocks_arch -# define poly1305_emit poly1305_emit_arch +# define poly1305_init poly1305_block_init #endif #if defined(__MIPSEB__) && !defined(MIPSEB) @@ -565,9 +563,7 @@ $code.=<<___; #endif #ifdef __KERNEL__ -# define poly1305_init poly1305_block_init_arch -# define poly1305_blocks poly1305_blocks_arch -# define poly1305_emit poly1305_emit_arch +# define poly1305_init poly1305_block_init #endif #if defined(__MIPSEB__) && !defined(MIPSEB) diff --git a/lib/crypto/mips/poly1305.h b/lib/crypto/mips/poly1305.h new file mode 100644 index 000000000000..85de450f1a93 --- /dev/null +++ b/lib/crypto/mips/poly1305.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS + * + * Copyright (C) 2019 Linaro Ltd. + */ + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); diff --git a/lib/crypto/poly1305-generic.c b/lib/crypto/poly1305-generic.c deleted file mode 100644 index 71a16c5c538b..000000000000 --- a/lib/crypto/poly1305-generic.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Poly1305 authenticator algorithm, RFC7539 - * - * Copyright (C) 2015 Martin Willi - * - * Based on public domain code by Andrew Moon and Daniel J. Bernstein. - */ - -#include -#include -#include -#include - -void poly1305_block_init_generic(struct poly1305_block_state *desc, - const u8 raw_key[POLY1305_BLOCK_SIZE]) -{ - poly1305_core_init(&desc->h); - poly1305_core_setkey(&desc->core_r, raw_key); -} -EXPORT_SYMBOL_GPL(poly1305_block_init_generic); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("Poly1305 algorithm (generic implementation)"); diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index a6dc182b6c22..f313ccc4b4dd 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -7,7 +7,6 @@ * Based on public domain code by Andrew Moon and Daniel J. Bernstein. 
*/ -#include #include #include #include @@ -15,6 +14,14 @@ #include #include +#ifdef CONFIG_CRYPTO_LIB_POLY1305_ARCH +#include "poly1305.h" /* $(SRCARCH)/poly1305.h */ +#else +#define poly1305_block_init poly1305_block_init_generic +#define poly1305_blocks poly1305_blocks_generic +#define poly1305_emit poly1305_emit_generic +#endif + void poly1305_init(struct poly1305_desc_ctx *desc, const u8 key[POLY1305_KEY_SIZE]) { @@ -23,28 +30,40 @@ void poly1305_init(struct poly1305_desc_ctx *desc, desc->s[2] = get_unaligned_le32(key + 24); desc->s[3] = get_unaligned_le32(key + 28); desc->buflen = 0; - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_block_init_arch(&desc->state, key); - else - poly1305_block_init_generic(&desc->state, key); + poly1305_block_init(&desc->state, key); } EXPORT_SYMBOL(poly1305_init); -static inline void poly1305_blocks(struct poly1305_block_state *state, - const u8 *src, unsigned int len) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_blocks_arch(state, src, len, 1); - else - poly1305_blocks_generic(state, src, len, 1); -} - void poly1305_update(struct poly1305_desc_ctx *desc, const u8 *src, unsigned int nbytes) { - desc->buflen = BLOCK_HASH_UPDATE(poly1305_blocks, &desc->state, - src, nbytes, POLY1305_BLOCK_SIZE, - desc->buf, desc->buflen); + if (desc->buflen + nbytes >= POLY1305_BLOCK_SIZE) { + unsigned int bulk_len; + + if (desc->buflen) { + unsigned int l = POLY1305_BLOCK_SIZE - desc->buflen; + + memcpy(&desc->buf[desc->buflen], src, l); + src += l; + nbytes -= l; + + poly1305_blocks(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 1); + desc->buflen = 0; + } + + bulk_len = round_down(nbytes, POLY1305_BLOCK_SIZE); + nbytes %= POLY1305_BLOCK_SIZE; + + if (bulk_len) { + poly1305_blocks(&desc->state, src, bulk_len, 1); + src += bulk_len; + } + } + if (nbytes) { + memcpy(&desc->buf[desc->buflen], src, nbytes); + desc->buflen += nbytes; + } } EXPORT_SYMBOL(poly1305_update); @@ -54,22 +73,28 @@ void poly1305_final(struct poly1305_desc_ctx *desc, u8 *dst) desc->buf[desc->buflen++] = 1; memset(desc->buf + desc->buflen, 0, POLY1305_BLOCK_SIZE - desc->buflen); - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_blocks_arch(&desc->state, desc->buf, - POLY1305_BLOCK_SIZE, 0); - else - poly1305_blocks_generic(&desc->state, desc->buf, - POLY1305_BLOCK_SIZE, 0); + poly1305_blocks(&desc->state, desc->buf, POLY1305_BLOCK_SIZE, + 0); } - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_emit_arch(&desc->state.h, dst, desc->s); - else - poly1305_emit_generic(&desc->state.h, dst, desc->s); + poly1305_emit(&desc->state.h, dst, desc->s); *desc = (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final); +#ifdef poly1305_mod_init_arch +static int __init poly1305_mod_init(void) +{ + poly1305_mod_init_arch(); + return 0; +} +subsys_initcall(poly1305_mod_init); + +static void __exit poly1305_mod_exit(void) +{ +} +module_exit(poly1305_mod_exit); +#endif + MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); MODULE_DESCRIPTION("Poly1305 authenticator algorithm, RFC7539"); diff --git a/lib/crypto/powerpc/Kconfig b/lib/crypto/powerpc/Kconfig index 2eaeb7665a6a..e41012a61876 100644 --- a/lib/crypto/powerpc/Kconfig +++ b/lib/crypto/powerpc/Kconfig @@ -6,11 +6,3 @@ config CRYPTO_CHACHA20_P10 default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_P10 - tristate - depends on PPC64 && CPU_LITTLE_ENDIAN && VSX - depends on BROKEN # Needs to be fixed to work in 
softirq context - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 - select CRYPTO_LIB_POLY1305_GENERIC diff --git a/lib/crypto/powerpc/Makefile b/lib/crypto/powerpc/Makefile index 5709ae14258a..778a04edd226 100644 --- a/lib/crypto/powerpc/Makefile +++ b/lib/crypto/powerpc/Makefile @@ -2,6 +2,3 @@ obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o - -obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o -poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o diff --git a/lib/crypto/powerpc/poly1305-p10-glue.c b/lib/crypto/powerpc/poly1305-p10-glue.c deleted file mode 100644 index 184a71f9c1de..000000000000 --- a/lib/crypto/powerpc/poly1305-p10-glue.c +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Poly1305 authenticator algorithm, RFC7539. - * - * Copyright 2023- IBM Corp. All rights reserved. - */ -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); -asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 *m, u32 mlen, int highbit); -asmlinkage void poly1305_emit_64(const struct poly1305_state *state, const u32 nonce[4], u8 digest[POLY1305_DIGEST_SIZE]); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); - -static void vsx_begin(void) -{ - preempt_disable(); - enable_kernel_vsx(); -} - -static void vsx_end(void) -{ - disable_kernel_vsx(); - preempt_enable(); -} - -void poly1305_block_init_arch(struct poly1305_block_state *dctx, - const u8 raw_key[POLY1305_BLOCK_SIZE]) -{ - if (!static_key_enabled(&have_p10)) - return poly1305_block_init_generic(dctx, raw_key); - - dctx->h = (struct poly1305_state){}; - dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0); - dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8); -} -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *src, - unsigned int len, u32 padbit) -{ - if (!static_key_enabled(&have_p10)) - return poly1305_blocks_generic(state, src, len, padbit); - vsx_begin(); - if (len >= POLY1305_BLOCK_SIZE * 4) { - poly1305_p10le_4blocks(state, src, len); - src += len - (len % (POLY1305_BLOCK_SIZE * 4)); - len %= POLY1305_BLOCK_SIZE * 4; - } - while (len >= POLY1305_BLOCK_SIZE) { - poly1305_64s(state, src, POLY1305_BLOCK_SIZE, padbit); - len -= POLY1305_BLOCK_SIZE; - src += POLY1305_BLOCK_SIZE; - } - vsx_end(); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]) -{ - if (!static_key_enabled(&have_p10)) - return poly1305_emit_generic(state, digest, nonce); - poly1305_emit_64(state, nonce, digest); -} -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static int __init poly1305_p10_init(void) -{ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - static_branch_enable(&have_p10); - return 0; -} -subsys_initcall(poly1305_p10_init); - -static void __exit poly1305_p10_exit(void) -{ -} -module_exit(poly1305_p10_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Danny Tsen "); -MODULE_DESCRIPTION("Optimized Poly1305 for P10"); diff --git a/lib/crypto/powerpc/poly1305.h b/lib/crypto/powerpc/poly1305.h new file mode 100644 index 000000000000..b8ed098a0e95 --- /dev/null +++ b/lib/crypto/powerpc/poly1305.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Poly1305 authenticator algorithm, RFC7539. 
+ * + * Copyright 2023- IBM Corp. All rights reserved. + */ +#include +#include +#include +#include +#include + +asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state, const u8 *m, u32 mlen); +asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 *m, u32 mlen, int highbit); +asmlinkage void poly1305_emit_64(const struct poly1305_state *state, const u32 nonce[4], u8 digest[POLY1305_DIGEST_SIZE]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); + +static void vsx_begin(void) +{ + preempt_disable(); + enable_kernel_vsx(); +} + +static void vsx_end(void) +{ + disable_kernel_vsx(); + preempt_enable(); +} + +static void poly1305_block_init(struct poly1305_block_state *dctx, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + if (!static_key_enabled(&have_p10)) + return poly1305_block_init_generic(dctx, raw_key); + + dctx->h = (struct poly1305_state){}; + dctx->core_r.key.r64[0] = get_unaligned_le64(raw_key + 0); + dctx->core_r.key.r64[1] = get_unaligned_le64(raw_key + 8); +} + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *src, + unsigned int len, u32 padbit) +{ + if (!static_key_enabled(&have_p10)) + return poly1305_blocks_generic(state, src, len, padbit); + vsx_begin(); + if (len >= POLY1305_BLOCK_SIZE * 4) { + poly1305_p10le_4blocks(state, src, len); + src += len - (len % (POLY1305_BLOCK_SIZE * 4)); + len %= POLY1305_BLOCK_SIZE * 4; + } + while (len >= POLY1305_BLOCK_SIZE) { + poly1305_64s(state, src, POLY1305_BLOCK_SIZE, padbit); + len -= POLY1305_BLOCK_SIZE; + src += POLY1305_BLOCK_SIZE; + } + vsx_end(); +} + +static void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]) +{ + if (!static_key_enabled(&have_p10)) + return poly1305_emit_generic(state, digest, nonce); + poly1305_emit_64(state, nonce, digest); +} + +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + static_branch_enable(&have_p10); +} diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig index 546fe2afe0b5..24dc9a59b272 100644 --- a/lib/crypto/x86/Kconfig +++ b/lib/crypto/x86/Kconfig @@ -18,9 +18,3 @@ config CRYPTO_CHACHA20_X86_64 default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_X86_64 - tristate - depends on 64BIT - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/x86/Makefile b/lib/crypto/x86/Makefile index c2ff8c5f1046..16c9d76f9947 100644 --- a/lib/crypto/x86/Makefile +++ b/lib/crypto/x86/Makefile @@ -5,13 +5,3 @@ libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha-avx512vl-x86_64.o chacha_glue.o - -obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o -poly1305-x86_64-y := poly1305-x86_64-cryptogams.o poly1305_glue.o -targets += poly1305-x86_64-cryptogams.S - -quiet_cmd_perlasm = PERLASM $@ - cmd_perlasm = $(PERL) $< > $@ - -$(obj)/%.S: $(src)/%.pl FORCE - $(call if_changed,perlasm) diff --git a/lib/crypto/x86/poly1305-x86_64-cryptogams.pl b/lib/crypto/x86/poly1305-x86_64-cryptogams.pl index 501827254fed..409ec6955733 100644 --- a/lib/crypto/x86/poly1305-x86_64-cryptogams.pl +++ b/lib/crypto/x86/poly1305-x86_64-cryptogams.pl @@ -118,19 +118,6 @@ sub declare_function() { } } -sub declare_typed_function() { - my ($name, $align, $nargs) = @_; - if($kernel) { - $code 
.= "SYM_TYPED_FUNC_START($name)\n"; - $code .= ".L$name:\n"; - } else { - $code .= ".globl $name\n"; - $code .= ".type $name,\@function,$nargs\n"; - $code .= ".align $align\n"; - $code .= "$name:\n"; - } -} - sub end_function() { my ($name) = @_; if($kernel) { @@ -141,7 +128,7 @@ sub end_function() { } $code.=<<___ if $kernel; -#include +#include ___ if ($avx) { @@ -249,14 +236,14 @@ ___ $code.=<<___ if (!$kernel); .extern OPENSSL_ia32cap_P -.globl poly1305_block_init_arch -.hidden poly1305_block_init_arch +.globl poly1305_init_x86_64 +.hidden poly1305_init_x86_64 .globl poly1305_blocks_x86_64 .hidden poly1305_blocks_x86_64 .globl poly1305_emit_x86_64 .hidden poly1305_emit_x86_64 ___ -&declare_typed_function("poly1305_block_init_arch", 32, 3); +&declare_function("poly1305_init_x86_64", 32, 3); $code.=<<___; xor %eax,%eax mov %rax,0($ctx) # initialize hash value @@ -311,7 +298,7 @@ $code.=<<___; .Lno_key: RET ___ -&end_function("poly1305_block_init_arch"); +&end_function("poly1305_init_x86_64"); &declare_function("poly1305_blocks_x86_64", 32, 4); $code.=<<___; @@ -4118,9 +4105,9 @@ avx_handler: .section .pdata .align 4 - .rva .LSEH_begin_poly1305_block_init_arch - .rva .LSEH_end_poly1305_block_init_arch - .rva .LSEH_info_poly1305_block_init_arch + .rva .LSEH_begin_poly1305_init_x86_64 + .rva .LSEH_end_poly1305_init_x86_64 + .rva .LSEH_info_poly1305_init_x86_64 .rva .LSEH_begin_poly1305_blocks_x86_64 .rva .LSEH_end_poly1305_blocks_x86_64 @@ -4168,10 +4155,10 @@ ___ $code.=<<___; .section .xdata .align 8 -.LSEH_info_poly1305_block_init_arch: +.LSEH_info_poly1305_init_x86_64: .byte 9,0,0,0 .rva se_handler - .rva .LSEH_begin_poly1305_block_init_arch,.LSEH_begin_poly1305_block_init_arch + .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 .LSEH_info_poly1305_blocks_x86_64: .byte 9,0,0,0 diff --git a/lib/crypto/x86/poly1305.h b/lib/crypto/x86/poly1305.h new file mode 100644 index 000000000000..ee92e3740a78 --- /dev/null +++ b/lib/crypto/x86/poly1305.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include +#include +#include +#include + +struct poly1305_arch_internal { + union { + struct { + u32 h[5]; + u32 is_base2_26; + }; + u64 hs[3]; + }; + u64 r[2]; + u64 pad; + struct { u32 r2, r1, r4, r3; } rn[9]; +}; + +/* + * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit + * the unfortunate situation of using AVX and then having to go back to scalar + * -- because the user is silly and has called the update function from two + * separate contexts -- then we need to convert back to the original base before + * proceeding. It is possible to reason that the initial reduction below is + * sufficient given the implementation invariants. However, for an avoidance of + * doubt and because this is not performance critical, we do the full reduction + * anyway. 
Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py + */ +static void convert_to_base2_64(void *ctx) +{ + struct poly1305_arch_internal *state = ctx; + u32 cy; + + if (!state->is_base2_26) + return; + + cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; + cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; + cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; + cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; + state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; + state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); + state->hs[2] = state->h[4] >> 24; + /* Unsigned Less Than: branchlessly produces 1 if a < b, else 0. */ +#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) + cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); + state->hs[2] &= 3; + state->hs[0] += cy; + state->hs[1] += (cy = ULT(state->hs[0], cy)); + state->hs[2] += ULT(state->hs[1], cy); +#undef ULT + state->is_base2_26 = 0; +} + +asmlinkage void poly1305_init_x86_64(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_x86_64(struct poly1305_arch_internal *ctx, + const u8 *inp, + const size_t len, const u32 padbit); +asmlinkage void poly1305_emit_x86_64(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_emit_avx(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); +asmlinkage void poly1305_blocks_avx(struct poly1305_arch_internal *ctx, + const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx2(struct poly1305_arch_internal *ctx, + const u8 *inp, const size_t len, + const u32 padbit); +asmlinkage void poly1305_blocks_avx512(struct poly1305_arch_internal *ctx, + const u8 *inp, + const size_t len, const u32 padbit); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); + +static void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + poly1305_init_x86_64(state, raw_key); +} + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *inp, + unsigned int len, u32 padbit) +{ + struct poly1305_arch_internal *ctx = + container_of(&state->h.h, struct poly1305_arch_internal, h); + + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || + SZ_4K % POLY1305_BLOCK_SIZE); + + /* + * The AVX implementations have significant setup overhead (e.g. key + * power computation, kernel FPU enabling) which makes them slower for + * short messages. Fall back to the scalar implementation for messages + * shorter than 288 bytes, unless the AVX-specific key setup has already + * been performed (indicated by ctx->is_base2_26). 
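
As a brief aside on the ULT() helper defined in the new x86/poly1305.h above: it is a branchless carry/borrow extraction that yields 1 exactly when a < b as unsigned integers. In convert_to_base2_64() it detects whether the 64-bit additions wrapped (hs[0] += cy overflows exactly when the result ends up below cy) so the carry can be propagated into the next word. A minimal userspace check, not part of the patch and making no kernel assumptions, would be:

/* Standalone check that ULT(a, b) equals the unsigned comparison a < b. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))

int main(void)
{
	static const uint64_t v[] = {
		0, 1, 2, 3, 5, (1ULL << 26) - 1, 1ULL << 52,
		UINT64_MAX - 1, UINT64_MAX
	};
	size_t n = sizeof(v) / sizeof(v[0]);

	for (size_t i = 0; i < n; i++) {
		for (size_t j = 0; j < n; j++) {
			uint64_t a = v[i], b = v[j];

			/* The MSB of the expression is the borrow of a - b. */
			assert(ULT(a, b) == (a < b ? 1ULL : 0ULL));
		}
	}
	printf("ULT() matches '<' on all %zu x %zu pairs\n", n, n);
	return 0;
}
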
+ */ + if (!static_branch_likely(&poly1305_use_avx) || + (len < POLY1305_BLOCK_SIZE * 18 && !ctx->is_base2_26) || + unlikely(!irq_fpu_usable())) { + convert_to_base2_64(ctx); + poly1305_blocks_x86_64(ctx, inp, len, padbit); + return; + } + + do { + const unsigned int bytes = min(len, SZ_4K); + + kernel_fpu_begin(); + if (static_branch_likely(&poly1305_use_avx512)) + poly1305_blocks_avx512(ctx, inp, bytes, padbit); + else if (static_branch_likely(&poly1305_use_avx2)) + poly1305_blocks_avx2(ctx, inp, bytes, padbit); + else + poly1305_blocks_avx(ctx, inp, bytes, padbit); + kernel_fpu_end(); + + len -= bytes; + inp += bytes; + } while (len); +} + +static void poly1305_emit(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) +{ + if (!static_branch_likely(&poly1305_use_avx)) + poly1305_emit_x86_64(ctx, mac, nonce); + else + poly1305_emit_avx(ctx, mac, nonce); +} + +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (boot_cpu_has(X86_FEATURE_AVX) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx); + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) + static_branch_enable(&poly1305_use_avx2); + if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && + /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */ + boot_cpu_data.x86_vfm != INTEL_SKYLAKE_X) + static_branch_enable(&poly1305_use_avx512); +} diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305_glue.c deleted file mode 100644 index deb5841cb0ad..000000000000 --- a/lib/crypto/x86/poly1305_glue.c +++ /dev/null @@ -1,169 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -struct poly1305_arch_internal { - union { - struct { - u32 h[5]; - u32 is_base2_26; - }; - u64 hs[3]; - }; - u64 r[2]; - u64 pad; - struct { u32 r2, r1, r4, r3; } rn[9]; -}; - -/* - * The AVX code uses base 2^26, while the scalar code uses base 2^64. If we hit - * the unfortunate situation of using AVX and then having to go back to scalar - * -- because the user is silly and has called the update function from two - * separate contexts -- then we need to convert back to the original base before - * proceeding. It is possible to reason that the initial reduction below is - * sufficient given the implementation invariants. However, for an avoidance of - * doubt and because this is not performance critical, we do the full reduction - * anyway. 
Z3 proof of below function: https://xn--4db.cc/ltPtHCKN/py - */ -static void convert_to_base2_64(void *ctx) -{ - struct poly1305_arch_internal *state = ctx; - u32 cy; - - if (!state->is_base2_26) - return; - - cy = state->h[0] >> 26; state->h[0] &= 0x3ffffff; state->h[1] += cy; - cy = state->h[1] >> 26; state->h[1] &= 0x3ffffff; state->h[2] += cy; - cy = state->h[2] >> 26; state->h[2] &= 0x3ffffff; state->h[3] += cy; - cy = state->h[3] >> 26; state->h[3] &= 0x3ffffff; state->h[4] += cy; - state->hs[0] = ((u64)state->h[2] << 52) | ((u64)state->h[1] << 26) | state->h[0]; - state->hs[1] = ((u64)state->h[4] << 40) | ((u64)state->h[3] << 14) | (state->h[2] >> 12); - state->hs[2] = state->h[4] >> 24; - /* Unsigned Less Than: branchlessly produces 1 if a < b, else 0. */ -#define ULT(a, b) ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) - cy = (state->hs[2] >> 2) + (state->hs[2] & ~3ULL); - state->hs[2] &= 3; - state->hs[0] += cy; - state->hs[1] += (cy = ULT(state->hs[0], cy)); - state->hs[2] += ULT(state->hs[1], cy); -#undef ULT - state->is_base2_26 = 0; -} - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_x86_64(struct poly1305_arch_internal *ctx, - const u8 *inp, - const size_t len, const u32 padbit); -asmlinkage void poly1305_emit_x86_64(const struct poly1305_state *ctx, - u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -asmlinkage void poly1305_emit_avx(const struct poly1305_state *ctx, - u8 mac[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -asmlinkage void poly1305_blocks_avx(struct poly1305_arch_internal *ctx, - const u8 *inp, const size_t len, - const u32 padbit); -asmlinkage void poly1305_blocks_avx2(struct poly1305_arch_internal *ctx, - const u8 *inp, const size_t len, - const u32 padbit); -asmlinkage void poly1305_blocks_avx512(struct poly1305_arch_internal *ctx, - const u8 *inp, - const size_t len, const u32 padbit); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *inp, - unsigned int len, u32 padbit) -{ - struct poly1305_arch_internal *ctx = - container_of(&state->h.h, struct poly1305_arch_internal, h); - - /* SIMD disables preemption, so relax after processing each page. */ - BUILD_BUG_ON(SZ_4K < POLY1305_BLOCK_SIZE || - SZ_4K % POLY1305_BLOCK_SIZE); - - /* - * The AVX implementations have significant setup overhead (e.g. key - * power computation, kernel FPU enabling) which makes them slower for - * short messages. Fall back to the scalar implementation for messages - * shorter than 288 bytes, unless the AVX-specific key setup has already - * been performed (indicated by ctx->is_base2_26). 
- */ - if (!static_branch_likely(&poly1305_use_avx) || - (len < POLY1305_BLOCK_SIZE * 18 && !ctx->is_base2_26) || - unlikely(!irq_fpu_usable())) { - convert_to_base2_64(ctx); - poly1305_blocks_x86_64(ctx, inp, len, padbit); - return; - } - - do { - const unsigned int bytes = min(len, SZ_4K); - - kernel_fpu_begin(); - if (static_branch_likely(&poly1305_use_avx512)) - poly1305_blocks_avx512(ctx, inp, bytes, padbit); - else if (static_branch_likely(&poly1305_use_avx2)) - poly1305_blocks_avx2(ctx, inp, bytes, padbit); - else - poly1305_blocks_avx(ctx, inp, bytes, padbit); - kernel_fpu_end(); - - len -= bytes; - inp += bytes; - } while (len); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -void poly1305_emit_arch(const struct poly1305_state *ctx, - u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) -{ - if (!static_branch_likely(&poly1305_use_avx)) - poly1305_emit_x86_64(ctx, mac, nonce); - else - poly1305_emit_avx(ctx, mac, nonce); -} -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static int __init poly1305_simd_mod_init(void) -{ - if (boot_cpu_has(X86_FEATURE_AVX) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - static_branch_enable(&poly1305_use_avx); - if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) - static_branch_enable(&poly1305_use_avx2); - if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && - boot_cpu_has(X86_FEATURE_AVX512F) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MASK_AVX512, NULL) && - /* Skylake downclocks unacceptably much when using zmm, but later generations are fast. */ - boot_cpu_data.x86_vfm != INTEL_SKYLAKE_X) - static_branch_enable(&poly1305_use_avx512); - return 0; -} -subsys_initcall(poly1305_simd_mod_init); - -static void __exit poly1305_simd_mod_exit(void) -{ -} -module_exit(poly1305_simd_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jason A. Donenfeld "); -MODULE_DESCRIPTION("Poly1305 authenticator"); -- cgit v1.2.3 From bef9c755986980eecc18e9cecd847bc3c037aebb Mon Sep 17 00:00:00 2001 From: Zhihang Shao Date: Fri, 29 Aug 2025 08:25:13 -0700 Subject: lib/crypto: riscv/poly1305: Import OpenSSL/CRYPTOGAMS implementation This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for riscv authored by Andy Polyakov. The file 'poly1305-riscv.pl' is taken straight from https://github.com/dot-asm/cryptogams commit 5e3fba73576244708a752fa61a8e93e587f271bb. This patch was tested on SpacemiT X60, with 2~2.5x improvement over generic implementation. Signed-off-by: Chunyan Zhang Signed-off-by: Zhihang Shao [EB: ported to lib/crypto/riscv/] Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250829152513.92459-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 3 +- lib/crypto/Makefile | 14 + lib/crypto/riscv/poly1305-riscv.pl | 847 +++++++++++++++++++++++++++++++++++++ lib/crypto/riscv/poly1305.h | 14 + 4 files changed, 877 insertions(+), 1 deletion(-) create mode 100644 lib/crypto/riscv/poly1305-riscv.pl create mode 100644 lib/crypto/riscv/poly1305.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 9991118c41a9..cb4e056a98fa 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -128,6 +128,7 @@ config CRYPTO_LIB_POLY1305_ARCH default y if MIPS # The PPC64 code needs to be fixed to work in softirq context. 
default y if PPC64 && CPU_LITTLE_ENDIAN && VSX && BROKEN + default y if RISCV default y if X86_64 # This symbol controls the inclusion of the Poly1305 generic code. This differs @@ -143,7 +144,7 @@ config CRYPTO_LIB_POLY1305_GENERIC config CRYPTO_LIB_POLY1305_RSIZE int - default 2 if MIPS + default 2 if MIPS || RISCV default 11 if X86_64 default 9 if ARM || ARM64 default 1 diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index e0536e3b3a04..cd460e5e3dd2 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -112,6 +112,19 @@ endif libpoly1305-$(CONFIG_PPC) += powerpc/poly1305-p10le_64.o +ifeq ($(CONFIG_RISCV),y) +libpoly1305-y += riscv/poly1305-core.o +poly1305-perlasm-flavour-$(CONFIG_32BIT) := 32 +poly1305-perlasm-flavour-$(CONFIG_64BIT) := 64 +quiet_cmd_perlasm_poly1305 = PERLASM $@ + cmd_perlasm_poly1305 = $(PERL) $< $(poly1305-perlasm-flavour-y) $@ +# Use if_changed instead of cmd, in case the flavour changed. +$(obj)/riscv/poly1305-core.S: $(src)/riscv/poly1305-riscv.pl FORCE + $(call if_changed,perlasm_poly1305) +targets += riscv/poly1305-core.S +AFLAGS_riscv/poly1305-core.o += -Dpoly1305_init=poly1305_block_init +endif + ifeq ($(CONFIG_X86),y) libpoly1305-y += x86/poly1305-x86_64-cryptogams.o $(obj)/x86/poly1305-x86_64-cryptogams.S: $(src)/x86/poly1305-x86_64-cryptogams.pl @@ -124,6 +137,7 @@ endif # CONFIG_CRYPTO_LIB_POLY1305_ARCH clean-files += arm/poly1305-core.S \ arm64/poly1305-core.S \ mips/poly1305-core.S \ + riscv/poly1305-core.S \ x86/poly1305-x86_64-cryptogams.S ################################################################################ diff --git a/lib/crypto/riscv/poly1305-riscv.pl b/lib/crypto/riscv/poly1305-riscv.pl new file mode 100644 index 000000000000..e25e6338a9ac --- /dev/null +++ b/lib/crypto/riscv/poly1305-riscv.pl @@ -0,0 +1,847 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, initially for use with OpenSSL. +# ==================================================================== +# +# Poly1305 hash for RISC-V. +# +# February 2019 +# +# In the essence it's pretty straightforward transliteration of MIPS +# module [without big-endian option]. +# +# 1.8 cycles per byte on U74, >100% faster than compiler-generated +# code. 1.9 cpb on C910, ~75% improvement. 3.3 on Spacemit X60, ~69% +# improvement. +# +# June 2024. +# +# Add CHERI support. +# +###################################################################### +# +($zero,$ra,$sp,$gp,$tp)=map("x$_",(0..4)); +($t0,$t1,$t2,$t3,$t4,$t5,$t6)=map("x$_",(5..7,28..31)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("x$_",(10..17)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("x$_",(8,9,18..27)); +# +###################################################################### + +$flavour = shift || "64"; + +for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); } +open STDOUT,">$output"; + +$code.=<<___; +#ifdef __KERNEL__ +# ifdef __riscv_zicfilp +# undef __riscv_zicfilp // calls are expected to be direct +# endif +#endif + +#if defined(__CHERI_PURE_CAPABILITY__) && !defined(__riscv_misaligned_fast) +# define __riscv_misaligned_fast 1 +#endif +___ + +if ($flavour =~ /64/) {{{ +###################################################################### +# 64-bit code path... 
+# +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$t0,$t1,$t2); + +$code.=<<___; +#if __riscv_xlen == 64 +# if __SIZEOF_POINTER__ == 16 +# define PUSH csc +# define POP clc +# else +# define PUSH sd +# define POP ld +# endif +#else +# error "unsupported __riscv_xlen" +#endif + +.option pic +.text + +.globl poly1305_init +.type poly1305_init,\@function +poly1305_init: +#ifdef __riscv_zicfilp + lpad 0 +#endif + sd $zero,0($ctx) + sd $zero,8($ctx) + sd $zero,16($ctx) + + beqz $inp,.Lno_key + +#ifndef __riscv_misaligned_fast + andi $tmp0,$inp,7 # $inp % 8 + andi $inp,$inp,-8 # align $inp + slli $tmp0,$tmp0,3 # byte to bit offset +#endif + ld $in0,0($inp) + ld $in1,8($inp) +#ifndef __riscv_misaligned_fast + beqz $tmp0,.Laligned_key + + ld $tmp2,16($inp) + neg $tmp1,$tmp0 # implicit &63 in sll + srl $in0,$in0,$tmp0 + sll $tmp3,$in1,$tmp1 + srl $in1,$in1,$tmp0 + sll $tmp2,$tmp2,$tmp1 + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 + +.Laligned_key: +#endif + li $tmp0,1 + slli $tmp0,$tmp0,32 # 0x0000000100000000 + addi $tmp0,$tmp0,-63 # 0x00000000ffffffc1 + slli $tmp0,$tmp0,28 # 0x0ffffffc10000000 + addi $tmp0,$tmp0,-1 # 0x0ffffffc0fffffff + + and $in0,$in0,$tmp0 + addi $tmp0,$tmp0,-3 # 0x0ffffffc0ffffffc + and $in1,$in1,$tmp0 + + sd $in0,24($ctx) + srli $tmp0,$in1,2 + sd $in1,32($ctx) + add $tmp0,$tmp0,$in1 # s1 = r1 + (r1 >> 2) + sd $tmp0,40($ctx) + +.Lno_key: + li $a0,0 # return 0 + ret +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = + ($s0,$s1,$s2,$s3,$t3,$t4,$in0,$in1,$t2); +my ($shr,$shl) = ($t5,$t6); # used on R6 + +$code.=<<___; +.globl poly1305_blocks +.type poly1305_blocks,\@function +poly1305_blocks: +#ifdef __riscv_zicfilp + lpad 0 +#endif + andi $len,$len,-16 # complete blocks only + beqz $len,.Lno_data + + caddi $sp,$sp,-4*__SIZEOF_POINTER__ + PUSH $s0,3*__SIZEOF_POINTER__($sp) + PUSH $s1,2*__SIZEOF_POINTER__($sp) + PUSH $s2,1*__SIZEOF_POINTER__($sp) + PUSH $s3,0*__SIZEOF_POINTER__($sp) + +#ifndef __riscv_misaligned_fast + andi $shr,$inp,7 + andi $inp,$inp,-8 # align $inp + slli $shr,$shr,3 # byte to bit offset + neg $shl,$shr # implicit &63 in sll +#endif + + ld $h0,0($ctx) # load hash value + ld $h1,8($ctx) + ld $h2,16($ctx) + + ld $r0,24($ctx) # load key + ld $r1,32($ctx) + ld $rs1,40($ctx) + + add $len,$len,$inp # end of buffer + +.Loop: + ld $in0,0($inp) # load input + ld $in1,8($inp) +#ifndef __riscv_misaligned_fast + beqz $shr,.Laligned_inp + + ld $tmp2,16($inp) + srl $in0,$in0,$shr + sll $tmp3,$in1,$shl + srl $in1,$in1,$shr + sll $tmp2,$tmp2,$shl + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 + +.Laligned_inp: +#endif + caddi $inp,$inp,16 + + andi $tmp0,$h2,-4 # modulo-scheduled reduction + srli $tmp1,$h2,2 + andi $h2,$h2,3 + + add $d0,$h0,$in0 # accumulate input + add $tmp1,$tmp1,$tmp0 + sltu $tmp0,$d0,$h0 + add $d0,$d0,$tmp1 # ... 
and residue + sltu $tmp1,$d0,$tmp1 + add $d1,$h1,$in1 + add $tmp0,$tmp0,$tmp1 + sltu $tmp1,$d1,$h1 + add $d1,$d1,$tmp0 + + add $d2,$h2,$padbit + sltu $tmp0,$d1,$tmp0 + mulhu $h1,$r0,$d0 # h0*r0 + mul $h0,$r0,$d0 + + add $d2,$d2,$tmp1 + add $d2,$d2,$tmp0 + mulhu $tmp1,$rs1,$d1 # h1*5*r1 + mul $tmp0,$rs1,$d1 + + mulhu $h2,$r1,$d0 # h0*r1 + mul $tmp2,$r1,$d0 + add $h0,$h0,$tmp0 + add $h1,$h1,$tmp1 + sltu $tmp0,$h0,$tmp0 + + add $h1,$h1,$tmp0 + add $h1,$h1,$tmp2 + mulhu $tmp1,$r0,$d1 # h1*r0 + mul $tmp0,$r0,$d1 + + sltu $tmp2,$h1,$tmp2 + add $h2,$h2,$tmp2 + mul $tmp2,$rs1,$d2 # h2*5*r1 + + add $h1,$h1,$tmp0 + add $h2,$h2,$tmp1 + mul $tmp3,$r0,$d2 # h2*r0 + sltu $tmp0,$h1,$tmp0 + add $h2,$h2,$tmp0 + + add $h1,$h1,$tmp2 + sltu $tmp2,$h1,$tmp2 + add $h2,$h2,$tmp2 + add $h2,$h2,$tmp3 + + bne $inp,$len,.Loop + + sd $h0,0($ctx) # store hash value + sd $h1,8($ctx) + sd $h2,16($ctx) + + POP $s0,3*__SIZEOF_POINTER__($sp) # epilogue + POP $s1,2*__SIZEOF_POINTER__($sp) + POP $s2,1*__SIZEOF_POINTER__($sp) + POP $s3,0*__SIZEOF_POINTER__($sp) + caddi $sp,$sp,4*__SIZEOF_POINTER__ + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); + +$code.=<<___; +.globl poly1305_emit +.type poly1305_emit,\@function +poly1305_emit: +#ifdef __riscv_zicfilp + lpad 0 +#endif + ld $tmp2,16($ctx) + ld $tmp0,0($ctx) + ld $tmp1,8($ctx) + + andi $in0,$tmp2,-4 # final reduction + srl $in1,$tmp2,2 + andi $tmp2,$tmp2,3 + add $in0,$in0,$in1 + + add $tmp0,$tmp0,$in0 + sltu $in1,$tmp0,$in0 + addi $in0,$tmp0,5 # compare to modulus + add $tmp1,$tmp1,$in1 + sltiu $tmp3,$in0,5 + sltu $tmp4,$tmp1,$in1 + add $in1,$tmp1,$tmp3 + add $tmp2,$tmp2,$tmp4 + sltu $tmp3,$in1,$tmp3 + add $tmp2,$tmp2,$tmp3 + + srli $tmp2,$tmp2,2 # see if it carried/borrowed + neg $tmp2,$tmp2 + + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + and $in0,$in0,$tmp2 + and $in1,$in1,$tmp2 + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + + lwu $tmp0,0($nonce) # load nonce + lwu $tmp1,4($nonce) + lwu $tmp2,8($nonce) + lwu $tmp3,12($nonce) + slli $tmp1,$tmp1,32 + slli $tmp3,$tmp3,32 + or $tmp0,$tmp0,$tmp1 + or $tmp2,$tmp2,$tmp3 + + add $in0,$in0,$tmp0 # accumulate nonce + add $in1,$in1,$tmp2 + sltu $tmp0,$in0,$tmp0 + add $in1,$in1,$tmp0 + +#ifdef __riscv_misaligned_fast + sd $in0,0($mac) # write mac value + sd $in1,8($mac) +#else + srli $tmp0,$in0,8 # write mac value + srli $tmp1,$in0,16 + srli $tmp2,$in0,24 + sb $in0,0($mac) + srli $tmp3,$in0,32 + sb $tmp0,1($mac) + srli $tmp0,$in0,40 + sb $tmp1,2($mac) + srli $tmp1,$in0,48 + sb $tmp2,3($mac) + srli $tmp2,$in0,56 + sb $tmp3,4($mac) + srli $tmp3,$in1,8 + sb $tmp0,5($mac) + srli $tmp0,$in1,16 + sb $tmp1,6($mac) + srli $tmp1,$in1,24 + sb $tmp2,7($mac) + + sb $in1,8($mac) + srli $tmp2,$in1,32 + sb $tmp3,9($mac) + srli $tmp3,$in1,40 + sb $tmp0,10($mac) + srli $tmp0,$in1,48 + sb $tmp1,11($mac) + srli $tmp1,$in1,56 + sb $tmp2,12($mac) + sb $tmp3,13($mac) + sb $tmp0,14($mac) + sb $tmp1,15($mac) +#endif + + ret +.size poly1305_emit,.-poly1305_emit +.string "Poly1305 for RISC-V, CRYPTOGAMS by \@dot-asm" +___ +} +}}} else {{{ +###################################################################### +# 32-bit code path +# + +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = + ($a4,$a5,$a6,$a7,$t0,$t1,$t2,$t3); + +$code.=<<___; +#if __riscv_xlen == 32 +# if __SIZEOF_POINTER__ == 8 +# define PUSH csc +# define POP clc +# else +# define PUSH sw +# define POP lw +# endif +# define MULX(hi,lo,a,b) mulhu hi,a,b; mul lo,a,b +# define srliw srli 
+# define srlw srl +# define sllw sll +# define addw add +# define addiw addi +# define mulw mul +#elif __riscv_xlen == 64 +# if __SIZEOF_POINTER__ == 16 +# define PUSH csc +# define POP clc +# else +# define PUSH sd +# define POP ld +# endif +# define MULX(hi,lo,a,b) slli b,b,32; srli b,b,32; mul hi,a,b; addiw lo,hi,0; srai hi,hi,32 +#else +# error "unsupported __riscv_xlen" +#endif + +.option pic +.text + +.globl poly1305_init +.type poly1305_init,\@function +poly1305_init: +#ifdef __riscv_zicfilp + lpad 0 +#endif + sw $zero,0($ctx) + sw $zero,4($ctx) + sw $zero,8($ctx) + sw $zero,12($ctx) + sw $zero,16($ctx) + + beqz $inp,.Lno_key + +#ifndef __riscv_misaligned_fast + andi $tmp0,$inp,3 # $inp % 4 + sub $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset +#endif + lw $in0,0($inp) + lw $in1,4($inp) + lw $in2,8($inp) + lw $in3,12($inp) +#ifndef __riscv_misaligned_fast + beqz $tmp0,.Laligned_key + + lw $tmp2,16($inp) + sub $tmp1,$zero,$tmp0 + srlw $in0,$in0,$tmp0 + sllw $tmp3,$in1,$tmp1 + srlw $in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + sllw $tmp3,$in2,$tmp1 + srlw $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + sllw $tmp3,$in3,$tmp1 + srlw $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + sllw $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +.Laligned_key: +#endif + + lui $tmp0,0x10000 + addi $tmp0,$tmp0,-1 # 0x0fffffff + and $in0,$in0,$tmp0 + addi $tmp0,$tmp0,-3 # 0x0ffffffc + and $in1,$in1,$tmp0 + and $in2,$in2,$tmp0 + and $in3,$in3,$tmp0 + + sw $in0,20($ctx) + sw $in1,24($ctx) + sw $in2,28($ctx) + sw $in3,32($ctx) + + srlw $tmp1,$in1,2 + srlw $tmp2,$in2,2 + srlw $tmp3,$in3,2 + addw $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) + addw $in2,$in2,$tmp2 + addw $in3,$in3,$tmp3 + sw $in1,36($ctx) + sw $in2,40($ctx) + sw $in3,44($ctx) +.Lno_key: + li $a0,0 + ret +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = + ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $t0,$t1,$t2); +my ($d0,$d1,$d2,$d3) = + ($a4,$a5,$a6,$a7); +my $shr = $ra; # used on R6 + +$code.=<<___; +.globl poly1305_blocks +.type poly1305_blocks,\@function +poly1305_blocks: +#ifdef __riscv_zicfilp + lpad 0 +#endif + andi $len,$len,-16 # complete blocks only + beqz $len,.Labort + +#ifdef __riscv_zcmp + cm.push {ra,s0-s8}, -48 +#else + caddi $sp,$sp,-__SIZEOF_POINTER__*12 + PUSH $ra, __SIZEOF_POINTER__*11($sp) + PUSH $s0, __SIZEOF_POINTER__*10($sp) + PUSH $s1, __SIZEOF_POINTER__*9($sp) + PUSH $s2, __SIZEOF_POINTER__*8($sp) + PUSH $s3, __SIZEOF_POINTER__*7($sp) + PUSH $s4, __SIZEOF_POINTER__*6($sp) + PUSH $s5, __SIZEOF_POINTER__*5($sp) + PUSH $s6, __SIZEOF_POINTER__*4($sp) + PUSH $s7, __SIZEOF_POINTER__*3($sp) + PUSH $s8, __SIZEOF_POINTER__*2($sp) +#endif + +#ifndef __riscv_misaligned_fast + andi $shr,$inp,3 + andi $inp,$inp,-4 # align $inp + slli $shr,$shr,3 # byte to bit offset +#endif + + lw $h0,0($ctx) # load hash value + lw $h1,4($ctx) + lw $h2,8($ctx) + lw $h3,12($ctx) + lw $h4,16($ctx) + + lw $r0,20($ctx) # load key + lw $r1,24($ctx) + lw $r2,28($ctx) + lw $r3,32($ctx) + lw $rs1,36($ctx) + lw $rs2,40($ctx) + lw $rs3,44($ctx) + + add $len,$len,$inp # end of buffer + +.Loop: + lw $d0,0($inp) # load input + lw $d1,4($inp) + lw $d2,8($inp) + lw $d3,12($inp) +#ifndef __riscv_misaligned_fast + beqz $shr,.Laligned_inp + + lw $t4,16($inp) + sub $t5,$zero,$shr + srlw $d0,$d0,$shr + sllw $t3,$d1,$t5 + srlw $d1,$d1,$shr + or $d0,$d0,$t3 + sllw $t3,$d2,$t5 + srlw $d2,$d2,$shr + or $d1,$d1,$t3 + sllw $t3,$d3,$t5 + srlw $d3,$d3,$shr + or $d2,$d2,$t3 + sllw $t4,$t4,$t5 + or $d3,$d3,$t4 + +.Laligned_inp: +#endif + srliw 
$t3,$h4,2 # modulo-scheduled reduction + andi $t4,$h4,-4 + andi $h4,$h4,3 + + addw $d0,$d0,$h0 # accumulate input + addw $t4,$t4,$t3 + sltu $h0,$d0,$h0 + addw $d0,$d0,$t4 # ... and residue + sltu $t4,$d0,$t4 + + addw $d1,$d1,$h1 + addw $h0,$h0,$t4 # carry + sltu $h1,$d1,$h1 + addw $d1,$d1,$h0 + sltu $h0,$d1,$h0 + + addw $d2,$d2,$h2 + addw $h1,$h1,$h0 # carry + sltu $h2,$d2,$h2 + addw $d2,$d2,$h1 + sltu $h1,$d2,$h1 + + addw $d3,$d3,$h3 + addw $h2,$h2,$h1 # carry + sltu $h3,$d3,$h3 + addw $d3,$d3,$h2 + + MULX ($h1,$h0,$r0,$d0) # d0*r0 + + sltu $h2,$d3,$h2 + addw $h3,$h3,$h2 # carry + + MULX ($t4,$t3,$rs3,$d1) # d1*s3 + + addw $h4,$h4,$padbit + caddi $inp,$inp,16 + addw $h4,$h4,$h3 + + MULX ($t6,$a3,$rs2,$d2) # d2*s2 + addw $h0,$h0,$t3 + addw $h1,$h1,$t4 + sltu $t3,$h0,$t3 + addw $h1,$h1,$t3 + + MULX ($t4,$t3,$rs1,$d3) # d3*s1 + addw $h0,$h0,$a3 + addw $h1,$h1,$t6 + sltu $a3,$h0,$a3 + addw $h1,$h1,$a3 + + + MULX ($h2,$a3,$r1,$d0) # d0*r1 + addw $h0,$h0,$t3 + addw $h1,$h1,$t4 + sltu $t3,$h0,$t3 + addw $h1,$h1,$t3 + + MULX ($t4,$t3,$r0,$d1) # d1*r0 + addw $h1,$h1,$a3 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + MULX ($t6,$a3,$rs3,$d2) # d2*s3 + addw $h1,$h1,$t3 + addw $h2,$h2,$t4 + sltu $t3,$h1,$t3 + addw $h2,$h2,$t3 + + MULX ($t4,$t3,$rs2,$d3) # d3*s2 + addw $h1,$h1,$a3 + addw $h2,$h2,$t6 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + mulw $a3,$rs1,$h4 # h4*s1 + addw $h1,$h1,$t3 + addw $h2,$h2,$t4 + sltu $t3,$h1,$t3 + addw $h2,$h2,$t3 + + + MULX ($h3,$t3,$r2,$d0) # d0*r2 + addw $h1,$h1,$a3 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + MULX ($t6,$a3,$r1,$d1) # d1*r1 + addw $h2,$h2,$t3 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + MULX ($t4,$t3,$r0,$d2) # d2*r0 + addw $h2,$h2,$a3 + addw $h3,$h3,$t6 + sltu $a3,$h2,$a3 + addw $h3,$h3,$a3 + + MULX ($t6,$a3,$rs3,$d3) # d3*s3 + addw $h2,$h2,$t3 + addw $h3,$h3,$t4 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + mulw $t3,$rs2,$h4 # h4*s2 + addw $h2,$h2,$a3 + addw $h3,$h3,$t6 + sltu $a3,$h2,$a3 + addw $h3,$h3,$a3 + + + MULX ($t6,$a3,$r3,$d0) # d0*r3 + addw $h2,$h2,$t3 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + MULX ($t4,$t3,$r2,$d1) # d1*r2 + addw $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addw $t6,$t6,$a3 + + MULX ($a3,$d3,$r0,$d3) # d3*r0 + addw $h3,$h3,$t3 + addw $t6,$t6,$t4 + sltu $t3,$h3,$t3 + addw $t6,$t6,$t3 + + MULX ($t4,$t3,$r1,$d2) # d2*r1 + addw $h3,$h3,$d3 + addw $t6,$t6,$a3 + sltu $d3,$h3,$d3 + addw $t6,$t6,$d3 + + mulw $a3,$rs3,$h4 # h4*s3 + addw $h3,$h3,$t3 + addw $t6,$t6,$t4 + sltu $t3,$h3,$t3 + addw $t6,$t6,$t3 + + + mulw $h4,$r0,$h4 # h4*r0 + addw $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addw $t6,$t6,$a3 + addw $h4,$t6,$h4 + + li $padbit,1 # if we loop, padbit is 1 + + bne $inp,$len,.Loop + + sw $h0,0($ctx) # store hash value + sw $h1,4($ctx) + sw $h2,8($ctx) + sw $h3,12($ctx) + sw $h4,16($ctx) + +#ifdef __riscv_zcmp + cm.popret {ra,s0-s8}, 48 +#else + POP $ra, __SIZEOF_POINTER__*11($sp) + POP $s0, __SIZEOF_POINTER__*10($sp) + POP $s1, __SIZEOF_POINTER__*9($sp) + POP $s2, __SIZEOF_POINTER__*8($sp) + POP $s3, __SIZEOF_POINTER__*7($sp) + POP $s4, __SIZEOF_POINTER__*6($sp) + POP $s5, __SIZEOF_POINTER__*5($sp) + POP $s6, __SIZEOF_POINTER__*4($sp) + POP $s7, __SIZEOF_POINTER__*3($sp) + POP $s8, __SIZEOF_POINTER__*2($sp) + caddi $sp,$sp,__SIZEOF_POINTER__*12 +#endif +.Labort: + ret +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); + +$code.=<<___; +.globl poly1305_emit +.type poly1305_emit,\@function +poly1305_emit: +#ifdef __riscv_zicfilp + lpad 0 +#endif + lw $tmp4,16($ctx) + lw $tmp0,0($ctx) + lw $tmp1,4($ctx) + lw 
$tmp2,8($ctx) + lw $tmp3,12($ctx) + + srliw $ctx,$tmp4,2 # final reduction + andi $in0,$tmp4,-4 + andi $tmp4,$tmp4,3 + addw $ctx,$ctx,$in0 + + addw $tmp0,$tmp0,$ctx + sltu $ctx,$tmp0,$ctx + addiw $in0,$tmp0,5 # compare to modulus + addw $tmp1,$tmp1,$ctx + sltiu $in1,$in0,5 + sltu $ctx,$tmp1,$ctx + addw $in1,$in1,$tmp1 + addw $tmp2,$tmp2,$ctx + sltu $in2,$in1,$tmp1 + sltu $ctx,$tmp2,$ctx + addw $in2,$in2,$tmp2 + addw $tmp3,$tmp3,$ctx + sltu $in3,$in2,$tmp2 + sltu $ctx,$tmp3,$ctx + addw $in3,$in3,$tmp3 + addw $tmp4,$tmp4,$ctx + sltu $ctx,$in3,$tmp3 + addw $ctx,$ctx,$tmp4 + + srl $ctx,$ctx,2 # see if it carried/borrowed + sub $ctx,$zero,$ctx + + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + xor $in2,$in2,$tmp2 + xor $in3,$in3,$tmp3 + and $in0,$in0,$ctx + and $in1,$in1,$ctx + and $in2,$in2,$ctx + and $in3,$in3,$ctx + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + xor $in2,$in2,$tmp2 + xor $in3,$in3,$tmp3 + + lw $tmp0,0($nonce) # load nonce + lw $tmp1,4($nonce) + lw $tmp2,8($nonce) + lw $tmp3,12($nonce) + + addw $in0,$in0,$tmp0 # accumulate nonce + sltu $ctx,$in0,$tmp0 + + addw $in1,$in1,$tmp1 + sltu $tmp1,$in1,$tmp1 + addw $in1,$in1,$ctx + sltu $ctx,$in1,$ctx + addw $ctx,$ctx,$tmp1 + + addw $in2,$in2,$tmp2 + sltu $tmp2,$in2,$tmp2 + addw $in2,$in2,$ctx + sltu $ctx,$in2,$ctx + addw $ctx,$ctx,$tmp2 + + addw $in3,$in3,$tmp3 + addw $in3,$in3,$ctx + +#ifdef __riscv_misaligned_fast + sw $in0,0($mac) # write mac value + sw $in1,4($mac) + sw $in2,8($mac) + sw $in3,12($mac) +#else + srl $tmp0,$in0,8 # write mac value + srl $tmp1,$in0,16 + srl $tmp2,$in0,24 + sb $in0, 0($mac) + sb $tmp0,1($mac) + srl $tmp0,$in1,8 + sb $tmp1,2($mac) + srl $tmp1,$in1,16 + sb $tmp2,3($mac) + srl $tmp2,$in1,24 + sb $in1, 4($mac) + sb $tmp0,5($mac) + srl $tmp0,$in2,8 + sb $tmp1,6($mac) + srl $tmp1,$in2,16 + sb $tmp2,7($mac) + srl $tmp2,$in2,24 + sb $in2, 8($mac) + sb $tmp0,9($mac) + srl $tmp0,$in3,8 + sb $tmp1,10($mac) + srl $tmp1,$in3,16 + sb $tmp2,11($mac) + srl $tmp2,$in3,24 + sb $in3, 12($mac) + sb $tmp0,13($mac) + sb $tmp1,14($mac) + sb $tmp2,15($mac) +#endif + + ret +.size poly1305_emit,.-poly1305_emit +.string "Poly1305 for RISC-V, CRYPTOGAMS by \@dot-asm" +___ +} +}}} + +foreach (split("\n", $code)) { + if ($flavour =~ /^cheri/) { + s/\(x([0-9]+)\)/(c$1)/ and s/\b([ls][bhwd]u?)\b/c$1/; + s/\b(PUSH|POP)(\s+)x([0-9]+)/$1$2c$3/ or + s/\b(ret|jal)\b/c$1/; + s/\bcaddi?\b/cincoffset/ and s/\bx([0-9]+,)/c$1/g or + m/\bcmove\b/ and s/\bx([0-9]+)/c$1/g; + } else { + s/\bcaddi?\b/add/ or + s/\bcmove\b/mv/; + } + print $_, "\n"; +} + +close STDOUT; diff --git a/lib/crypto/riscv/poly1305.h b/lib/crypto/riscv/poly1305.h new file mode 100644 index 000000000000..88f3df44e355 --- /dev/null +++ b/lib/crypto/riscv/poly1305.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for riscv + * + * Copyright (C) 2025 Institute of Software, CAS. + */ + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); -- cgit v1.2.3 From c4b846ff6ecab0427cc7dcccbe0af60b244a6d56 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:22 -0700 Subject: lib/crypto: chacha: Remove unused function chacha_is_arch_optimized() chacha_is_arch_optimized() is no longer used, so remove it. 
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/chacha-glue.c | 7 ------- lib/crypto/arm64/chacha-neon-glue.c | 6 ------ lib/crypto/mips/chacha-glue.c | 6 ------ lib/crypto/powerpc/chacha-p10-glue.c | 6 ------ lib/crypto/riscv/chacha-riscv64-glue.c | 6 ------ lib/crypto/s390/chacha-glue.c | 6 ------ lib/crypto/x86/chacha_glue.c | 6 ------ 7 files changed, 43 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/chacha-glue.c b/lib/crypto/arm/chacha-glue.c index 88ec96415283..67ba045cae35 100644 --- a/lib/crypto/arm/chacha-glue.c +++ b/lib/crypto/arm/chacha-glue.c @@ -101,13 +101,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - /* We always can use at least the ARM scalar implementation. */ - return true; -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init chacha_arm_mod_init(void) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { diff --git a/lib/crypto/arm64/chacha-neon-glue.c b/lib/crypto/arm64/chacha-neon-glue.c index d0188f974ca5..48097aa34af7 100644 --- a/lib/crypto/arm64/chacha-neon-glue.c +++ b/lib/crypto/arm64/chacha-neon-glue.c @@ -95,12 +95,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&have_neon); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init chacha_simd_mod_init(void) { if (cpu_have_named_feature(ASIMD)) diff --git a/lib/crypto/mips/chacha-glue.c b/lib/crypto/mips/chacha-glue.c index 88c097594eb0..f8390af21dc9 100644 --- a/lib/crypto/mips/chacha-glue.c +++ b/lib/crypto/mips/chacha-glue.c @@ -18,12 +18,6 @@ asmlinkage void hchacha_block_arch(const struct chacha_state *state, u32 out[HCHACHA_OUT_WORDS], int nrounds); EXPORT_SYMBOL(hchacha_block_arch); -bool chacha_is_arch_optimized(void) -{ - return true; -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - MODULE_DESCRIPTION("ChaCha and HChaCha functions (MIPS optimized)"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/powerpc/chacha-p10-glue.c b/lib/crypto/powerpc/chacha-p10-glue.c index fcd23c6f1590..5d3d5506d7f9 100644 --- a/lib/crypto/powerpc/chacha-p10-glue.c +++ b/lib/crypto/powerpc/chacha-p10-glue.c @@ -76,12 +76,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&have_p10); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init chacha_p10_init(void) { if (cpu_has_feature(CPU_FTR_ARCH_31)) diff --git a/lib/crypto/riscv/chacha-riscv64-glue.c b/lib/crypto/riscv/chacha-riscv64-glue.c index 8c3f11d79be3..a15f0aca3fc4 100644 --- a/lib/crypto/riscv/chacha-riscv64-glue.c +++ b/lib/crypto/riscv/chacha-riscv64-glue.c @@ -50,12 +50,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&use_zvkb); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init riscv64_chacha_mod_init(void) { if (riscv_isa_extension_available(NULL, ZVKB) && diff --git a/lib/crypto/s390/chacha-glue.c b/lib/crypto/s390/chacha-glue.c index c57dc851214f..d8137387fe28 100644 --- a/lib/crypto/s390/chacha-glue.c +++ b/lib/crypto/s390/chacha-glue.c @@ 
-47,11 +47,5 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return cpu_has_vx(); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)"); MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/x86/chacha_glue.c b/lib/crypto/x86/chacha_glue.c index 10b2c945f541..de7da9d512af 100644 --- a/lib/crypto/x86/chacha_glue.c +++ b/lib/crypto/x86/chacha_glue.c @@ -160,12 +160,6 @@ void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, } EXPORT_SYMBOL(chacha_crypt_arch); -bool chacha_is_arch_optimized(void) -{ - return static_key_enabled(&chacha_use_simd); -} -EXPORT_SYMBOL(chacha_is_arch_optimized); - static int __init chacha_simd_mod_init(void) { if (!boot_cpu_has(X86_FEATURE_SSSE3)) -- cgit v1.2.3 From 20a1acb68d7a16481b70a693d49c2a42882f57a9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:23 -0700 Subject: lib/crypto: chacha: Rename chacha.c to chacha-block-generic.c Rename chacha.c to chacha-block-generic.c to free up the name chacha.c for the high-level API entry points (chacha_crypt() and hchacha_block()), similar to the other algorithms. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-5-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 4 +- lib/crypto/chacha-block-generic.c | 114 ++++++++++++++++++++++++++++++++++++++ lib/crypto/chacha.c | 114 -------------------------------------- 3 files changed, 116 insertions(+), 116 deletions(-) create mode 100644 lib/crypto/chacha-block-generic.c delete mode 100644 lib/crypto/chacha.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index cd460e5e3dd2..e71c4bee8310 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -15,8 +15,8 @@ obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o obj-$(CONFIG_CRYPTO_LIB_UTILS) += libcryptoutils.o libcryptoutils-y := memneq.o utils.o -# chacha is used by the /dev/random driver which is always builtin -obj-y += chacha.o +# chacha20_block() is used by the /dev/random driver which is always builtin +obj-y += chacha-block-generic.o obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o diff --git a/lib/crypto/chacha-block-generic.c b/lib/crypto/chacha-block-generic.c new file mode 100644 index 000000000000..77f68de71066 --- /dev/null +++ b/lib/crypto/chacha-block-generic.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) + * + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include +#include +#include +#include +#include + +static void chacha_permute(struct chacha_state *state, int nrounds) +{ + u32 *x = state->x; + int i; + + /* whitelist the allowed round counts */ + WARN_ON_ONCE(nrounds != 20 && nrounds != 12); + + for (i = 0; i < nrounds; i += 2) { + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); + x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); + + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); + + x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); + x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); + x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); + x[3] += 
x[7]; x[15] = rol32(x[15] ^ x[3], 8); + + x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); + x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); + x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); + x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); + + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); + + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); + + x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); + x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); + x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); + x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); + + x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); + x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); + x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); + x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); + } +} + +/** + * chacha_block_generic - generate one keystream block and increment block counter + * @state: input state matrix + * @out: output keystream block + * @nrounds: number of rounds (20 or 12; 20 is recommended) + * + * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. + * The caller has already converted the endianness of the input. This function + * also handles incrementing the block counter in the input matrix. + */ +void chacha_block_generic(struct chacha_state *state, + u8 out[CHACHA_BLOCK_SIZE], int nrounds) +{ + struct chacha_state permuted_state = *state; + int i; + + chacha_permute(&permuted_state, nrounds); + + for (i = 0; i < ARRAY_SIZE(state->x); i++) + put_unaligned_le32(permuted_state.x[i] + state->x[i], + &out[i * sizeof(u32)]); + + state->x[12]++; +} +EXPORT_SYMBOL(chacha_block_generic); + +/** + * hchacha_block_generic - abbreviated ChaCha core, for XChaCha + * @state: input state matrix + * @out: the output words + * @nrounds: number of rounds (20 or 12; 20 is recommended) + * + * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step + * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha + * skips the final addition of the initial state, and outputs only certain words + * of the state. It should not be used for streaming directly. 
+ */ +void hchacha_block_generic(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + struct chacha_state permuted_state = *state; + + chacha_permute(&permuted_state, nrounds); + + memcpy(&out[0], &permuted_state.x[0], 16); + memcpy(&out[4], &permuted_state.x[12], 16); +} +EXPORT_SYMBOL(hchacha_block_generic); diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c deleted file mode 100644 index 77f68de71066..000000000000 --- a/lib/crypto/chacha.c +++ /dev/null @@ -1,114 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The "hash function" used as the core of the ChaCha stream cipher (RFC7539) - * - * Copyright (C) 2015 Martin Willi - */ - -#include -#include -#include -#include -#include -#include -#include - -static void chacha_permute(struct chacha_state *state, int nrounds) -{ - u32 *x = state->x; - int i; - - /* whitelist the allowed round counts */ - WARN_ON_ONCE(nrounds != 20 && nrounds != 12); - - for (i = 0; i < nrounds; i += 2) { - x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 16); - x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 16); - x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 16); - x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 16); - - x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 12); - x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 12); - x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 12); - x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 12); - - x[0] += x[4]; x[12] = rol32(x[12] ^ x[0], 8); - x[1] += x[5]; x[13] = rol32(x[13] ^ x[1], 8); - x[2] += x[6]; x[14] = rol32(x[14] ^ x[2], 8); - x[3] += x[7]; x[15] = rol32(x[15] ^ x[3], 8); - - x[8] += x[12]; x[4] = rol32(x[4] ^ x[8], 7); - x[9] += x[13]; x[5] = rol32(x[5] ^ x[9], 7); - x[10] += x[14]; x[6] = rol32(x[6] ^ x[10], 7); - x[11] += x[15]; x[7] = rol32(x[7] ^ x[11], 7); - - x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 16); - x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 16); - x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 16); - x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 16); - - x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 12); - x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 12); - x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 12); - x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 12); - - x[0] += x[5]; x[15] = rol32(x[15] ^ x[0], 8); - x[1] += x[6]; x[12] = rol32(x[12] ^ x[1], 8); - x[2] += x[7]; x[13] = rol32(x[13] ^ x[2], 8); - x[3] += x[4]; x[14] = rol32(x[14] ^ x[3], 8); - - x[10] += x[15]; x[5] = rol32(x[5] ^ x[10], 7); - x[11] += x[12]; x[6] = rol32(x[6] ^ x[11], 7); - x[8] += x[13]; x[7] = rol32(x[7] ^ x[8], 7); - x[9] += x[14]; x[4] = rol32(x[4] ^ x[9], 7); - } -} - -/** - * chacha_block_generic - generate one keystream block and increment block counter - * @state: input state matrix - * @out: output keystream block - * @nrounds: number of rounds (20 or 12; 20 is recommended) - * - * This is the ChaCha core, a function from 64-byte strings to 64-byte strings. - * The caller has already converted the endianness of the input. This function - * also handles incrementing the block counter in the input matrix. 
- */ -void chacha_block_generic(struct chacha_state *state, - u8 out[CHACHA_BLOCK_SIZE], int nrounds) -{ - struct chacha_state permuted_state = *state; - int i; - - chacha_permute(&permuted_state, nrounds); - - for (i = 0; i < ARRAY_SIZE(state->x); i++) - put_unaligned_le32(permuted_state.x[i] + state->x[i], - &out[i * sizeof(u32)]); - - state->x[12]++; -} -EXPORT_SYMBOL(chacha_block_generic); - -/** - * hchacha_block_generic - abbreviated ChaCha core, for XChaCha - * @state: input state matrix - * @out: the output words - * @nrounds: number of rounds (20 or 12; 20 is recommended) - * - * HChaCha is the ChaCha equivalent of HSalsa and is an intermediate step - * towards XChaCha (see https://cr.yp.to/snuffle/xsalsa-20081128.pdf). HChaCha - * skips the final addition of the initial state, and outputs only certain words - * of the state. It should not be used for streaming directly. - */ -void hchacha_block_generic(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - struct chacha_state permuted_state = *state; - - chacha_permute(&permuted_state, nrounds); - - memcpy(&out[0], &permuted_state.x[0], 16); - memcpy(&out[4], &permuted_state.x[12], 16); -} -EXPORT_SYMBOL(hchacha_block_generic); -- cgit v1.2.3 From 1ae46b6eb5b9a97978fe12a71f5de53ab977297f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:24 -0700 Subject: lib/crypto: chacha: Rename libchacha.c to chacha.c Rename libchacha.c to chacha.c to make the naming consistent with other algorithms and allow additional source files to be added to the libchacha module. This file currently contains chacha_crypt_generic(), but it will soon be updated to contain chacha_crypt(). Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-6-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 1 + lib/crypto/chacha.c | 35 +++++++++++++++++++++++++++++++++++ lib/crypto/libchacha.c | 35 ----------------------------------- 3 files changed, 36 insertions(+), 35 deletions(-) create mode 100644 lib/crypto/chacha.c delete mode 100644 lib/crypto/libchacha.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index e71c4bee8310..a006048ba2bd 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -18,6 +18,7 @@ libcryptoutils-y := memneq.o utils.o # chacha20_block() is used by the /dev/random driver which is always builtin obj-y += chacha-block-generic.o obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o +libchacha-y := chacha.o obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o libaes-y := aes.o diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c new file mode 100644 index 000000000000..26862ad90a96 --- /dev/null +++ b/lib/crypto/chacha.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * The ChaCha stream cipher (RFC7539) + * + * Copyright (C) 2015 Martin Willi + */ + +#include // for crypto_xor_cpy +#include +#include +#include +#include + +void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + /* aligned to potentially speed up crypto_xor() */ + u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); + + while (bytes >= CHACHA_BLOCK_SIZE) { + chacha_block_generic(state, stream, nrounds); + crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); + bytes -= CHACHA_BLOCK_SIZE; + dst += CHACHA_BLOCK_SIZE; + src += CHACHA_BLOCK_SIZE; + } + if (bytes) { + chacha_block_generic(state, stream, nrounds); + crypto_xor_cpy(dst, src, stream, bytes); + } +} 
+EXPORT_SYMBOL(chacha_crypt_generic); + +MODULE_DESCRIPTION("ChaCha stream cipher (RFC7539)"); +MODULE_LICENSE("GPL"); diff --git a/lib/crypto/libchacha.c b/lib/crypto/libchacha.c deleted file mode 100644 index 26862ad90a96..000000000000 --- a/lib/crypto/libchacha.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The ChaCha stream cipher (RFC7539) - * - * Copyright (C) 2015 Martin Willi - */ - -#include // for crypto_xor_cpy -#include -#include -#include -#include - -void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - /* aligned to potentially speed up crypto_xor() */ - u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); - - while (bytes >= CHACHA_BLOCK_SIZE) { - chacha_block_generic(state, stream, nrounds); - crypto_xor_cpy(dst, src, stream, CHACHA_BLOCK_SIZE); - bytes -= CHACHA_BLOCK_SIZE; - dst += CHACHA_BLOCK_SIZE; - src += CHACHA_BLOCK_SIZE; - } - if (bytes) { - chacha_block_generic(state, stream, nrounds); - crypto_xor_cpy(dst, src, stream, bytes); - } -} -EXPORT_SYMBOL(chacha_crypt_generic); - -MODULE_DESCRIPTION("ChaCha stream cipher (RFC7539)"); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 13cecc526d8fe7eeb9b136159738688a1a10cd82 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:25 -0700 Subject: lib/crypto: chacha: Consolidate into single module Consolidate the ChaCha code into a single module (excluding chacha-block-generic.c which remains always built-in for random.c), similar to various other algorithms: - Each arch now provides a header file lib/crypto/$(SRCARCH)/chacha.h, replacing lib/crypto/$(SRCARCH)/chacha*.c. The header defines chacha_crypt_arch() and hchacha_block_arch(). It is included by lib/crypto/chacha.c, and thus the code gets built into the single libchacha module, with improved inlining in some cases. - Whether arch-optimized ChaCha is buildable is now controlled centrally by lib/crypto/Kconfig instead of by lib/crypto/$(SRCARCH)/Kconfig. The conditions for enabling it remain the same as before, and it remains enabled by default. - Any additional arch-specific translation units for the optimized ChaCha code, such as assembly files, are now compiled by lib/crypto/Makefile instead of lib/crypto/$(SRCARCH)/Makefile. This removes the last use for the Makefile and Kconfig files in the arm64, mips, powerpc, riscv, and s390 subdirectories of lib/crypto/. So also remove those files and the references to them. 
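In outline, the single lib/crypto/chacha.c now pulls in the per-arch header
when CONFIG_CRYPTO_LIB_CHACHA_ARCH is set and falls back to the generic
helpers otherwise. A simplified sketch of the resulting dispatch (the full
hunk appears in the chacha.c diff below; hchacha_block() is wrapped the same
way):

	#ifdef CONFIG_CRYPTO_LIB_CHACHA_ARCH
	#include "chacha.h"	/* resolves to lib/crypto/$(SRCARCH)/chacha.h */
	#else
	#define chacha_crypt_arch	chacha_crypt_generic
	#define hchacha_block_arch	hchacha_block_generic
	#endif

	void chacha_crypt(struct chacha_state *state, u8 *dst, const u8 *src,
			  unsigned int bytes, int nrounds)
	{
		chacha_crypt_arch(state, dst, src, bytes, nrounds);
	}
	EXPORT_SYMBOL_GPL(chacha_crypt);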
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-7-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 47 +++----- lib/crypto/Makefile | 43 ++++++-- lib/crypto/arm/Kconfig | 5 - lib/crypto/arm/Makefile | 4 - lib/crypto/arm/chacha-glue.c | 131 ----------------------- lib/crypto/arm/chacha.h | 117 ++++++++++++++++++++ lib/crypto/arm64/Kconfig | 8 -- lib/crypto/arm64/Makefile | 4 - lib/crypto/arm64/chacha-neon-glue.c | 113 -------------------- lib/crypto/arm64/chacha.h | 99 +++++++++++++++++ lib/crypto/chacha.c | 41 ++++++- lib/crypto/mips/Kconfig | 7 -- lib/crypto/mips/Makefile | 5 - lib/crypto/mips/chacha-glue.c | 23 ---- lib/crypto/mips/chacha.h | 14 +++ lib/crypto/powerpc/Kconfig | 8 -- lib/crypto/powerpc/Makefile | 4 - lib/crypto/powerpc/chacha-p10-glue.c | 94 ---------------- lib/crypto/powerpc/chacha.h | 76 +++++++++++++ lib/crypto/riscv/Kconfig | 8 -- lib/crypto/riscv/Makefile | 4 - lib/crypto/riscv/chacha-riscv64-glue.c | 69 ------------ lib/crypto/riscv/chacha.h | 51 +++++++++ lib/crypto/s390/Kconfig | 7 -- lib/crypto/s390/Makefile | 4 - lib/crypto/s390/chacha-glue.c | 51 --------- lib/crypto/s390/chacha.h | 36 +++++++ lib/crypto/x86/Kconfig | 7 -- lib/crypto/x86/Makefile | 3 - lib/crypto/x86/chacha.h | 176 ++++++++++++++++++++++++++++++ lib/crypto/x86/chacha_glue.c | 190 --------------------------------- 31 files changed, 653 insertions(+), 796 deletions(-) delete mode 100644 lib/crypto/arm/chacha-glue.c create mode 100644 lib/crypto/arm/chacha.h delete mode 100644 lib/crypto/arm64/Kconfig delete mode 100644 lib/crypto/arm64/Makefile delete mode 100644 lib/crypto/arm64/chacha-neon-glue.c create mode 100644 lib/crypto/arm64/chacha.h delete mode 100644 lib/crypto/mips/Kconfig delete mode 100644 lib/crypto/mips/Makefile delete mode 100644 lib/crypto/mips/chacha-glue.c create mode 100644 lib/crypto/mips/chacha.h delete mode 100644 lib/crypto/powerpc/Kconfig delete mode 100644 lib/crypto/powerpc/Makefile delete mode 100644 lib/crypto/powerpc/chacha-p10-glue.c create mode 100644 lib/crypto/powerpc/chacha.h delete mode 100644 lib/crypto/riscv/Kconfig delete mode 100644 lib/crypto/riscv/Makefile delete mode 100644 lib/crypto/riscv/chacha-riscv64-glue.c create mode 100644 lib/crypto/riscv/chacha.h delete mode 100644 lib/crypto/s390/Kconfig delete mode 100644 lib/crypto/s390/Makefile delete mode 100644 lib/crypto/s390/chacha-glue.c create mode 100644 lib/crypto/s390/chacha.h create mode 100644 lib/crypto/x86/chacha.h delete mode 100644 lib/crypto/x86/chacha_glue.c (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index cb4e056a98fa..c1db483bc230 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -44,29 +44,23 @@ config CRYPTO_LIB_BLAKE2S_GENERIC implementation is enabled, this implementation serves the users of CRYPTO_LIB_BLAKE2S. -config CRYPTO_ARCH_HAVE_LIB_CHACHA - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the ChaCha library interface, - either builtin or as a module. - -config CRYPTO_LIB_CHACHA_GENERIC +config CRYPTO_LIB_CHACHA tristate - default CRYPTO_LIB_CHACHA if !CRYPTO_ARCH_HAVE_LIB_CHACHA select CRYPTO_LIB_UTILS help - This symbol can be selected by arch implementations of the ChaCha - library interface that require the generic code as a fallback, e.g., - for SIMD implementations. If no arch specific implementation is - enabled, this implementation serves the users of CRYPTO_LIB_CHACHA. + Enable the ChaCha library interface. 
Select this if your module uses + chacha_crypt() or hchacha_block(). -config CRYPTO_LIB_CHACHA - tristate - help - Enable the ChaCha library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. +config CRYPTO_LIB_CHACHA_ARCH + bool + depends on CRYPTO_LIB_CHACHA && !UML && !KMSAN + default y if ARM + default y if ARM64 && KERNEL_MODE_NEON + default y if MIPS && CPU_MIPS32_R2 + default y if PPC64 && CPU_LITTLE_ENDIAN && VSX + default y if RISCV && 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO + default y if S390 + default y if X86_64 config CRYPTO_ARCH_HAVE_LIB_CURVE25519 bool @@ -218,21 +212,6 @@ if !KMSAN # avoid false positives from assembly if ARM source "lib/crypto/arm/Kconfig" endif -if ARM64 -source "lib/crypto/arm64/Kconfig" -endif -if MIPS -source "lib/crypto/mips/Kconfig" -endif -if PPC -source "lib/crypto/powerpc/Kconfig" -endif -if RISCV -source "lib/crypto/riscv/Kconfig" -endif -if S390 -source "lib/crypto/s390/Kconfig" -endif if X86 source "lib/crypto/x86/Kconfig" endif diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index a006048ba2bd..baafd58b5dfd 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -15,11 +15,6 @@ obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o obj-$(CONFIG_CRYPTO_LIB_UTILS) += libcryptoutils.o libcryptoutils-y := memneq.o utils.o -# chacha20_block() is used by the /dev/random driver which is always builtin -obj-y += chacha-block-generic.o -obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC) += libchacha.o -libchacha-y := chacha.o - obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o libaes-y := aes.o @@ -40,6 +35,39 @@ libblake2s-y := blake2s.o libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o libblake2s-$(CONFIG_CRYPTO_SELFTESTS) += blake2s-selftest.o +################################################################################ + +# chacha20_block() is used by the /dev/random driver which is always builtin +obj-y += chacha-block-generic.o + +obj-$(CONFIG_CRYPTO_LIB_CHACHA) += libchacha.o +libchacha-y := chacha.o + +ifeq ($(CONFIG_CRYPTO_LIB_CHACHA_ARCH),y) +CFLAGS_chacha.o += -I$(src)/$(SRCARCH) + +ifeq ($(CONFIG_ARM),y) +libchacha-y += arm/chacha-scalar-core.o +libchacha-$(CONFIG_KERNEL_MODE_NEON) += arm/chacha-neon-core.o +endif + +libchacha-$(CONFIG_ARM64) += arm64/chacha-neon-core.o + +ifeq ($(CONFIG_MIPS),y) +libchacha-y += mips/chacha-core.o +AFLAGS_mips/chacha-core.o += -O2 # needed to fill branch delay slots +endif + +libchacha-$(CONFIG_PPC) += powerpc/chacha-p10le-8x.o +libchacha-$(CONFIG_RISCV) += riscv/chacha-riscv64-zvkb.o +libchacha-$(CONFIG_S390) += s390/chacha-s390.o +libchacha-$(CONFIG_X86) += x86/chacha-ssse3-x86_64.o \ + x86/chacha-avx2-x86_64.o \ + x86/chacha-avx512vl-x86_64.o +endif # CONFIG_CRYPTO_LIB_CHACHA_ARCH + +################################################################################ + obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o libchacha20poly1305-y += chacha20poly1305.o libchacha20poly1305-$(CONFIG_CRYPTO_SELFTESTS) += chacha20poly1305-selftest.o @@ -231,11 +259,6 @@ obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o libsm3-y := sm3.o obj-$(CONFIG_ARM) += arm/ -obj-$(CONFIG_ARM64) += arm64/ -obj-$(CONFIG_MIPS) += mips/ -obj-$(CONFIG_PPC) += powerpc/ -obj-$(CONFIG_RISCV) += riscv/ -obj-$(CONFIG_S390) += s390/ obj-$(CONFIG_X86) += x86/ # clean-files must be defined unconditionally diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig index 0d821e282c64..740341aa35d2 100644 --- 
a/lib/crypto/arm/Kconfig +++ b/lib/crypto/arm/Kconfig @@ -12,8 +12,3 @@ config CRYPTO_BLAKE2S_ARM BLAKE2b, but slower than the NEON implementation of BLAKE2b. There is no NEON implementation of BLAKE2s, since NEON doesn't really help with it. - -config CRYPTO_CHACHA20_NEON - tristate - default CRYPTO_LIB_CHACHA - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/arm/Makefile b/lib/crypto/arm/Makefile index 9f70e61d419e..0574b0e9739e 100644 --- a/lib/crypto/arm/Makefile +++ b/lib/crypto/arm/Makefile @@ -2,7 +2,3 @@ obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o libblake2s-arm-y := blake2s-core.o blake2s-glue.o - -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o -chacha-neon-y := chacha-scalar-core.o chacha-glue.o -chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o diff --git a/lib/crypto/arm/chacha-glue.c b/lib/crypto/arm/chacha-glue.c deleted file mode 100644 index 67ba045cae35..000000000000 --- a/lib/crypto/arm/chacha-glue.c +++ /dev/null @@ -1,131 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ChaCha and HChaCha functions (ARM optimized) - * - * Copyright (C) 2016-2019 Linaro, Ltd. - * Copyright (C) 2015 Martin Willi - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, - u8 *dst, const u8 *src, int nrounds); -asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, - u8 *dst, const u8 *src, - int nrounds, unsigned int nbytes); -asmlinkage void hchacha_block_arm(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); -asmlinkage void hchacha_block_neon(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); - -asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, - const struct chacha_state *state, int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); - -static inline bool neon_usable(void) -{ - return static_branch_likely(&use_neon) && crypto_simd_usable(); -} - -static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - u8 buf[CHACHA_BLOCK_SIZE]; - - while (bytes > CHACHA_BLOCK_SIZE) { - unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); - - chacha_4block_xor_neon(state, dst, src, nrounds, l); - bytes -= l; - src += l; - dst += l; - state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); - } - if (bytes) { - const u8 *s = src; - u8 *d = dst; - - if (bytes != CHACHA_BLOCK_SIZE) - s = d = memcpy(buf, src, bytes); - chacha_block_xor_neon(state, d, s, nrounds); - if (d != dst) - memcpy(dst, buf, bytes); - state->x[12]++; - } -} - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { - hchacha_block_arm(state, out, nrounds); - } else { - kernel_neon_begin(); - hchacha_block_neon(state, out, nrounds); - kernel_neon_end(); - } -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || - bytes <= CHACHA_BLOCK_SIZE) { - chacha_doarm(dst, src, bytes, state, nrounds); - state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); - return; - } - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - kernel_neon_begin(); - chacha_doneon(state, dst, src, todo, nrounds); - kernel_neon_end(); - - bytes -= todo; - src 
+= todo; - dst += todo; - } while (bytes); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init chacha_arm_mod_init(void) -{ - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { - switch (read_cpuid_part()) { - case ARM_CPU_PART_CORTEX_A7: - case ARM_CPU_PART_CORTEX_A5: - /* - * The Cortex-A7 and Cortex-A5 do not perform well with - * the NEON implementation but do incredibly with the - * scalar one and use less power. - */ - break; - default: - static_branch_enable(&use_neon); - } - } - return 0; -} -subsys_initcall(chacha_arm_mod_init); - -static void __exit chacha_arm_mod_exit(void) -{ -} -module_exit(chacha_arm_mod_exit); - -MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm/chacha.h b/lib/crypto/arm/chacha.h new file mode 100644 index 000000000000..0cae30f8ee5d --- /dev/null +++ b/lib/crypto/arm/chacha.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ChaCha and HChaCha functions (ARM optimized) + * + * Copyright (C) 2016-2019 Linaro, Ltd. + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include + +#include +#include +#include +#include + +asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, int nrounds); +asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, + int nrounds, unsigned int nbytes); +asmlinkage void hchacha_block_arm(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); +asmlinkage void hchacha_block_neon(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + const struct chacha_state *state, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); + +static inline bool neon_usable(void) +{ + return static_branch_likely(&use_neon) && crypto_simd_usable(); +} + +static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes > CHACHA_BLOCK_SIZE) { + unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); + + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); + } + if (bytes) { + const u8 *s = src; + u8 *d = dst; + + if (bytes != CHACHA_BLOCK_SIZE) + s = d = memcpy(buf, src, bytes); + chacha_block_xor_neon(state, d, s, nrounds); + if (d != dst) + memcpy(dst, buf, bytes); + state->x[12]++; + } +} + +static void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { + hchacha_block_arm(state, out, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, out, nrounds); + kernel_neon_end(); + } +} + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || + bytes <= CHACHA_BLOCK_SIZE) { + chacha_doarm(dst, src, bytes, state, nrounds); + state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); + return; + } + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} + +#define 
chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + case ARM_CPU_PART_CORTEX_A5: + /* + * The Cortex-A7 and Cortex-A5 do not perform well with + * the NEON implementation but do incredibly with the + * scalar one and use less power. + */ + break; + default: + static_branch_enable(&use_neon); + } + } +} diff --git a/lib/crypto/arm64/Kconfig b/lib/crypto/arm64/Kconfig deleted file mode 100644 index 07c8a4f0ab03..000000000000 --- a/lib/crypto/arm64/Kconfig +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA20_NEON - tristate - depends on KERNEL_MODE_NEON - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/arm64/Makefile b/lib/crypto/arm64/Makefile deleted file mode 100644 index d49cceca3d1c..000000000000 --- a/lib/crypto/arm64/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o -chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o diff --git a/lib/crypto/arm64/chacha-neon-glue.c b/lib/crypto/arm64/chacha-neon-glue.c deleted file mode 100644 index 48097aa34af7..000000000000 --- a/lib/crypto/arm64/chacha-neon-glue.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * ChaCha and HChaCha functions (ARM64 optimized) - * - * Copyright (C) 2016 - 2017 Linaro, Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on: - * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code - * - * Copyright (C) 2015 Martin Willi - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, - u8 *dst, const u8 *src, int nrounds); -asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, - u8 *dst, const u8 *src, - int nrounds, int bytes); -asmlinkage void hchacha_block_neon(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, - int bytes, int nrounds) -{ - while (bytes > 0) { - int l = min(bytes, CHACHA_BLOCK_SIZE * 5); - - if (l <= CHACHA_BLOCK_SIZE) { - u8 buf[CHACHA_BLOCK_SIZE]; - - memcpy(buf, src, l); - chacha_block_xor_neon(state, buf, buf, nrounds); - memcpy(dst, buf, l); - state->x[12] += 1; - break; - } - chacha_4block_xor_neon(state, dst, src, nrounds, l); - bytes -= l; - src += l; - dst += l; - state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); - } -} - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { - hchacha_block_generic(state, out, nrounds); - } else { - kernel_neon_begin(); - hchacha_block_neon(state, out, nrounds); - kernel_neon_end(); - } -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || - !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - kernel_neon_begin(); - chacha_doneon(state, dst, src, todo, nrounds); - kernel_neon_end(); - - bytes -= todo; - src += todo; - dst += todo; - } while (bytes); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init chacha_simd_mod_init(void) -{ - if (cpu_have_named_feature(ASIMD)) - static_branch_enable(&have_neon); - return 0; -} -subsys_initcall(chacha_simd_mod_init); - -static void __exit chacha_simd_mod_exit(void) -{ -} -module_exit(chacha_simd_mod_exit); - -MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM64 optimized)"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm64/chacha.h b/lib/crypto/arm64/chacha.h new file mode 100644 index 000000000000..ba6c22d46086 --- /dev/null +++ b/lib/crypto/arm64/chacha.h @@ -0,0 +1,99 @@ +/* + * ChaCha and HChaCha functions (ARM64 optimized) + * + * Copyright (C) 2016 - 2017 Linaro, Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on: + * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code + * + * Copyright (C) 2015 Martin Willi + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include + +#include +#include +#include + +asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, int nrounds); +asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, + int nrounds, int bytes); +asmlinkage void hchacha_block_neon(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, + int bytes, int nrounds) +{ + while (bytes > 0) { + int l = min(bytes, CHACHA_BLOCK_SIZE * 5); + + if (l <= CHACHA_BLOCK_SIZE) { + u8 buf[CHACHA_BLOCK_SIZE]; + + memcpy(buf, src, l); + chacha_block_xor_neon(state, buf, buf, nrounds); + memcpy(dst, buf, l); + state->x[12] += 1; + break; + } + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); + } +} + +static void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { + hchacha_block_generic(state, out, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, out, nrounds); + kernel_neon_end(); + } +} + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || + !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (cpu_have_named_feature(ASIMD)) + static_branch_enable(&have_neon); +} diff --git a/lib/crypto/chacha.c b/lib/crypto/chacha.c index 26862ad90a96..e0c7cb4af318 100644 --- a/lib/crypto/chacha.c +++ b/lib/crypto/chacha.c @@ -11,8 +11,9 @@ #include #include -void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) +static void __maybe_unused +chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) { /* aligned to potentially speed up crypto_xor() */ u8 stream[CHACHA_BLOCK_SIZE] __aligned(sizeof(long)); @@ -29,7 +30,41 @@ void chacha_crypt_generic(struct chacha_state *state, u8 *dst, const u8 *src, crypto_xor_cpy(dst, src, stream, bytes); } } -EXPORT_SYMBOL(chacha_crypt_generic); + +#ifdef CONFIG_CRYPTO_LIB_CHACHA_ARCH +#include "chacha.h" /* $(SRCARCH)/chacha.h */ +#else +#define chacha_crypt_arch chacha_crypt_generic +#define hchacha_block_arch hchacha_block_generic +#endif + +void chacha_crypt(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + chacha_crypt_arch(state, dst, src, bytes, nrounds); +} +EXPORT_SYMBOL_GPL(chacha_crypt); + +void hchacha_block(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + hchacha_block_arch(state, out, nrounds); +} +EXPORT_SYMBOL_GPL(hchacha_block); + +#ifdef chacha_mod_init_arch +static int __init chacha_mod_init(void) +{ + chacha_mod_init_arch(); + return 0; +} +subsys_initcall(chacha_mod_init); + +static void __exit chacha_mod_exit(void) +{ +} 
+module_exit(chacha_mod_exit); +#endif MODULE_DESCRIPTION("ChaCha stream cipher (RFC7539)"); MODULE_LICENSE("GPL"); diff --git a/lib/crypto/mips/Kconfig b/lib/crypto/mips/Kconfig deleted file mode 100644 index 94c1a0892c20..000000000000 --- a/lib/crypto/mips/Kconfig +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA_MIPS - tristate - depends on CPU_MIPS32_R2 - default CRYPTO_LIB_CHACHA - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/mips/Makefile b/lib/crypto/mips/Makefile deleted file mode 100644 index b5ea0e25c21e..000000000000 --- a/lib/crypto/mips/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o -chacha-mips-y := chacha-core.o chacha-glue.o -AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots diff --git a/lib/crypto/mips/chacha-glue.c b/lib/crypto/mips/chacha-glue.c deleted file mode 100644 index f8390af21dc9..000000000000 --- a/lib/crypto/mips/chacha-glue.c +++ /dev/null @@ -1,23 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ChaCha and HChaCha functions (MIPS optimized) - * - * Copyright (C) 2019 Linaro, Ltd. - */ - -#include -#include -#include - -asmlinkage void chacha_crypt_arch(struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int bytes, int nrounds); -EXPORT_SYMBOL(chacha_crypt_arch); - -asmlinkage void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); -EXPORT_SYMBOL(hchacha_block_arch); - -MODULE_DESCRIPTION("ChaCha and HChaCha functions (MIPS optimized)"); -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/mips/chacha.h b/lib/crypto/mips/chacha.h new file mode 100644 index 000000000000..0c18c0dc2a40 --- /dev/null +++ b/lib/crypto/mips/chacha.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ChaCha and HChaCha functions (MIPS optimized) + * + * Copyright (C) 2019 Linaro, Ltd. + */ + +#include + +asmlinkage void chacha_crypt_arch(struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int bytes, int nrounds); +asmlinkage void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); diff --git a/lib/crypto/powerpc/Kconfig b/lib/crypto/powerpc/Kconfig deleted file mode 100644 index e41012a61876..000000000000 --- a/lib/crypto/powerpc/Kconfig +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA20_P10 - tristate - depends on PPC64 && CPU_LITTLE_ENDIAN && VSX - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/powerpc/Makefile b/lib/crypto/powerpc/Makefile deleted file mode 100644 index 778a04edd226..000000000000 --- a/lib/crypto/powerpc/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o -chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o diff --git a/lib/crypto/powerpc/chacha-p10-glue.c b/lib/crypto/powerpc/chacha-p10-glue.c deleted file mode 100644 index 5d3d5506d7f9..000000000000 --- a/lib/crypto/powerpc/chacha-p10-glue.c +++ /dev/null @@ -1,94 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * ChaCha stream cipher (P10 accelerated) - * - * Copyright 2023- IBM Corp. All rights reserved. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void chacha_p10le_8x(const struct chacha_state *state, u8 *dst, - const u8 *src, unsigned int len, int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); - -static void vsx_begin(void) -{ - preempt_disable(); - enable_kernel_vsx(); -} - -static void vsx_end(void) -{ - disable_kernel_vsx(); - preempt_enable(); -} - -static void chacha_p10_do_8x(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - unsigned int l = bytes & ~0x0FF; - - if (l > 0) { - chacha_p10le_8x(state, dst, src, l, nrounds); - bytes -= l; - src += l; - dst += l; - state->x[12] += l / CHACHA_BLOCK_SIZE; - } - - if (bytes > 0) - chacha_crypt_generic(state, dst, src, bytes, nrounds); -} - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - hchacha_block_generic(state, out, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE || - !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - vsx_begin(); - chacha_p10_do_8x(state, dst, src, todo, nrounds); - vsx_end(); - - bytes -= todo; - src += todo; - dst += todo; - } while (bytes); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init chacha_p10_init(void) -{ - if (cpu_has_feature(CPU_FTR_ARCH_31)) - static_branch_enable(&have_p10); - return 0; -} -subsys_initcall(chacha_p10_init); - -static void __exit chacha_p10_exit(void) -{ -} -module_exit(chacha_p10_exit); - -MODULE_DESCRIPTION("ChaCha stream cipher (P10 accelerated)"); -MODULE_AUTHOR("Danny Tsen "); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/powerpc/chacha.h b/lib/crypto/powerpc/chacha.h new file mode 100644 index 000000000000..1df6e1ce31c4 --- /dev/null +++ b/lib/crypto/powerpc/chacha.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * ChaCha stream cipher (P10 accelerated) + * + * Copyright 2023- IBM Corp. All rights reserved. 
+ */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void chacha_p10le_8x(const struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int len, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); + +static void vsx_begin(void) +{ + preempt_disable(); + enable_kernel_vsx(); +} + +static void vsx_end(void) +{ + disable_kernel_vsx(); + preempt_enable(); +} + +static void chacha_p10_do_8x(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + unsigned int l = bytes & ~0x0FF; + + if (l > 0) { + chacha_p10le_8x(state, dst, src, l, nrounds); + bytes -= l; + src += l; + dst += l; + state->x[12] += l / CHACHA_BLOCK_SIZE; + } + + if (bytes > 0) + chacha_crypt_generic(state, dst, src, bytes, nrounds); +} + +#define hchacha_block_arch hchacha_block_generic /* not implemented yet */ + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE || + !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + vsx_begin(); + chacha_p10_do_8x(state, dst, src, todo, nrounds); + vsx_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + static_branch_enable(&have_p10); +} diff --git a/lib/crypto/riscv/Kconfig b/lib/crypto/riscv/Kconfig deleted file mode 100644 index bc7a43f33eb3..000000000000 --- a/lib/crypto/riscv/Kconfig +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA_RISCV64 - tristate - depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO - default CRYPTO_LIB_CHACHA - select CRYPTO_ARCH_HAVE_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC diff --git a/lib/crypto/riscv/Makefile b/lib/crypto/riscv/Makefile deleted file mode 100644 index e27b78f317fc..000000000000 --- a/lib/crypto/riscv/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o -chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o diff --git a/lib/crypto/riscv/chacha-riscv64-glue.c b/lib/crypto/riscv/chacha-riscv64-glue.c deleted file mode 100644 index a15f0aca3fc4..000000000000 --- a/lib/crypto/riscv/chacha-riscv64-glue.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ChaCha stream cipher (RISC-V optimized) - * - * Copyright (C) 2023 SiFive, Inc. 
- * Author: Jerry Shih - */ - -#include -#include -#include -#include -#include -#include - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb); - -asmlinkage void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out, - size_t nblocks, int nrounds); - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - hchacha_block_generic(state, out, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - u8 block_buffer[CHACHA_BLOCK_SIZE]; - unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE; - unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE; - - if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable()) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - kernel_vector_begin(); - if (full_blocks) { - chacha_zvkb(state, src, dst, full_blocks, nrounds); - src += full_blocks * CHACHA_BLOCK_SIZE; - dst += full_blocks * CHACHA_BLOCK_SIZE; - } - if (tail_bytes) { - memcpy(block_buffer, src, tail_bytes); - chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds); - memcpy(dst, block_buffer, tail_bytes); - } - kernel_vector_end(); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init riscv64_chacha_mod_init(void) -{ - if (riscv_isa_extension_available(NULL, ZVKB) && - riscv_vector_vlen() >= 128) - static_branch_enable(&use_zvkb); - return 0; -} -subsys_initcall(riscv64_chacha_mod_init); - -static void __exit riscv64_chacha_mod_exit(void) -{ -} -module_exit(riscv64_chacha_mod_exit); - -MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)"); -MODULE_AUTHOR("Jerry Shih "); -MODULE_LICENSE("GPL"); diff --git a/lib/crypto/riscv/chacha.h b/lib/crypto/riscv/chacha.h new file mode 100644 index 000000000000..5c000c6aef4b --- /dev/null +++ b/lib/crypto/riscv/chacha.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * ChaCha stream cipher (RISC-V optimized) + * + * Copyright (C) 2023 SiFive, Inc. 
+ * Author: Jerry Shih + */ + +#include +#include +#include +#include + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb); + +asmlinkage void chacha_zvkb(struct chacha_state *state, const u8 *in, u8 *out, + size_t nblocks, int nrounds); + +#define hchacha_block_arch hchacha_block_generic /* not implemented yet */ + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + u8 block_buffer[CHACHA_BLOCK_SIZE]; + unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE; + unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE; + + if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + kernel_vector_begin(); + if (full_blocks) { + chacha_zvkb(state, src, dst, full_blocks, nrounds); + src += full_blocks * CHACHA_BLOCK_SIZE; + dst += full_blocks * CHACHA_BLOCK_SIZE; + } + if (tail_bytes) { + memcpy(block_buffer, src, tail_bytes); + chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds); + memcpy(dst, block_buffer, tail_bytes); + } + kernel_vector_end(); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (riscv_isa_extension_available(NULL, ZVKB) && + riscv_vector_vlen() >= 128) + static_branch_enable(&use_zvkb); +} diff --git a/lib/crypto/s390/Kconfig b/lib/crypto/s390/Kconfig deleted file mode 100644 index 069b355fe51a..000000000000 --- a/lib/crypto/s390/Kconfig +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_CHACHA_S390 - tristate - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/s390/Makefile b/lib/crypto/s390/Makefile deleted file mode 100644 index 06c2cf77178e..000000000000 --- a/lib/crypto/s390/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_CHACHA_S390) += chacha_s390.o -chacha_s390-y := chacha-glue.o chacha-s390.o diff --git a/lib/crypto/s390/chacha-glue.c b/lib/crypto/s390/chacha-glue.c deleted file mode 100644 index d8137387fe28..000000000000 --- a/lib/crypto/s390/chacha-glue.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * ChaCha stream cipher (s390 optimized) - * - * Copyright IBM Corp. 
2021 - */ - -#define KMSG_COMPONENT "chacha_s390" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include "chacha-s390.h" - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - /* TODO: implement hchacha_block_arch() in assembly */ - hchacha_block_generic(state, out, nrounds); -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - /* s390 chacha20 implementation has 20 rounds hard-coded, - * it cannot handle a block of data or less, but otherwise - * it can handle data of arbitrary size - */ - if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) { - chacha_crypt_generic(state, dst, src, bytes, nrounds); - } else { - DECLARE_KERNEL_FPU_ONSTACK32(vxstate); - - kernel_fpu_begin(&vxstate, KERNEL_VXR); - chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]); - kernel_fpu_end(&vxstate, KERNEL_VXR); - - state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) / - CHACHA_BLOCK_SIZE; - } -} -EXPORT_SYMBOL(chacha_crypt_arch); - -MODULE_DESCRIPTION("ChaCha stream cipher (s390 optimized)"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/s390/chacha.h b/lib/crypto/s390/chacha.h new file mode 100644 index 000000000000..fd9c4a422365 --- /dev/null +++ b/lib/crypto/s390/chacha.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ChaCha stream cipher (s390 optimized) + * + * Copyright IBM Corp. 2021 + */ + +#include +#include +#include +#include +#include +#include "chacha-s390.h" + +#define hchacha_block_arch hchacha_block_generic /* not implemented yet */ + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + /* s390 chacha20 implementation has 20 rounds hard-coded, + * it cannot handle a block of data or less, but otherwise + * it can handle data of arbitrary size + */ + if (bytes <= CHACHA_BLOCK_SIZE || nrounds != 20 || !cpu_has_vx()) { + chacha_crypt_generic(state, dst, src, bytes, nrounds); + } else { + DECLARE_KERNEL_FPU_ONSTACK32(vxstate); + + kernel_fpu_begin(&vxstate, KERNEL_VXR); + chacha20_vx(dst, src, bytes, &state->x[4], &state->x[12]); + kernel_fpu_end(&vxstate, KERNEL_VXR); + + state->x[12] += round_up(bytes, CHACHA_BLOCK_SIZE) / + CHACHA_BLOCK_SIZE; + } +} diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig index 24dc9a59b272..eb47da71aa6b 100644 --- a/lib/crypto/x86/Kconfig +++ b/lib/crypto/x86/Kconfig @@ -11,10 +11,3 @@ config CRYPTO_BLAKE2S_X86 Architecture: x86_64 using: - SSSE3 (Supplemental SSE3) - AVX-512 (Advanced Vector Extensions-512) - -config CRYPTO_CHACHA20_X86_64 - tristate - depends on 64BIT - default CRYPTO_LIB_CHACHA - select CRYPTO_LIB_CHACHA_GENERIC - select CRYPTO_ARCH_HAVE_LIB_CHACHA diff --git a/lib/crypto/x86/Makefile b/lib/crypto/x86/Makefile index 16c9d76f9947..4454556b243e 100644 --- a/lib/crypto/x86/Makefile +++ b/lib/crypto/x86/Makefile @@ -2,6 +2,3 @@ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o - -obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha-x86_64.o -chacha-x86_64-y := chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha-avx512vl-x86_64.o chacha_glue.o diff --git a/lib/crypto/x86/chacha.h b/lib/crypto/x86/chacha.h new file mode 100644 index 000000000000..10cf8f1c569d --- /dev/null +++ b/lib/crypto/x86/chacha.h @@ -0,0 +1,176 @@ +/* SPDX-License-Identifier: 
GPL-2.0-or-later */ +/* + * ChaCha and HChaCha functions (x86_64 optimized) + * + * Copyright (C) 2015 Martin Willi + */ + +#include +#include +#include +#include + +asmlinkage void chacha_block_xor_ssse3(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_ssse3(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void hchacha_block_ssse3(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +asmlinkage void chacha_2block_xor_avx2(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx2(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx2(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); + +asmlinkage void chacha_2block_xor_avx512vl(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_4block_xor_avx512vl(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); +asmlinkage void chacha_8block_xor_avx512vl(const struct chacha_state *state, + u8 *dst, const u8 *src, + unsigned int len, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); + +static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) +{ + len = min(len, maxblocks * CHACHA_BLOCK_SIZE); + return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; +} + +static void chacha_dosimd(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + if (static_branch_likely(&chacha_use_avx512vl)) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { + chacha_8block_xor_avx512vl(state, dst, src, bytes, + nrounds); + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; + state->x[12] += 8; + } + if (bytes > CHACHA_BLOCK_SIZE * 4) { + chacha_8block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state->x[12] += chacha_advance(bytes, 8); + return; + } + if (bytes > CHACHA_BLOCK_SIZE * 2) { + chacha_4block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state->x[12] += chacha_advance(bytes, 4); + return; + } + if (bytes) { + chacha_2block_xor_avx512vl(state, dst, src, bytes, + nrounds); + state->x[12] += chacha_advance(bytes, 2); + return; + } + } + + if (static_branch_likely(&chacha_use_avx2)) { + while (bytes >= CHACHA_BLOCK_SIZE * 8) { + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 8; + src += CHACHA_BLOCK_SIZE * 8; + dst += CHACHA_BLOCK_SIZE * 8; + state->x[12] += 8; + } + if (bytes > CHACHA_BLOCK_SIZE * 4) { + chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); + state->x[12] += chacha_advance(bytes, 8); + return; + } + if (bytes > CHACHA_BLOCK_SIZE * 2) { + chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); + state->x[12] += chacha_advance(bytes, 4); + return; + } + if (bytes > CHACHA_BLOCK_SIZE) { + chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); + state->x[12] += chacha_advance(bytes, 2); + return; + } + } + + while (bytes >= CHACHA_BLOCK_SIZE * 4) { + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); + bytes -= CHACHA_BLOCK_SIZE * 4; + src += CHACHA_BLOCK_SIZE * 4; + 
dst += CHACHA_BLOCK_SIZE * 4; + state->x[12] += 4; + } + if (bytes > CHACHA_BLOCK_SIZE) { + chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); + state->x[12] += chacha_advance(bytes, 4); + return; + } + if (bytes) { + chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); + state->x[12]++; + } +} + +static void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!static_branch_likely(&chacha_use_simd)) { + hchacha_block_generic(state, out, nrounds); + } else { + kernel_fpu_begin(); + hchacha_block_ssse3(state, out, nrounds); + kernel_fpu_end(); + } +} + +static void chacha_crypt_arch(struct chacha_state *state, u8 *dst, + const u8 *src, unsigned int bytes, int nrounds) +{ + if (!static_branch_likely(&chacha_use_simd) || + bytes <= CHACHA_BLOCK_SIZE) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_fpu_begin(); + chacha_dosimd(state, dst, src, todo, nrounds); + kernel_fpu_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} + +#define chacha_mod_init_arch chacha_mod_init_arch +static void chacha_mod_init_arch(void) +{ + if (!boot_cpu_has(X86_FEATURE_SSSE3)) + return; + + static_branch_enable(&chacha_use_simd); + + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { + static_branch_enable(&chacha_use_avx2); + + if (boot_cpu_has(X86_FEATURE_AVX512VL) && + boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ + static_branch_enable(&chacha_use_avx512vl); + } +} diff --git a/lib/crypto/x86/chacha_glue.c b/lib/crypto/x86/chacha_glue.c deleted file mode 100644 index de7da9d512af..000000000000 --- a/lib/crypto/x86/chacha_glue.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * ChaCha and HChaCha functions (x86_64 optimized) - * - * Copyright (C) 2015 Martin Willi - */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void chacha_block_xor_ssse3(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_4block_xor_ssse3(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void hchacha_block_ssse3(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds); - -asmlinkage void chacha_2block_xor_avx2(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_4block_xor_avx2(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_8block_xor_avx2(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); - -asmlinkage void chacha_2block_xor_avx512vl(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_4block_xor_avx512vl(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); -asmlinkage void chacha_8block_xor_avx512vl(const struct chacha_state *state, - u8 *dst, const u8 *src, - unsigned int len, int nrounds); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl); - -static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks) -{ - len = 
min(len, maxblocks * CHACHA_BLOCK_SIZE); - return round_up(len, CHACHA_BLOCK_SIZE) / CHACHA_BLOCK_SIZE; -} - -static void chacha_dosimd(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (static_branch_likely(&chacha_use_avx512vl)) { - while (bytes >= CHACHA_BLOCK_SIZE * 8) { - chacha_8block_xor_avx512vl(state, dst, src, bytes, - nrounds); - bytes -= CHACHA_BLOCK_SIZE * 8; - src += CHACHA_BLOCK_SIZE * 8; - dst += CHACHA_BLOCK_SIZE * 8; - state->x[12] += 8; - } - if (bytes > CHACHA_BLOCK_SIZE * 4) { - chacha_8block_xor_avx512vl(state, dst, src, bytes, - nrounds); - state->x[12] += chacha_advance(bytes, 8); - return; - } - if (bytes > CHACHA_BLOCK_SIZE * 2) { - chacha_4block_xor_avx512vl(state, dst, src, bytes, - nrounds); - state->x[12] += chacha_advance(bytes, 4); - return; - } - if (bytes) { - chacha_2block_xor_avx512vl(state, dst, src, bytes, - nrounds); - state->x[12] += chacha_advance(bytes, 2); - return; - } - } - - if (static_branch_likely(&chacha_use_avx2)) { - while (bytes >= CHACHA_BLOCK_SIZE * 8) { - chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 8; - src += CHACHA_BLOCK_SIZE * 8; - dst += CHACHA_BLOCK_SIZE * 8; - state->x[12] += 8; - } - if (bytes > CHACHA_BLOCK_SIZE * 4) { - chacha_8block_xor_avx2(state, dst, src, bytes, nrounds); - state->x[12] += chacha_advance(bytes, 8); - return; - } - if (bytes > CHACHA_BLOCK_SIZE * 2) { - chacha_4block_xor_avx2(state, dst, src, bytes, nrounds); - state->x[12] += chacha_advance(bytes, 4); - return; - } - if (bytes > CHACHA_BLOCK_SIZE) { - chacha_2block_xor_avx2(state, dst, src, bytes, nrounds); - state->x[12] += chacha_advance(bytes, 2); - return; - } - } - - while (bytes >= CHACHA_BLOCK_SIZE * 4) { - chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); - bytes -= CHACHA_BLOCK_SIZE * 4; - src += CHACHA_BLOCK_SIZE * 4; - dst += CHACHA_BLOCK_SIZE * 4; - state->x[12] += 4; - } - if (bytes > CHACHA_BLOCK_SIZE) { - chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds); - state->x[12] += chacha_advance(bytes, 4); - return; - } - if (bytes) { - chacha_block_xor_ssse3(state, dst, src, bytes, nrounds); - state->x[12]++; - } -} - -void hchacha_block_arch(const struct chacha_state *state, - u32 out[HCHACHA_OUT_WORDS], int nrounds) -{ - if (!static_branch_likely(&chacha_use_simd)) { - hchacha_block_generic(state, out, nrounds); - } else { - kernel_fpu_begin(); - hchacha_block_ssse3(state, out, nrounds); - kernel_fpu_end(); - } -} -EXPORT_SYMBOL(hchacha_block_arch); - -void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, - unsigned int bytes, int nrounds) -{ - if (!static_branch_likely(&chacha_use_simd) || - bytes <= CHACHA_BLOCK_SIZE) - return chacha_crypt_generic(state, dst, src, bytes, nrounds); - - do { - unsigned int todo = min_t(unsigned int, bytes, SZ_4K); - - kernel_fpu_begin(); - chacha_dosimd(state, dst, src, todo, nrounds); - kernel_fpu_end(); - - bytes -= todo; - src += todo; - dst += todo; - } while (bytes); -} -EXPORT_SYMBOL(chacha_crypt_arch); - -static int __init chacha_simd_mod_init(void) -{ - if (!boot_cpu_has(X86_FEATURE_SSSE3)) - return 0; - - static_branch_enable(&chacha_use_simd); - - if (boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) { - static_branch_enable(&chacha_use_avx2); - - if (boot_cpu_has(X86_FEATURE_AVX512VL) && - boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */ - static_branch_enable(&chacha_use_avx512vl); - } - return 
0; -} -subsys_initcall(chacha_simd_mod_init); - -static void __exit chacha_simd_mod_exit(void) -{ -} -module_exit(chacha_simd_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("ChaCha and HChaCha functions (x86_64 optimized)"); -- cgit v1.2.3 From 453eda46b7f807f6fc4283f9639085697100ec08 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:26 -0700 Subject: lib/crypto: x86/blake2s: Reduce size of BLAKE2S_SIGMA2 Save 480 bytes of .rodata by replacing the .long constants with .bytes, and using the vpmovzxbd instruction to expand them. Also update the code to do the loads before incrementing %rax rather than after. This avoids the need for the first load to use an offset. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/x86/blake2s-core.S | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/crypto/x86/blake2s-core.S b/lib/crypto/x86/blake2s-core.S index ac1c845445a4..ef8e9f427aab 100644 --- a/lib/crypto/x86/blake2s-core.S +++ b/lib/crypto/x86/blake2s-core.S @@ -29,19 +29,19 @@ SIGMA: .byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6 .byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4 .byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12 -.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640 +.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 160 .align 64 SIGMA2: -.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 -.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 -.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 -.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 -.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 -.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 -.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 -.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 -.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 -.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 +.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13 +.byte 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7 +.byte 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9 +.byte 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5 +.byte 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12 +.byte 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9 +.byte 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0 +.byte 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10 +.byte 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14 +.byte 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9 .text SYM_FUNC_START(blake2s_compress_ssse3) @@ -193,9 +193,9 @@ SYM_FUNC_START(blake2s_compress_avx512) leaq SIGMA2(%rip),%rax movb $0xa,%cl .Lblake2s_compress_avx512_roundloop: - addq $0x40,%rax - vmovdqa -0x40(%rax),%ymm8 - vmovdqa -0x20(%rax),%ymm9 + vpmovzxbd (%rax),%ymm8 + vpmovzxbd 0x8(%rax),%ymm9 + addq $0x10,%rax vpermi2d %ymm7,%ymm6,%ymm8 vpermi2d %ymm7,%ymm6,%ymm9 vmovdqa %ymm8,%ymm6 -- cgit v1.2.3 From 126f5d90f6c855b39eebec17f93c2f9d2ce01ebb Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:27 -0700 Subject: lib/crypto: blake2s: Remove obsolete self-test Remove the original BLAKE2s self-test, since it will be superseded by blake2s_kunit. 
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 1 - lib/crypto/blake2s-selftest.c | 651 ------------------------------------------ lib/crypto/blake2s.c | 10 - 3 files changed, 662 deletions(-) delete mode 100644 lib/crypto/blake2s-selftest.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index baafd58b5dfd..b2d2745879d1 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o obj-y += libblake2s.o libblake2s-y := blake2s.o libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o -libblake2s-$(CONFIG_CRYPTO_SELFTESTS) += blake2s-selftest.o ################################################################################ diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c deleted file mode 100644 index d0634ed6a937..000000000000 --- a/lib/crypto/blake2s-selftest.c +++ /dev/null @@ -1,651 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ - -#include -#include -#include -#include - -/* - * blake2s_testvecs[] generated with the program below (using libb2-dev and - * libssl-dev [OpenSSL]) - * - * #include - * #include - * #include - * - * #include - * - * #define BLAKE2S_TESTVEC_COUNT 256 - * - * static void print_vec(const uint8_t vec[], int len) - * { - * int i; - * - * printf(" { "); - * for (i = 0; i < len; i++) { - * if (i && (i % 12) == 0) - * printf("\n "); - * printf("0x%02x, ", vec[i]); - * } - * printf("},\n"); - * } - * - * int main(void) - * { - * uint8_t key[BLAKE2S_KEYBYTES]; - * uint8_t buf[BLAKE2S_TESTVEC_COUNT]; - * uint8_t hash[BLAKE2S_OUTBYTES]; - * int i, j; - * - * key[0] = key[1] = 1; - * for (i = 2; i < BLAKE2S_KEYBYTES; ++i) - * key[i] = key[i - 2] + key[i - 1]; - * - * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) - * buf[i] = (uint8_t)i; - * - * printf("static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = {\n"); - * - * for (i = 0; i < BLAKE2S_TESTVEC_COUNT; ++i) { - * int outlen = 1 + i % BLAKE2S_OUTBYTES; - * int keylen = (13 * i) % (BLAKE2S_KEYBYTES + 1); - * - * blake2s(hash, buf, key + BLAKE2S_KEYBYTES - keylen, outlen, i, - * keylen); - * print_vec(hash, outlen); - * } - * printf("};\n\n"); - * - * return 0; - *} - */ -static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { - { 0xa1, }, - { 0x7c, 0x89, }, - { 0x74, 0x0e, 0xd4, }, - { 0x47, 0x0c, 0x21, 0x15, }, - { 0x18, 0xd6, 0x9c, 0xa6, 0xc4, }, - { 0x13, 0x5d, 0x16, 0x63, 0x2e, 0xf9, }, - { 0x2c, 0xb5, 0x04, 0xb7, 0x99, 0xe2, 0x73, }, - { 0x9a, 0x0f, 0xd2, 0x39, 0xd6, 0x68, 0x1b, 0x92, }, - { 0xc8, 0xde, 0x7a, 0xea, 0x2f, 0xf4, 0xd2, 0xe3, 0x2b, }, - { 0x5b, 0xf9, 0x43, 0x52, 0x0c, 0x12, 0xba, 0xb5, 0x93, 0x9f, }, - { 0xc6, 0x2c, 0x4e, 0x80, 0xfc, 0x32, 0x5b, 0x33, 0xb8, 0xb8, 0x0a, }, - { 0xa7, 0x5c, 0xfd, 0x3a, 0xcc, 0xbf, 0x90, 0xca, 0xb7, 0x97, 0xde, 0xd8, }, - { 0x66, 0xca, 0x3c, 0xc4, 0x19, 0xef, 0x92, 0x66, 0x3f, 0x21, 0x8f, 0xda, - 0xb7, }, - { 0xba, 0xe5, 0xbb, 0x30, 0x25, 0x94, 0x6d, 0xc3, 0x89, 0x09, 0xc4, 0x25, - 0x52, 0x3e, }, - { 0xa2, 0xef, 0x0e, 0x52, 0x0b, 0x5f, 0xa2, 0x01, 0x6d, 0x0a, 0x25, 0xbc, - 0x57, 0xe2, 0x27, }, - { 0x4f, 0xe0, 0xf9, 0x52, 0x12, 0xda, 0x84, 0xb7, 0xab, 0xae, 0xb0, 0xa6, - 0x47, 0x2a, 0xc7, 0xf5, }, - { 0x56, 0xe7, 0xa8, 0x1c, 0x4c, 0xca, 0xed, 0x90, 0x31, 0xec, 0x87, 0x43, - 0xe7, 0x72, 0x08, 0xec, 0xbe, }, - { 0x7e, 0xdf, 0x80, 
0x1c, 0x93, 0x33, 0xfd, 0x53, 0x44, 0xba, 0xfd, 0x96, - 0xe1, 0xbb, 0xb5, 0x65, 0xa5, 0x00, }, - { 0xec, 0x6b, 0xed, 0xf7, 0x7b, 0x62, 0x1d, 0x7d, 0xf4, 0x82, 0xf3, 0x1e, - 0x18, 0xff, 0x2b, 0xc4, 0x06, 0x20, 0x2a, }, - { 0x74, 0x98, 0xd7, 0x68, 0x63, 0xed, 0x87, 0xe4, 0x5d, 0x8d, 0x9e, 0x1d, - 0xfd, 0x2a, 0xbb, 0x86, 0xac, 0xe9, 0x2a, 0x89, }, - { 0x89, 0xc3, 0x88, 0xce, 0x2b, 0x33, 0x1e, 0x10, 0xd1, 0x37, 0x20, 0x86, - 0x28, 0x43, 0x70, 0xd9, 0xfb, 0x96, 0xd9, 0xb5, 0xd3, }, - { 0xcb, 0x56, 0x74, 0x41, 0x8d, 0x80, 0x01, 0x9a, 0x6b, 0x38, 0xe1, 0x41, - 0xad, 0x9c, 0x62, 0x74, 0xce, 0x35, 0xd5, 0x6c, 0x89, 0x6e, }, - { 0x79, 0xaf, 0x94, 0x59, 0x99, 0x26, 0xe1, 0xc9, 0x34, 0xfe, 0x7c, 0x22, - 0xf7, 0x43, 0xd7, 0x65, 0xd4, 0x48, 0x18, 0xac, 0x3d, 0xfd, 0x93, }, - { 0x85, 0x0d, 0xff, 0xb8, 0x3e, 0x87, 0x41, 0xb0, 0x95, 0xd3, 0x3d, 0x00, - 0x47, 0x55, 0x9e, 0xd2, 0x69, 0xea, 0xbf, 0xe9, 0x7a, 0x2d, 0x61, 0x45, }, - { 0x03, 0xe0, 0x85, 0xec, 0x54, 0xb5, 0x16, 0x53, 0xa8, 0xc4, 0x71, 0xe9, - 0x6a, 0xe7, 0xcb, 0xc4, 0x15, 0x02, 0xfc, 0x34, 0xa4, 0xa4, 0x28, 0x13, - 0xd1, }, - { 0xe3, 0x34, 0x4b, 0xe1, 0xd0, 0x4b, 0x55, 0x61, 0x8f, 0xc0, 0x24, 0x05, - 0xe6, 0xe0, 0x3d, 0x70, 0x24, 0x4d, 0xda, 0xb8, 0x91, 0x05, 0x29, 0x07, - 0x01, 0x3e, }, - { 0x61, 0xff, 0x01, 0x72, 0xb1, 0x4d, 0xf6, 0xfe, 0xd1, 0xd1, 0x08, 0x74, - 0xe6, 0x91, 0x44, 0xeb, 0x61, 0xda, 0x40, 0xaf, 0xfc, 0x8c, 0x91, 0x6b, - 0xec, 0x13, 0xed, }, - { 0xd4, 0x40, 0xd2, 0xa0, 0x7f, 0xc1, 0x58, 0x0c, 0x85, 0xa0, 0x86, 0xc7, - 0x86, 0xb9, 0x61, 0xc9, 0xea, 0x19, 0x86, 0x1f, 0xab, 0x07, 0xce, 0x37, - 0x72, 0x67, 0x09, 0xfc, }, - { 0x9e, 0xf8, 0x18, 0x67, 0x93, 0x10, 0x9b, 0x39, 0x75, 0xe8, 0x8b, 0x38, - 0x82, 0x7d, 0xb8, 0xb7, 0xa5, 0xaf, 0xe6, 0x6a, 0x22, 0x5e, 0x1f, 0x9c, - 0x95, 0x29, 0x19, 0xf2, 0x4b, }, - { 0xc8, 0x62, 0x25, 0xf5, 0x98, 0xc9, 0xea, 0xe5, 0x29, 0x3a, 0xd3, 0x22, - 0xeb, 0xeb, 0x07, 0x7c, 0x15, 0x07, 0xee, 0x15, 0x61, 0xbb, 0x05, 0x30, - 0x99, 0x7f, 0x11, 0xf6, 0x0a, 0x1d, }, - { 0x68, 0x70, 0xf7, 0x90, 0xa1, 0x8b, 0x1f, 0x0f, 0xbb, 0xce, 0xd2, 0x0e, - 0x33, 0x1f, 0x7f, 0xa9, 0x78, 0xa8, 0xa6, 0x81, 0x66, 0xab, 0x8d, 0xcd, - 0x58, 0x55, 0x3a, 0x0b, 0x7a, 0xdb, 0xb5, }, - { 0xdd, 0x35, 0xd2, 0xb4, 0xf6, 0xc7, 0xea, 0xab, 0x64, 0x24, 0x4e, 0xfe, - 0xe5, 0x3d, 0x4e, 0x95, 0x8b, 0x6d, 0x6c, 0xbc, 0xb0, 0xf8, 0x88, 0x61, - 0x09, 0xb7, 0x78, 0xa3, 0x31, 0xfe, 0xd9, 0x2f, }, - { 0x0a, }, - { 0x6e, 0xd4, }, - { 0x64, 0xe9, 0xd1, }, - { 0x30, 0xdd, 0x71, 0xef, }, - { 0x11, 0xb5, 0x0c, 0x87, 0xc9, }, - { 0x06, 0x1c, 0x6d, 0x04, 0x82, 0xd0, }, - { 0x5c, 0x42, 0x0b, 0xee, 0xc5, 0x9c, 0xb2, }, - { 0xe8, 0x29, 0xd6, 0xb4, 0x5d, 0xf7, 0x2b, 0x93, }, - { 0x18, 0xca, 0x27, 0x72, 0x43, 0x39, 0x16, 0xbc, 0x6a, }, - { 0x39, 0x8f, 0xfd, 0x64, 0xf5, 0x57, 0x23, 0xb0, 0x45, 0xf8, }, - { 0xbb, 0x3a, 0x78, 0x6b, 0x02, 0x1d, 0x0b, 0x16, 0xe3, 0xb2, 0x9a, }, - { 0xb8, 0xb4, 0x0b, 0xe5, 0xd4, 0x1d, 0x0d, 0x85, 0x49, 0x91, 0x35, 0xfa, }, - { 0x6d, 0x48, 0x2a, 0x0c, 0x42, 0x08, 0xbd, 0xa9, 0x78, 0x6f, 0x18, 0xaf, - 0xe2, }, - { 0x10, 0x45, 0xd4, 0x58, 0x88, 0xec, 0x4e, 0x1e, 0xf6, 0x14, 0x92, 0x64, - 0x7e, 0xb0, }, - { 0x8b, 0x0b, 0x95, 0xee, 0x92, 0xc6, 0x3b, 0x91, 0xf1, 0x1e, 0xeb, 0x51, - 0x98, 0x0a, 0x8d, }, - { 0xa3, 0x50, 0x4d, 0xa5, 0x1d, 0x03, 0x68, 0xe9, 0x57, 0x78, 0xd6, 0x04, - 0xf1, 0xc3, 0x94, 0xd8, }, - { 0xb8, 0x66, 0x6e, 0xdd, 0x46, 0x15, 0xae, 0x3d, 0x83, 0x7e, 0xcf, 0xe7, - 0x2c, 0xe8, 0x8f, 0xc7, 0x34, }, - { 0x2e, 0xc0, 0x1f, 0x29, 0xea, 0xf6, 0xb9, 0xe2, 0xc2, 0x93, 0xeb, 0x41, - 0x0d, 0xf0, 0x0a, 0x13, 0x0e, 0xa2, }, - { 0x71, 
0xb8, 0x33, 0xa9, 0x1b, 0xac, 0xf1, 0xb5, 0x42, 0x8f, 0x5e, 0x81, - 0x34, 0x43, 0xb7, 0xa4, 0x18, 0x5c, 0x47, }, - { 0xda, 0x45, 0xb8, 0x2e, 0x82, 0x1e, 0xc0, 0x59, 0x77, 0x9d, 0xfa, 0xb4, - 0x1c, 0x5e, 0xa0, 0x2b, 0x33, 0x96, 0x5a, 0x58, }, - { 0xe3, 0x09, 0x05, 0xa9, 0xeb, 0x48, 0x13, 0xad, 0x71, 0x88, 0x81, 0x9a, - 0x3e, 0x2c, 0xe1, 0x23, 0x99, 0x13, 0x35, 0x9f, 0xb5, }, - { 0xb7, 0x86, 0x2d, 0x16, 0xe1, 0x04, 0x00, 0x47, 0x47, 0x61, 0x31, 0xfb, - 0x14, 0xac, 0xd8, 0xe9, 0xe3, 0x49, 0xbd, 0xf7, 0x9c, 0x3f, }, - { 0x7f, 0xd9, 0x95, 0xa8, 0xa7, 0xa0, 0xcc, 0xba, 0xef, 0xb1, 0x0a, 0xa9, - 0x21, 0x62, 0x08, 0x0f, 0x1b, 0xff, 0x7b, 0x9d, 0xae, 0xb2, 0x95, }, - { 0x85, 0x99, 0xea, 0x33, 0xe0, 0x56, 0xff, 0x13, 0xc6, 0x61, 0x8c, 0xf9, - 0x57, 0x05, 0x03, 0x11, 0xf9, 0xfb, 0x3a, 0xf7, 0xce, 0xbb, 0x52, 0x30, }, - { 0xb2, 0x72, 0x9c, 0xf8, 0x77, 0x4e, 0x8f, 0x6b, 0x01, 0x6c, 0xff, 0x4e, - 0x4f, 0x02, 0xd2, 0xbc, 0xeb, 0x51, 0x28, 0x99, 0x50, 0xab, 0xc4, 0x42, - 0xe3, }, - { 0x8b, 0x0a, 0xb5, 0x90, 0x8f, 0xf5, 0x7b, 0xdd, 0xba, 0x47, 0x37, 0xc9, - 0x2a, 0xd5, 0x4b, 0x25, 0x08, 0x8b, 0x02, 0x17, 0xa7, 0x9e, 0x6b, 0x6e, - 0xe3, 0x90, }, - { 0x90, 0xdd, 0xf7, 0x75, 0xa7, 0xa3, 0x99, 0x5e, 0x5b, 0x7d, 0x75, 0xc3, - 0x39, 0x6b, 0xa0, 0xe2, 0x44, 0x53, 0xb1, 0x9e, 0xc8, 0xf1, 0x77, 0x10, - 0x58, 0x06, 0x9a, }, - { 0x99, 0x52, 0xf0, 0x49, 0xa8, 0x8c, 0xec, 0xa6, 0x97, 0x32, 0x13, 0xb5, - 0xf7, 0xa3, 0x8e, 0xfb, 0x4b, 0x59, 0x31, 0x3d, 0x01, 0x59, 0x98, 0x5d, - 0x53, 0x03, 0x1a, 0x39, }, - { 0x9f, 0xe0, 0xc2, 0xe5, 0x5d, 0x93, 0xd6, 0x9b, 0x47, 0x8f, 0x9b, 0xe0, - 0x26, 0x35, 0x84, 0x20, 0x1d, 0xc5, 0x53, 0x10, 0x0f, 0x22, 0xb9, 0xb5, - 0xd4, 0x36, 0xb1, 0xac, 0x73, }, - { 0x30, 0x32, 0x20, 0x3b, 0x10, 0x28, 0xec, 0x1f, 0x4f, 0x9b, 0x47, 0x59, - 0xeb, 0x7b, 0xee, 0x45, 0xfb, 0x0c, 0x49, 0xd8, 0x3d, 0x69, 0xbd, 0x90, - 0x2c, 0xf0, 0x9e, 0x8d, 0xbf, 0xd5, }, - { 0x2a, 0x37, 0x73, 0x7f, 0xf9, 0x96, 0x19, 0xaa, 0x25, 0xd8, 0x13, 0x28, - 0x01, 0x29, 0x89, 0xdf, 0x6e, 0x0c, 0x9b, 0x43, 0x44, 0x51, 0xe9, 0x75, - 0x26, 0x0c, 0xb7, 0x87, 0x66, 0x0b, 0x5f, }, - { 0x23, 0xdf, 0x96, 0x68, 0x91, 0x86, 0xd0, 0x93, 0x55, 0x33, 0x24, 0xf6, - 0xba, 0x08, 0x75, 0x5b, 0x59, 0x11, 0x69, 0xb8, 0xb9, 0xe5, 0x2c, 0x77, - 0x02, 0xf6, 0x47, 0xee, 0x81, 0xdd, 0xb9, 0x06, }, - { 0x9d, }, - { 0x9d, 0x7d, }, - { 0xfd, 0xc3, 0xda, }, - { 0xe8, 0x82, 0xcd, 0x21, }, - { 0xc3, 0x1d, 0x42, 0x4c, 0x74, }, - { 0xe9, 0xda, 0xf1, 0xa2, 0xe5, 0x7c, }, - { 0x52, 0xb8, 0x6f, 0x81, 0x5c, 0x3a, 0x4c, }, - { 0x5b, 0x39, 0x26, 0xfc, 0x92, 0x5e, 0xe0, 0x49, }, - { 0x59, 0xe4, 0x7c, 0x93, 0x1c, 0xf9, 0x28, 0x93, 0xde, }, - { 0xde, 0xdf, 0xb2, 0x43, 0x61, 0x0b, 0x86, 0x16, 0x4c, 0x2e, }, - { 0x14, 0x8f, 0x75, 0x51, 0xaf, 0xb9, 0xee, 0x51, 0x5a, 0xae, 0x23, }, - { 0x43, 0x5f, 0x50, 0xd5, 0x70, 0xb0, 0x5b, 0x87, 0xf5, 0xd9, 0xb3, 0x6d, }, - { 0x66, 0x0a, 0x64, 0x93, 0x79, 0x71, 0x94, 0x40, 0xb7, 0x68, 0x2d, 0xd3, - 0x63, }, - { 0x15, 0x00, 0xc4, 0x0c, 0x7d, 0x1b, 0x10, 0xa9, 0x73, 0x1b, 0x90, 0x6f, - 0xe6, 0xa9, }, - { 0x34, 0x75, 0xf3, 0x86, 0x8f, 0x56, 0xcf, 0x2a, 0x0a, 0xf2, 0x62, 0x0a, - 0xf6, 0x0e, 0x20, }, - { 0xb1, 0xde, 0xc9, 0xf5, 0xdb, 0xf3, 0x2f, 0x4c, 0xd6, 0x41, 0x7d, 0x39, - 0x18, 0x3e, 0xc7, 0xc3, }, - { 0xc5, 0x89, 0xb2, 0xf8, 0xb8, 0xc0, 0xa3, 0xb9, 0x3b, 0x10, 0x6d, 0x7c, - 0x92, 0xfc, 0x7f, 0x34, 0x41, }, - { 0xc4, 0xd8, 0xef, 0xba, 0xef, 0xd2, 0xaa, 0xc5, 0x6c, 0x8e, 0x3e, 0xbb, - 0x12, 0xfc, 0x0f, 0x72, 0xbf, 0x0f, }, - { 0xdd, 0x91, 0xd1, 0x15, 0x9e, 0x7d, 0xf8, 0xc1, 0xb9, 0x14, 0x63, 0x96, - 0xb5, 0xcb, 0x83, 0x1d, 0x35, 0x1c, 
0xec, }, - { 0xa9, 0xf8, 0x52, 0xc9, 0x67, 0x76, 0x2b, 0xad, 0xfb, 0xd8, 0x3a, 0xa6, - 0x74, 0x02, 0xae, 0xb8, 0x25, 0x2c, 0x63, 0x49, }, - { 0x77, 0x1f, 0x66, 0x70, 0xfd, 0x50, 0x29, 0xaa, 0xeb, 0xdc, 0xee, 0xba, - 0x75, 0x98, 0xdc, 0x93, 0x12, 0x3f, 0xdc, 0x7c, 0x38, }, - { 0xe2, 0xe1, 0x89, 0x5c, 0x37, 0x38, 0x6a, 0xa3, 0x40, 0xac, 0x3f, 0xb0, - 0xca, 0xfc, 0xa7, 0xf3, 0xea, 0xf9, 0x0f, 0x5d, 0x8e, 0x39, }, - { 0x0f, 0x67, 0xc8, 0x38, 0x01, 0xb1, 0xb7, 0xb8, 0xa2, 0xe7, 0x0a, 0x6d, - 0xd2, 0x63, 0x69, 0x9e, 0xcc, 0xf0, 0xf2, 0xbe, 0x9b, 0x98, 0xdd, }, - { 0x13, 0xe1, 0x36, 0x30, 0xfe, 0xc6, 0x01, 0x8a, 0xa1, 0x63, 0x96, 0x59, - 0xc2, 0xa9, 0x68, 0x3f, 0x58, 0xd4, 0x19, 0x0c, 0x40, 0xf3, 0xde, 0x02, }, - { 0xa3, 0x9e, 0xce, 0xda, 0x42, 0xee, 0x8c, 0x6c, 0x5a, 0x7d, 0xdc, 0x89, - 0x02, 0x77, 0xdd, 0xe7, 0x95, 0xbb, 0xff, 0x0d, 0xa4, 0xb5, 0x38, 0x1e, - 0xaf, }, - { 0x9a, 0xf6, 0xb5, 0x9a, 0x4f, 0xa9, 0x4f, 0x2c, 0x35, 0x3c, 0x24, 0xdc, - 0x97, 0x6f, 0xd9, 0xa1, 0x7d, 0x1a, 0x85, 0x0b, 0xf5, 0xda, 0x2e, 0xe7, - 0xb1, 0x1d, }, - { 0x84, 0x1e, 0x8e, 0x3d, 0x45, 0xa5, 0xf2, 0x27, 0xf3, 0x31, 0xfe, 0xb9, - 0xfb, 0xc5, 0x45, 0x99, 0x99, 0xdd, 0x93, 0x43, 0x02, 0xee, 0x58, 0xaf, - 0xee, 0x6a, 0xbe, }, - { 0x07, 0x2f, 0xc0, 0xa2, 0x04, 0xc4, 0xab, 0x7c, 0x26, 0xbb, 0xa8, 0xd8, - 0xe3, 0x1c, 0x75, 0x15, 0x64, 0x5d, 0x02, 0x6a, 0xf0, 0x86, 0xe9, 0xcd, - 0x5c, 0xef, 0xa3, 0x25, }, - { 0x2f, 0x3b, 0x1f, 0xb5, 0x91, 0x8f, 0x86, 0xe0, 0xdc, 0x31, 0x48, 0xb6, - 0xa1, 0x8c, 0xfd, 0x75, 0xbb, 0x7d, 0x3d, 0xc1, 0xf0, 0x10, 0x9a, 0xd8, - 0x4b, 0x0e, 0xe3, 0x94, 0x9f, }, - { 0x29, 0xbb, 0x8f, 0x6c, 0xd1, 0xf2, 0xb6, 0xaf, 0xe5, 0xe3, 0x2d, 0xdc, - 0x6f, 0xa4, 0x53, 0x88, 0xd8, 0xcf, 0x4d, 0x45, 0x42, 0x62, 0xdb, 0xdf, - 0xf8, 0x45, 0xc2, 0x13, 0xec, 0x35, }, - { 0x06, 0x3c, 0xe3, 0x2c, 0x15, 0xc6, 0x43, 0x03, 0x81, 0xfb, 0x08, 0x76, - 0x33, 0xcb, 0x02, 0xc1, 0xba, 0x33, 0xe5, 0xe0, 0xd1, 0x92, 0xa8, 0x46, - 0x28, 0x3f, 0x3e, 0x9d, 0x2c, 0x44, 0x54, }, - { 0xea, 0xbb, 0x96, 0xf8, 0xd1, 0x8b, 0x04, 0x11, 0x40, 0x78, 0x42, 0x02, - 0x19, 0xd1, 0xbc, 0x65, 0x92, 0xd3, 0xc3, 0xd6, 0xd9, 0x19, 0xe7, 0xc3, - 0x40, 0x97, 0xbd, 0xd4, 0xed, 0xfa, 0x5e, 0x28, }, - { 0x02, }, - { 0x52, 0xa8, }, - { 0x38, 0x25, 0x0d, }, - { 0xe3, 0x04, 0xd4, 0x92, }, - { 0x97, 0xdb, 0xf7, 0x81, 0xca, }, - { 0x8a, 0x56, 0x9d, 0x62, 0x56, 0xcc, }, - { 0xa1, 0x8e, 0x3c, 0x72, 0x8f, 0x63, 0x03, }, - { 0xf7, 0xf3, 0x39, 0x09, 0x0a, 0xa1, 0xbb, 0x23, }, - { 0x6b, 0x03, 0xc0, 0xe9, 0xd9, 0x83, 0x05, 0x22, 0x01, }, - { 0x1b, 0x4b, 0xf5, 0xd6, 0x4f, 0x05, 0x75, 0x91, 0x4c, 0x7f, }, - { 0x4c, 0x8c, 0x25, 0x20, 0x21, 0xcb, 0xc2, 0x4b, 0x3a, 0x5b, 0x8d, }, - { 0x56, 0xe2, 0x77, 0xa0, 0xb6, 0x9f, 0x81, 0xec, 0x83, 0x75, 0xc4, 0xf9, }, - { 0x71, 0x70, 0x0f, 0xad, 0x4d, 0x35, 0x81, 0x9d, 0x88, 0x69, 0xf9, 0xaa, - 0xd3, }, - { 0x50, 0x6e, 0x86, 0x6e, 0x43, 0xc0, 0xc2, 0x44, 0xc2, 0xe2, 0xa0, 0x1c, - 0xb7, 0x9a, }, - { 0xe4, 0x7e, 0x72, 0xc6, 0x12, 0x8e, 0x7c, 0xfc, 0xbd, 0xe2, 0x08, 0x31, - 0x3d, 0x47, 0x3d, }, - { 0x08, 0x97, 0x5b, 0x80, 0xae, 0xc4, 0x1d, 0x50, 0x77, 0xdf, 0x1f, 0xd0, - 0x24, 0xf0, 0x17, 0xc0, }, - { 0x01, 0xb6, 0x29, 0xf4, 0xaf, 0x78, 0x5f, 0xb6, 0x91, 0xdd, 0x76, 0x76, - 0xd2, 0xfd, 0x0c, 0x47, 0x40, }, - { 0xa1, 0xd8, 0x09, 0x97, 0x7a, 0xa6, 0xc8, 0x94, 0xf6, 0x91, 0x7b, 0xae, - 0x2b, 0x9f, 0x0d, 0x83, 0x48, 0xf7, }, - { 0x12, 0xd5, 0x53, 0x7d, 0x9a, 0xb0, 0xbe, 0xd9, 0xed, 0xe9, 0x9e, 0xee, - 0x61, 0x5b, 0x42, 0xf2, 0xc0, 0x73, 0xc0, }, - { 0xd5, 0x77, 0xd6, 0x5c, 0x6e, 0xa5, 0x69, 0x2b, 0x3b, 0x8c, 0xd6, 0x7d, - 0x1d, 0xbe, 0x2c, 
0xa1, 0x02, 0x21, 0xcd, 0x29, }, - { 0xa4, 0x98, 0x80, 0xca, 0x22, 0xcf, 0x6a, 0xab, 0x5e, 0x40, 0x0d, 0x61, - 0x08, 0x21, 0xef, 0xc0, 0x6c, 0x52, 0xb4, 0xb0, 0x53, }, - { 0xbf, 0xaf, 0x8f, 0x3b, 0x7a, 0x97, 0x33, 0xe5, 0xca, 0x07, 0x37, 0xfd, - 0x15, 0xdf, 0xce, 0x26, 0x2a, 0xb1, 0xa7, 0x0b, 0xb3, 0xac, }, - { 0x16, 0x22, 0xe1, 0xbc, 0x99, 0x4e, 0x01, 0xf0, 0xfa, 0xff, 0x8f, 0xa5, - 0x0c, 0x61, 0xb0, 0xad, 0xcc, 0xb1, 0xe1, 0x21, 0x46, 0xfa, 0x2e, }, - { 0x11, 0x5b, 0x0b, 0x2b, 0xe6, 0x14, 0xc1, 0xd5, 0x4d, 0x71, 0x5e, 0x17, - 0xea, 0x23, 0xdd, 0x6c, 0xbd, 0x1d, 0xbe, 0x12, 0x1b, 0xee, 0x4c, 0x1a, }, - { 0x40, 0x88, 0x22, 0xf3, 0x20, 0x6c, 0xed, 0xe1, 0x36, 0x34, 0x62, 0x2c, - 0x98, 0x83, 0x52, 0xe2, 0x25, 0xee, 0xe9, 0xf5, 0xe1, 0x17, 0xf0, 0x5c, - 0xae, }, - { 0xc3, 0x76, 0x37, 0xde, 0x95, 0x8c, 0xca, 0x2b, 0x0c, 0x23, 0xe7, 0xb5, - 0x38, 0x70, 0x61, 0xcc, 0xff, 0xd3, 0x95, 0x7b, 0xf3, 0xff, 0x1f, 0x9d, - 0x59, 0x00, }, - { 0x0c, 0x19, 0x52, 0x05, 0x22, 0x53, 0xcb, 0x48, 0xd7, 0x10, 0x0e, 0x7e, - 0x14, 0x69, 0xb5, 0xa2, 0x92, 0x43, 0xa3, 0x9e, 0x4b, 0x8f, 0x51, 0x2c, - 0x5a, 0x2c, 0x3b, }, - { 0xe1, 0x9d, 0x70, 0x70, 0x28, 0xec, 0x86, 0x40, 0x55, 0x33, 0x56, 0xda, - 0x88, 0xca, 0xee, 0xc8, 0x6a, 0x20, 0xb1, 0xe5, 0x3d, 0x57, 0xf8, 0x3c, - 0x10, 0x07, 0x2a, 0xc4, }, - { 0x0b, 0xae, 0xf1, 0xc4, 0x79, 0xee, 0x1b, 0x3d, 0x27, 0x35, 0x8d, 0x14, - 0xd6, 0xae, 0x4e, 0x3c, 0xe9, 0x53, 0x50, 0xb5, 0xcc, 0x0c, 0xf7, 0xdf, - 0xee, 0xa1, 0x74, 0xd6, 0x71, }, - { 0xe6, 0xa4, 0xf4, 0x99, 0x98, 0xb9, 0x80, 0xea, 0x96, 0x7f, 0x4f, 0x33, - 0xcf, 0x74, 0x25, 0x6f, 0x17, 0x6c, 0xbf, 0xf5, 0x5c, 0x38, 0xd0, 0xff, - 0x96, 0xcb, 0x13, 0xf9, 0xdf, 0xfd, }, - { 0xbe, 0x92, 0xeb, 0xba, 0x44, 0x2c, 0x24, 0x74, 0xd4, 0x03, 0x27, 0x3c, - 0x5d, 0x5b, 0x03, 0x30, 0x87, 0x63, 0x69, 0xe0, 0xb8, 0x94, 0xf4, 0x44, - 0x7e, 0xad, 0xcd, 0x20, 0x12, 0x16, 0x79, }, - { 0x30, 0xf1, 0xc4, 0x8e, 0x05, 0x90, 0x2a, 0x97, 0x63, 0x94, 0x46, 0xff, - 0xce, 0xd8, 0x67, 0xa7, 0xac, 0x33, 0x8c, 0x95, 0xb7, 0xcd, 0xa3, 0x23, - 0x98, 0x9d, 0x76, 0x6c, 0x9d, 0xa8, 0xd6, 0x8a, }, - { 0xbe, }, - { 0x17, 0x6c, }, - { 0x1a, 0x42, 0x4f, }, - { 0xba, 0xaf, 0xb7, 0x65, }, - { 0xc2, 0x63, 0x43, 0x6a, 0xea, }, - { 0xe4, 0x4d, 0xad, 0xf2, 0x0b, 0x02, }, - { 0x04, 0xc7, 0xc4, 0x7f, 0xa9, 0x2b, 0xce, }, - { 0x66, 0xf6, 0x67, 0xcb, 0x03, 0x53, 0xc8, 0xf1, }, - { 0x56, 0xa3, 0x60, 0x78, 0xc9, 0x5f, 0x70, 0x1b, 0x5e, }, - { 0x99, 0xff, 0x81, 0x7c, 0x13, 0x3c, 0x29, 0x79, 0x4b, 0x65, }, - { 0x51, 0x10, 0x50, 0x93, 0x01, 0x93, 0xb7, 0x01, 0xc9, 0x18, 0xb7, }, - { 0x8e, 0x3c, 0x42, 0x1e, 0x5e, 0x7d, 0xc1, 0x50, 0x70, 0x1f, 0x00, 0x98, }, - { 0x5f, 0xd9, 0x9b, 0xc8, 0xd7, 0xb2, 0x72, 0x62, 0x1a, 0x1e, 0xba, 0x92, - 0xe9, }, - { 0x70, 0x2b, 0xba, 0xfe, 0xad, 0x5d, 0x96, 0x3f, 0x27, 0xc2, 0x41, 0x6d, - 0xc4, 0xb3, }, - { 0xae, 0xe0, 0xd5, 0xd4, 0xc7, 0xae, 0x15, 0x5e, 0xdc, 0xdd, 0x33, 0x60, - 0xd7, 0xd3, 0x5e, }, - { 0x79, 0x8e, 0xbc, 0x9e, 0x20, 0xb9, 0x19, 0x4b, 0x63, 0x80, 0xf3, 0x16, - 0xaf, 0x39, 0xbd, 0x92, }, - { 0xc2, 0x0e, 0x85, 0xa0, 0x0b, 0x9a, 0xb0, 0xec, 0xde, 0x38, 0xd3, 0x10, - 0xd9, 0xa7, 0x66, 0x27, 0xcf, }, - { 0x0e, 0x3b, 0x75, 0x80, 0x67, 0x14, 0x0c, 0x02, 0x90, 0xd6, 0xb3, 0x02, - 0x81, 0xf6, 0xa6, 0x87, 0xce, 0x58, }, - { 0x79, 0xb5, 0xe9, 0x5d, 0x52, 0x4d, 0xf7, 0x59, 0xf4, 0x2e, 0x27, 0xdd, - 0xb3, 0xed, 0x57, 0x5b, 0x82, 0xea, 0x6f, }, - { 0xa2, 0x97, 0xf5, 0x80, 0x02, 0x3d, 0xde, 0xa3, 0xf9, 0xf6, 0xab, 0xe3, - 0x57, 0x63, 0x7b, 0x9b, 0x10, 0x42, 0x6f, 0xf2, }, - { 0x12, 0x7a, 0xfc, 0xb7, 0x67, 0x06, 0x0c, 0x78, 0x1a, 0xfe, 0x88, 
0x4f, - 0xc6, 0xac, 0x52, 0x96, 0x64, 0x28, 0x97, 0x84, 0x06, }, - { 0xc5, 0x04, 0x44, 0x6b, 0xb2, 0xa5, 0xa4, 0x66, 0xe1, 0x76, 0xa2, 0x51, - 0xf9, 0x59, 0x69, 0x97, 0x56, 0x0b, 0xbf, 0x50, 0xb3, 0x34, }, - { 0x21, 0x32, 0x6b, 0x42, 0xb5, 0xed, 0x71, 0x8d, 0xf7, 0x5a, 0x35, 0xe3, - 0x90, 0xe2, 0xee, 0xaa, 0x89, 0xf6, 0xc9, 0x9c, 0x4d, 0x73, 0xf4, }, - { 0x4c, 0xa6, 0x09, 0xf4, 0x48, 0xe7, 0x46, 0xbc, 0x49, 0xfc, 0xe5, 0xda, - 0xd1, 0x87, 0x13, 0x17, 0x4c, 0x59, 0x71, 0x26, 0x5b, 0x2c, 0x42, 0xb7, }, - { 0x13, 0x63, 0xf3, 0x40, 0x02, 0xe5, 0xa3, 0x3a, 0x5e, 0x8e, 0xf8, 0xb6, - 0x8a, 0x49, 0x60, 0x76, 0x34, 0x72, 0x94, 0x73, 0xf6, 0xd9, 0x21, 0x6a, - 0x26, }, - { 0xdf, 0x75, 0x16, 0x10, 0x1b, 0x5e, 0x81, 0xc3, 0xc8, 0xde, 0x34, 0x24, - 0xb0, 0x98, 0xeb, 0x1b, 0x8f, 0xa1, 0x9b, 0x05, 0xee, 0xa5, 0xe9, 0x35, - 0xf4, 0x1d, }, - { 0xcd, 0x21, 0x93, 0x6e, 0x5b, 0xa0, 0x26, 0x2b, 0x21, 0x0e, 0xa0, 0xb9, - 0x1c, 0xb5, 0xbb, 0xb8, 0xf8, 0x1e, 0xff, 0x5c, 0xa8, 0xf9, 0x39, 0x46, - 0x4e, 0x29, 0x26, }, - { 0x73, 0x7f, 0x0e, 0x3b, 0x0b, 0x5c, 0xf9, 0x60, 0xaa, 0x88, 0xa1, 0x09, - 0xb1, 0x5d, 0x38, 0x7b, 0x86, 0x8f, 0x13, 0x7a, 0x8d, 0x72, 0x7a, 0x98, - 0x1a, 0x5b, 0xff, 0xc9, }, - { 0xd3, 0x3c, 0x61, 0x71, 0x44, 0x7e, 0x31, 0x74, 0x98, 0x9d, 0x9a, 0xd2, - 0x27, 0xf3, 0x46, 0x43, 0x42, 0x51, 0xd0, 0x5f, 0xe9, 0x1c, 0x5c, 0x69, - 0xbf, 0xf6, 0xbe, 0x3c, 0x40, }, - { 0x31, 0x99, 0x31, 0x9f, 0xaa, 0x43, 0x2e, 0x77, 0x3e, 0x74, 0x26, 0x31, - 0x5e, 0x61, 0xf1, 0x87, 0xe2, 0xeb, 0x9b, 0xcd, 0xd0, 0x3a, 0xee, 0x20, - 0x7e, 0x10, 0x0a, 0x0b, 0x7e, 0xfa, }, - { 0xa4, 0x27, 0x80, 0x67, 0x81, 0x2a, 0xa7, 0x62, 0xf7, 0x6e, 0xda, 0xd4, - 0x5c, 0x39, 0x74, 0xad, 0x7e, 0xbe, 0xad, 0xa5, 0x84, 0x7f, 0xa9, 0x30, - 0x5d, 0xdb, 0xe2, 0x05, 0x43, 0xf7, 0x1b, }, - { 0x0b, 0x37, 0xd8, 0x02, 0xe1, 0x83, 0xd6, 0x80, 0xf2, 0x35, 0xc2, 0xb0, - 0x37, 0xef, 0xef, 0x5e, 0x43, 0x93, 0xf0, 0x49, 0x45, 0x0a, 0xef, 0xb5, - 0x76, 0x70, 0x12, 0x44, 0xc4, 0xdb, 0xf5, 0x7a, }, - { 0x1f, }, - { 0x82, 0x60, }, - { 0xcc, 0xe3, 0x08, }, - { 0x56, 0x17, 0xe4, 0x59, }, - { 0xe2, 0xd7, 0x9e, 0xc4, 0x4c, }, - { 0xb2, 0xad, 0xd3, 0x78, 0x58, 0x5a, }, - { 0xce, 0x43, 0xb4, 0x02, 0x96, 0xab, 0x3c, }, - { 0xe6, 0x05, 0x1a, 0x73, 0x22, 0x32, 0xbb, 0x77, }, - { 0x23, 0xe7, 0xda, 0xfe, 0x2c, 0xef, 0x8c, 0x22, 0xec, }, - { 0xe9, 0x8e, 0x55, 0x38, 0xd1, 0xd7, 0x35, 0x23, 0x98, 0xc7, }, - { 0xb5, 0x81, 0x1a, 0xe5, 0xb5, 0xa5, 0xd9, 0x4d, 0xca, 0x41, 0xe7, }, - { 0x41, 0x16, 0x16, 0x95, 0x8d, 0x9e, 0x0c, 0xea, 0x8c, 0x71, 0x9a, 0xc1, }, - { 0x7c, 0x33, 0xc0, 0xa4, 0x00, 0x62, 0xea, 0x60, 0x67, 0xe4, 0x20, 0xbc, - 0x5b, }, - { 0xdb, 0xb1, 0xdc, 0xfd, 0x08, 0xc0, 0xde, 0x82, 0xd1, 0xde, 0x38, 0xc0, - 0x90, 0x48, }, - { 0x37, 0x18, 0x2e, 0x0d, 0x61, 0xaa, 0x61, 0xd7, 0x86, 0x20, 0x16, 0x60, - 0x04, 0xd9, 0xd5, }, - { 0xb0, 0xcf, 0x2c, 0x4c, 0x5e, 0x5b, 0x4f, 0x2a, 0x23, 0x25, 0x58, 0x47, - 0xe5, 0x31, 0x06, 0x70, }, - { 0x91, 0xa0, 0xa3, 0x86, 0x4e, 0xe0, 0x72, 0x38, 0x06, 0x67, 0x59, 0x5c, - 0x70, 0x25, 0xdb, 0x33, 0x27, }, - { 0x44, 0x58, 0x66, 0xb8, 0x58, 0xc7, 0x13, 0xed, 0x4c, 0xc0, 0xf4, 0x9a, - 0x1e, 0x67, 0x75, 0x33, 0xb6, 0xb8, }, - { 0x7f, 0x98, 0x4a, 0x8e, 0x50, 0xa2, 0x5c, 0xcd, 0x59, 0xde, 0x72, 0xb3, - 0x9d, 0xc3, 0x09, 0x8a, 0xab, 0x56, 0xf1, }, - { 0x80, 0x96, 0x49, 0x1a, 0x59, 0xa2, 0xc5, 0xd5, 0xa7, 0x20, 0x8a, 0xb7, - 0x27, 0x62, 0x84, 0x43, 0xc6, 0xe1, 0x1b, 0x5d, }, - { 0x6b, 0xb7, 0x2b, 0x26, 0x62, 0x14, 0x70, 0x19, 0x3d, 0x4d, 0xac, 0xac, - 0x63, 0x58, 0x5e, 0x94, 0xb5, 0xb7, 0xe8, 0xe8, 0xa2, }, - { 0x20, 0xa8, 0xc0, 0xfd, 0x63, 0x3d, 
0x6e, 0x98, 0xcf, 0x0c, 0x49, 0x98, - 0xe4, 0x5a, 0xfe, 0x8c, 0xaa, 0x70, 0x82, 0x1c, 0x7b, 0x74, }, - { 0xc8, 0xe8, 0xdd, 0xdf, 0x69, 0x30, 0x01, 0xc2, 0x0f, 0x7e, 0x2f, 0x11, - 0xcc, 0x3e, 0x17, 0xa5, 0x69, 0x40, 0x3f, 0x0e, 0x79, 0x7f, 0xcf, }, - { 0xdb, 0x61, 0xc0, 0xe2, 0x2e, 0x49, 0x07, 0x31, 0x1d, 0x91, 0x42, 0x8a, - 0xfc, 0x5e, 0xd3, 0xf8, 0x56, 0x1f, 0x2b, 0x73, 0xfd, 0x9f, 0xb2, 0x8e, }, - { 0x0c, 0x89, 0x55, 0x0c, 0x1f, 0x59, 0x2c, 0x9d, 0x1b, 0x29, 0x1d, 0x41, - 0x1d, 0xe6, 0x47, 0x8f, 0x8c, 0x2b, 0xea, 0x8f, 0xf0, 0xff, 0x21, 0x70, - 0x88, }, - { 0x12, 0x18, 0x95, 0xa6, 0x59, 0xb1, 0x31, 0x24, 0x45, 0x67, 0x55, 0xa4, - 0x1a, 0x2d, 0x48, 0x67, 0x1b, 0x43, 0x88, 0x2d, 0x8e, 0xa0, 0x70, 0xb3, - 0xc6, 0xbb, }, - { 0xe7, 0xb1, 0x1d, 0xb2, 0x76, 0x4d, 0x68, 0x68, 0x68, 0x23, 0x02, 0x55, - 0x3a, 0xe2, 0xe5, 0xd5, 0x4b, 0x43, 0xf9, 0x34, 0x77, 0x5c, 0xa1, 0xf5, - 0x55, 0xfd, 0x4f, }, - { 0x8c, 0x87, 0x5a, 0x08, 0x3a, 0x73, 0xad, 0x61, 0xe1, 0xe7, 0x99, 0x7e, - 0xf0, 0x5d, 0xe9, 0x5d, 0x16, 0x43, 0x80, 0x2f, 0xd0, 0x66, 0x34, 0xe2, - 0x42, 0x64, 0x3b, 0x1a, }, - { 0x39, 0xc1, 0x99, 0xcf, 0x22, 0xbf, 0x16, 0x8f, 0x9f, 0x80, 0x7f, 0x95, - 0x0a, 0x05, 0x67, 0x27, 0xe7, 0x15, 0xdf, 0x9d, 0xb2, 0xfe, 0x1c, 0xb5, - 0x1d, 0x60, 0x8f, 0x8a, 0x1d, }, - { 0x9b, 0x6e, 0x08, 0x09, 0x06, 0x73, 0xab, 0x68, 0x02, 0x62, 0x1a, 0xe4, - 0xd4, 0xdf, 0xc7, 0x02, 0x4c, 0x6a, 0x5f, 0xfd, 0x23, 0xac, 0xae, 0x6d, - 0x43, 0xa4, 0x7a, 0x50, 0x60, 0x3c, }, - { 0x1d, 0xb4, 0xc6, 0xe1, 0xb1, 0x4b, 0xe3, 0xf2, 0xe2, 0x1a, 0x73, 0x1b, - 0xa0, 0x92, 0xa7, 0xf5, 0xff, 0x8f, 0x8b, 0x5d, 0xdf, 0xa8, 0x04, 0xb3, - 0xb0, 0xf7, 0xcc, 0x12, 0xfa, 0x35, 0x46, }, - { 0x49, 0x45, 0x97, 0x11, 0x0f, 0x1c, 0x60, 0x8e, 0xe8, 0x47, 0x30, 0xcf, - 0x60, 0xa8, 0x71, 0xc5, 0x1b, 0xe9, 0x39, 0x4d, 0x49, 0xb6, 0x12, 0x1f, - 0x24, 0xab, 0x37, 0xff, 0x83, 0xc2, 0xe1, 0x3a, }, - { 0x60, }, - { 0x24, 0x26, }, - { 0x47, 0xeb, 0xc9, }, - { 0x4a, 0xd0, 0xbc, 0xf0, }, - { 0x8e, 0x2b, 0xc9, 0x85, 0x3c, }, - { 0xa2, 0x07, 0x15, 0xb8, 0x12, 0x74, }, - { 0x0f, 0xdb, 0x5b, 0x33, 0x69, 0xfe, 0x4b, }, - { 0xa2, 0x86, 0x54, 0xf4, 0xfd, 0xb2, 0xd4, 0xe6, }, - { 0xbb, 0x84, 0x78, 0x49, 0x27, 0x8e, 0x61, 0xda, 0x60, }, - { 0x04, 0xc3, 0xcd, 0xaa, 0x8f, 0xa7, 0x03, 0xc9, 0xf9, 0xb6, }, - { 0xf8, 0x27, 0x1d, 0x61, 0xdc, 0x21, 0x42, 0xdd, 0xad, 0x92, 0x40, }, - { 0x12, 0x87, 0xdf, 0xc2, 0x41, 0x45, 0x5a, 0x36, 0x48, 0x5b, 0x51, 0x2b, }, - { 0xbb, 0x37, 0x5d, 0x1f, 0xf1, 0x68, 0x7a, 0xc4, 0xa5, 0xd2, 0xa4, 0x91, - 0x8d, }, - { 0x5b, 0x27, 0xd1, 0x04, 0x54, 0x52, 0x9f, 0xa3, 0x47, 0x86, 0x33, 0x33, - 0xbf, 0xa0, }, - { 0xcf, 0x04, 0xea, 0xf8, 0x03, 0x2a, 0x43, 0xff, 0xa6, 0x68, 0x21, 0x4c, - 0xd5, 0x4b, 0xed, }, - { 0xaf, 0xb8, 0xbc, 0x63, 0x0f, 0x18, 0x4d, 0xe2, 0x7a, 0xdd, 0x46, 0x44, - 0xc8, 0x24, 0x0a, 0xb7, }, - { 0x3e, 0xdc, 0x36, 0xe4, 0x89, 0xb1, 0xfa, 0xc6, 0x40, 0x93, 0x2e, 0x75, - 0xb2, 0x15, 0xd1, 0xb1, 0x10, }, - { 0x6c, 0xd8, 0x20, 0x3b, 0x82, 0x79, 0xf9, 0xc8, 0xbc, 0x9d, 0xe0, 0x35, - 0xbe, 0x1b, 0x49, 0x1a, 0xbc, 0x3a, }, - { 0x78, 0x65, 0x2c, 0xbe, 0x35, 0x67, 0xdc, 0x78, 0xd4, 0x41, 0xf6, 0xc9, - 0xde, 0xde, 0x1f, 0x18, 0x13, 0x31, 0x11, }, - { 0x8a, 0x7f, 0xb1, 0x33, 0x8f, 0x0c, 0x3c, 0x0a, 0x06, 0x61, 0xf0, 0x47, - 0x29, 0x1b, 0x29, 0xbc, 0x1c, 0x47, 0xef, 0x7a, }, - { 0x65, 0x91, 0xf1, 0xe6, 0xb3, 0x96, 0xd3, 0x8c, 0xc2, 0x4a, 0x59, 0x35, - 0x72, 0x8e, 0x0b, 0x9a, 0x87, 0xca, 0x34, 0x7b, 0x63, }, - { 0x5f, 0x08, 0x87, 0x80, 0x56, 0x25, 0x89, 0x77, 0x61, 0x8c, 0x64, 0xa1, - 0x59, 0x6d, 0x59, 0x62, 0xe8, 0x4a, 0xc8, 0x58, 0x99, 0xd1, }, - { 
0x23, 0x87, 0x1d, 0xed, 0x6f, 0xf2, 0x91, 0x90, 0xe2, 0xfe, 0x43, 0x21, - 0xaf, 0x97, 0xc6, 0xbc, 0xd7, 0x15, 0xc7, 0x2d, 0x08, 0x77, 0x91, }, - { 0x90, 0x47, 0x9a, 0x9e, 0x3a, 0xdf, 0xf3, 0xc9, 0x4c, 0x1e, 0xa7, 0xd4, - 0x6a, 0x32, 0x90, 0xfe, 0xb7, 0xb6, 0x7b, 0xfa, 0x96, 0x61, 0xfb, 0xa4, }, - { 0xb1, 0x67, 0x60, 0x45, 0xb0, 0x96, 0xc5, 0x15, 0x9f, 0x4d, 0x26, 0xd7, - 0x9d, 0xf1, 0xf5, 0x6d, 0x21, 0x00, 0x94, 0x31, 0x64, 0x94, 0xd3, 0xa7, - 0xd3, }, - { 0x02, 0x3e, 0xaf, 0xf3, 0x79, 0x73, 0xa5, 0xf5, 0xcc, 0x7a, 0x7f, 0xfb, - 0x79, 0x2b, 0x85, 0x8c, 0x88, 0x72, 0x06, 0xbe, 0xfe, 0xaf, 0xc1, 0x16, - 0xa6, 0xd6, }, - { 0x2a, 0xb0, 0x1a, 0xe5, 0xaa, 0x6e, 0xb3, 0xae, 0x53, 0x85, 0x33, 0x80, - 0x75, 0xae, 0x30, 0xe6, 0xb8, 0x72, 0x42, 0xf6, 0x25, 0x4f, 0x38, 0x88, - 0x55, 0xd1, 0xa9, }, - { 0x90, 0xd8, 0x0c, 0xc0, 0x93, 0x4b, 0x4f, 0x9e, 0x65, 0x6c, 0xa1, 0x54, - 0xa6, 0xf6, 0x6e, 0xca, 0xd2, 0xbb, 0x7e, 0x6a, 0x1c, 0xd3, 0xce, 0x46, - 0xef, 0xb0, 0x00, 0x8d, }, - { 0xed, 0x9c, 0x49, 0xcd, 0xc2, 0xde, 0x38, 0x0e, 0xe9, 0x98, 0x6c, 0xc8, - 0x90, 0x9e, 0x3c, 0xd4, 0xd3, 0xeb, 0x88, 0x32, 0xc7, 0x28, 0xe3, 0x94, - 0x1c, 0x9f, 0x8b, 0xf3, 0xcb, }, - { 0xac, 0xe7, 0x92, 0x16, 0xb4, 0x14, 0xa0, 0xe4, 0x04, 0x79, 0xa2, 0xf4, - 0x31, 0xe6, 0x0c, 0x26, 0xdc, 0xbf, 0x2f, 0x69, 0x1b, 0x55, 0x94, 0x67, - 0xda, 0x0c, 0xd7, 0x32, 0x1f, 0xef, }, - { 0x68, 0x63, 0x85, 0x57, 0x95, 0x9e, 0x42, 0x27, 0x41, 0x43, 0x42, 0x02, - 0xa5, 0x78, 0xa7, 0xc6, 0x43, 0xc1, 0x6a, 0xba, 0x70, 0x80, 0xcd, 0x04, - 0xb6, 0x78, 0x76, 0x29, 0xf3, 0xe8, 0xa0, }, - { 0xe6, 0xac, 0x8d, 0x9d, 0xf0, 0xc0, 0xf7, 0xf7, 0xe3, 0x3e, 0x4e, 0x28, - 0x0f, 0x59, 0xb2, 0x67, 0x9e, 0x84, 0x34, 0x42, 0x96, 0x30, 0x2b, 0xca, - 0x49, 0xb6, 0xc5, 0x9a, 0x84, 0x59, 0xa7, 0x81, }, - { 0x7e, }, - { 0x1e, 0x21, }, - { 0x26, 0xd3, 0xdd, }, - { 0x2c, 0xd4, 0xb3, 0x3d, }, - { 0x86, 0x7b, 0x76, 0x3c, 0xf0, }, - { 0x12, 0xc3, 0x70, 0x1d, 0x55, 0x18, }, - { 0x96, 0xc2, 0xbd, 0x61, 0x55, 0xf4, 0x24, }, - { 0x20, 0x51, 0xf7, 0x86, 0x58, 0x8f, 0x07, 0x2a, }, - { 0x93, 0x15, 0xa8, 0x1d, 0xda, 0x97, 0xee, 0x0e, 0x6c, }, - { 0x39, 0x93, 0xdf, 0xd5, 0x0e, 0xca, 0xdc, 0x7a, 0x92, 0xce, }, - { 0x60, 0xd5, 0xfd, 0xf5, 0x1b, 0x26, 0x82, 0x26, 0x73, 0x02, 0xbc, }, - { 0x98, 0xf2, 0x34, 0xe1, 0xf5, 0xfb, 0x00, 0xac, 0x10, 0x4a, 0x38, 0x9f, }, - { 0xda, 0x3a, 0x92, 0x8a, 0xd0, 0xcd, 0x12, 0xcd, 0x15, 0xbb, 0xab, 0x77, - 0x66, }, - { 0xa2, 0x92, 0x1a, 0xe5, 0xca, 0x0c, 0x30, 0x75, 0xeb, 0xaf, 0x00, 0x31, - 0x55, 0x66, }, - { 0x06, 0xea, 0xfd, 0x3e, 0x86, 0x38, 0x62, 0x4e, 0xa9, 0x12, 0xa4, 0x12, - 0x43, 0xbf, 0xa1, }, - { 0xe4, 0x71, 0x7b, 0x94, 0xdb, 0xa0, 0xd2, 0xff, 0x9b, 0xeb, 0xad, 0x8e, - 0x95, 0x8a, 0xc5, 0xed, }, - { 0x25, 0x5a, 0x77, 0x71, 0x41, 0x0e, 0x7a, 0xe9, 0xed, 0x0c, 0x10, 0xef, - 0xf6, 0x2b, 0x3a, 0xba, 0x60, }, - { 0xee, 0xe2, 0xa3, 0x67, 0x64, 0x1d, 0xc6, 0x04, 0xc4, 0xe1, 0x68, 0xd2, - 0x6e, 0xd2, 0x91, 0x75, 0x53, 0x07, }, - { 0xe0, 0xf6, 0x4d, 0x8f, 0x68, 0xfc, 0x06, 0x7e, 0x18, 0x79, 0x7f, 0x2b, - 0x6d, 0xef, 0x46, 0x7f, 0xab, 0xb2, 0xad, }, - { 0x3d, 0x35, 0x88, 0x9f, 0x2e, 0xcf, 0x96, 0x45, 0x07, 0x60, 0x71, 0x94, - 0x00, 0x8d, 0xbf, 0xf4, 0xef, 0x46, 0x2e, 0x3c, }, - { 0x43, 0xcf, 0x98, 0xf7, 0x2d, 0xf4, 0x17, 0xe7, 0x8c, 0x05, 0x2d, 0x9b, - 0x24, 0xfb, 0x4d, 0xea, 0x4a, 0xec, 0x01, 0x25, 0x29, }, - { 0x8e, 0x73, 0x9a, 0x78, 0x11, 0xfe, 0x48, 0xa0, 0x3b, 0x1a, 0x26, 0xdf, - 0x25, 0xe9, 0x59, 0x1c, 0x70, 0x07, 0x9f, 0xdc, 0xa0, 0xa6, }, - { 0xe8, 0x47, 0x71, 0xc7, 0x3e, 0xdf, 0xb5, 0x13, 0xb9, 0x85, 0x13, 0xa8, - 0x54, 0x47, 0x6e, 0x59, 0x96, 
0x09, 0x13, 0x5f, 0x82, 0x16, 0x0b, }, - { 0xfb, 0xc0, 0x8c, 0x03, 0x21, 0xb3, 0xc4, 0xb5, 0x43, 0x32, 0x6c, 0xea, - 0x7f, 0xa8, 0x43, 0x91, 0xe8, 0x4e, 0x3f, 0xbf, 0x45, 0x58, 0x6a, 0xa3, }, - { 0x55, 0xf8, 0xf3, 0x00, 0x76, 0x09, 0xef, 0x69, 0x5d, 0xd2, 0x8a, 0xf2, - 0x65, 0xc3, 0xcb, 0x9b, 0x43, 0xfd, 0xb1, 0x7e, 0x7f, 0xa1, 0x94, 0xb0, - 0xd7, }, - { 0xaa, 0x13, 0xc1, 0x51, 0x40, 0x6d, 0x8d, 0x4c, 0x0a, 0x95, 0x64, 0x7b, - 0xd1, 0x96, 0xb6, 0x56, 0xb4, 0x5b, 0xcf, 0xd6, 0xd9, 0x15, 0x97, 0xdd, - 0xb6, 0xef, }, - { 0xaf, 0xb7, 0x36, 0xb0, 0x04, 0xdb, 0xd7, 0x9c, 0x9a, 0x44, 0xc4, 0xf6, - 0x1f, 0x12, 0x21, 0x2d, 0x59, 0x30, 0x54, 0xab, 0x27, 0x61, 0xa3, 0x57, - 0xef, 0xf8, 0x53, }, - { 0x97, 0x34, 0x45, 0x3e, 0xce, 0x7c, 0x35, 0xa2, 0xda, 0x9f, 0x4b, 0x46, - 0x6c, 0x11, 0x67, 0xff, 0x2f, 0x76, 0x58, 0x15, 0x71, 0xfa, 0x44, 0x89, - 0x89, 0xfd, 0xf7, 0x99, }, - { 0x1f, 0xb1, 0x62, 0xeb, 0x83, 0xc5, 0x9c, 0x89, 0xf9, 0x2c, 0xd2, 0x03, - 0x61, 0xbc, 0xbb, 0xa5, 0x74, 0x0e, 0x9b, 0x7e, 0x82, 0x3e, 0x70, 0x0a, - 0xa9, 0x8f, 0x2b, 0x59, 0xfb, }, - { 0xf8, 0xca, 0x5e, 0x3a, 0x4f, 0x9e, 0x10, 0x69, 0x10, 0xd5, 0x4c, 0xeb, - 0x1a, 0x0f, 0x3c, 0x6a, 0x98, 0xf5, 0xb0, 0x97, 0x5b, 0x37, 0x2f, 0x0d, - 0xbd, 0x42, 0x4b, 0x69, 0xa1, 0x82, }, - { 0x12, 0x8c, 0x6d, 0x52, 0x08, 0xef, 0x74, 0xb2, 0xe6, 0xaa, 0xd3, 0xb0, - 0x26, 0xb0, 0xd9, 0x94, 0xb6, 0x11, 0x45, 0x0e, 0x36, 0x71, 0x14, 0x2d, - 0x41, 0x8c, 0x21, 0x53, 0x31, 0xe9, 0x68, }, - { 0xee, 0xea, 0x0d, 0x89, 0x47, 0x7e, 0x72, 0xd1, 0xd8, 0xce, 0x58, 0x4c, - 0x94, 0x1f, 0x0d, 0x51, 0x08, 0xa3, 0xb6, 0x3d, 0xe7, 0x82, 0x46, 0x92, - 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, -}; - -static bool __init noinline_for_stack blake2s_digest_test(void) -{ - u8 key[BLAKE2S_KEY_SIZE]; - u8 buf[ARRAY_SIZE(blake2s_testvecs)]; - u8 hash[BLAKE2S_HASH_SIZE]; - struct blake2s_state state; - bool success = true; - int i, l; - - key[0] = key[1] = 1; - for (i = 2; i < sizeof(key); ++i) - key[i] = key[i - 2] + key[i - 1]; - - for (i = 0; i < sizeof(buf); ++i) - buf[i] = (u8)i; - - for (i = l = 0; i < ARRAY_SIZE(blake2s_testvecs); l = (l + 37) % ++i) { - int outlen = 1 + i % BLAKE2S_HASH_SIZE; - int keylen = (13 * i) % (BLAKE2S_KEY_SIZE + 1); - - blake2s(hash, buf, key + BLAKE2S_KEY_SIZE - keylen, outlen, i, - keylen); - if (memcmp(hash, blake2s_testvecs[i], outlen)) { - pr_err("blake2s self-test %d: FAIL\n", i + 1); - success = false; - } - - if (!keylen) - blake2s_init(&state, outlen); - else - blake2s_init_key(&state, outlen, - key + BLAKE2S_KEY_SIZE - keylen, - keylen); - - blake2s_update(&state, buf, l); - blake2s_update(&state, buf + l, i - l); - blake2s_final(&state, hash); - if (memcmp(hash, blake2s_testvecs[i], outlen)) { - pr_err("blake2s init/update/final self-test %d: FAIL\n", - i + 1); - success = false; - } - } - - return success; -} - -static bool __init noinline_for_stack blake2s_random_test(void) -{ - struct blake2s_state state; - bool success = true; - int i, l; - - for (i = 0; i < 32; ++i) { - enum { TEST_ALIGNMENT = 16 }; - u8 blocks[BLAKE2S_BLOCK_SIZE * 2 + TEST_ALIGNMENT - 1] - __aligned(TEST_ALIGNMENT); - u8 *unaligned_block = blocks + BLAKE2S_BLOCK_SIZE; - struct blake2s_state state1, state2; - - get_random_bytes(blocks, sizeof(blocks)); - get_random_bytes(&state, sizeof(state)); - -#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \ - defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) - memcpy(&state1, &state, sizeof(state1)); - memcpy(&state2, &state, sizeof(state2)); - blake2s_compress(&state1, blocks, 2, BLAKE2S_BLOCK_SIZE); - 
blake2s_compress_generic(&state2, blocks, 2, BLAKE2S_BLOCK_SIZE); - if (memcmp(&state1, &state2, sizeof(state1))) { - pr_err("blake2s random compress self-test %d: FAIL\n", - i + 1); - success = false; - } -#endif - - memcpy(&state1, &state, sizeof(state1)); - blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE); - for (l = 1; l < TEST_ALIGNMENT; ++l) { - memcpy(unaligned_block + l, blocks, - BLAKE2S_BLOCK_SIZE); - memcpy(&state2, &state, sizeof(state2)); - blake2s_compress(&state2, unaligned_block + l, 1, - BLAKE2S_BLOCK_SIZE); - if (memcmp(&state1, &state2, sizeof(state1))) { - pr_err("blake2s random compress align %d self-test %d: FAIL\n", - l, i + 1); - success = false; - } - } - } - - return success; -} - -bool __init blake2s_selftest(void) -{ - bool success; - - success = blake2s_digest_test(); - success &= blake2s_random_test(); - - return success; -} diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index f6ec68c3dcda..51f2dd7a38a4 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -59,14 +58,5 @@ void blake2s_final(struct blake2s_state *state, u8 *out) } EXPORT_SYMBOL(blake2s_final); -static int __init blake2s_mod_init(void) -{ - if (IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) && - WARN_ON(!blake2s_selftest())) - return -ENODEV; - return 0; -} - -module_init(blake2s_mod_init); MODULE_DESCRIPTION("BLAKE2s hash function"); MODULE_AUTHOR("Jason A. Donenfeld "); -- cgit v1.2.3 From 56e48d4e138cb105a17e0f8c257f3bc41b1bd69d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:28 -0700 Subject: lib/crypto: blake2s: Always enable arch-optimized BLAKE2s code When support for a crypto algorithm is enabled, the arch-optimized implementation of that algorithm should be enabled too. We've learned this the hard way many times over the years: people regularly forget to enable the arch-optimized implementations of the crypto algorithms, resulting in significant performance being left on the table. Currently, BLAKE2s support is always enabled ('obj-y'), since random.c uses it. Therefore, the arch-optimized BLAKE2s code, which exists for ARM and x86_64, should be always enabled too. Let's do that. Note that the effect on kernel image size is very small and should not be a concern. On ARM, enabling CRYPTO_BLAKE2S_ARM actually *shrinks* the kernel size by about 1200 bytes, since the ARM-optimized blake2s_compress() completely replaces the generic blake2s_compress(). On x86_64, enabling CRYPTO_BLAKE2S_X86 increases the kernel size by about 1400 bytes, as the generic blake2s_compress() is still included as a fallback; however, for context, that is only about a quarter the size of the generic blake2s_compress(). The x86_64 optimized BLAKE2s code uses much less icache at runtime than the generic code. 
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-10-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/arm/Kconfig | 2 +- lib/crypto/x86/Kconfig | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig index 740341aa35d2..a5607ad079c4 100644 --- a/lib/crypto/arm/Kconfig +++ b/lib/crypto/arm/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config CRYPTO_BLAKE2S_ARM - bool "Hash functions: BLAKE2s" + def_bool y select CRYPTO_ARCH_HAVE_LIB_BLAKE2S help BLAKE2s cryptographic hash function (RFC 7693) diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig index eb47da71aa6b..ffa718321369 100644 --- a/lib/crypto/x86/Kconfig +++ b/lib/crypto/x86/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config CRYPTO_BLAKE2S_X86 - bool "Hash functions: BLAKE2s (SSSE3/AVX-512)" + def_bool y depends on 64BIT select CRYPTO_LIB_BLAKE2S_GENERIC select CRYPTO_ARCH_HAVE_LIB_BLAKE2S -- cgit v1.2.3 From 5d313a7625fabe9b92eaca7c5ce0e6d1019d279a Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:29 -0700 Subject: lib/crypto: blake2s: Move generic code into blake2s.c Move blake2s_compress_generic() from blake2s-generic.c to blake2s.c. For now it's still guarded by CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC, but this prepares for changing it to a 'static __maybe_unused' function and just using the compiler to automatically decide its inclusion. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-11-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 1 - lib/crypto/blake2s-generic.c | 111 ------------------------------------------- lib/crypto/blake2s.c | 94 ++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+), 112 deletions(-) delete mode 100644 lib/crypto/blake2s-generic.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index b2d2745879d1..c9193f0604d9 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -32,7 +32,6 @@ obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o # blake2s is used by the /dev/random driver which is always builtin obj-y += libblake2s.o libblake2s-y := blake2s.o -libblake2s-$(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) += blake2s-generic.o ################################################################################ diff --git a/lib/crypto/blake2s-generic.c b/lib/crypto/blake2s-generic.c deleted file mode 100644 index 9828176a2efe..000000000000 --- a/lib/crypto/blake2s-generic.c +++ /dev/null @@ -1,111 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - * - * This is an implementation of the BLAKE2s hash and PRF functions. 
- * - * Information: https://blake2.net/ - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -static const u8 blake2s_sigma[10][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, -}; - -static inline void blake2s_increment_counter(struct blake2s_state *state, - const u32 inc) -{ - state->t[0] += inc; - state->t[1] += (state->t[0] < inc); -} - -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) - __weak __alias(blake2s_compress_generic); - -void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) -{ - u32 m[16]; - u32 v[16]; - int i; - - WARN_ON(IS_ENABLED(DEBUG) && - (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE)); - - while (nblocks > 0) { - blake2s_increment_counter(state, inc); - memcpy(m, block, BLAKE2S_BLOCK_SIZE); - le32_to_cpu_array(m, ARRAY_SIZE(m)); - memcpy(v, state->h, 32); - v[ 8] = BLAKE2S_IV0; - v[ 9] = BLAKE2S_IV1; - v[10] = BLAKE2S_IV2; - v[11] = BLAKE2S_IV3; - v[12] = BLAKE2S_IV4 ^ state->t[0]; - v[13] = BLAKE2S_IV5 ^ state->t[1]; - v[14] = BLAKE2S_IV6 ^ state->f[0]; - v[15] = BLAKE2S_IV7 ^ state->f[1]; - -#define G(r, i, a, b, c, d) do { \ - a += b + m[blake2s_sigma[r][2 * i + 0]]; \ - d = ror32(d ^ a, 16); \ - c += d; \ - b = ror32(b ^ c, 12); \ - a += b + m[blake2s_sigma[r][2 * i + 1]]; \ - d = ror32(d ^ a, 8); \ - c += d; \ - b = ror32(b ^ c, 7); \ -} while (0) - -#define ROUND(r) do { \ - G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ - G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ - G(r, 2, v[2], v[ 6], v[10], v[14]); \ - G(r, 3, v[3], v[ 7], v[11], v[15]); \ - G(r, 4, v[0], v[ 5], v[10], v[15]); \ - G(r, 5, v[1], v[ 6], v[11], v[12]); \ - G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ - G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ -} while (0) - ROUND(0); - ROUND(1); - ROUND(2); - ROUND(3); - ROUND(4); - ROUND(5); - ROUND(6); - ROUND(7); - ROUND(8); - ROUND(9); - -#undef G -#undef ROUND - - for (i = 0; i < 8; ++i) - state->h[i] ^= v[i] ^ v[i + 8]; - - block += BLAKE2S_BLOCK_SIZE; - --nblocks; - } -} - -EXPORT_SYMBOL(blake2s_compress_generic); diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 51f2dd7a38a4..b5b75ade4658 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -16,6 +16,100 @@ #include #include +#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC +static const u8 blake2s_sigma[10][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, +}; + +static 
inline void blake2s_increment_counter(struct blake2s_state *state, + const u32 inc) +{ + state->t[0] += inc; + state->t[1] += (state->t[0] < inc); +} + +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) + __weak __alias(blake2s_compress_generic); + +void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) +{ + u32 m[16]; + u32 v[16]; + int i; + + WARN_ON(IS_ENABLED(DEBUG) && + (nblocks > 1 && inc != BLAKE2S_BLOCK_SIZE)); + + while (nblocks > 0) { + blake2s_increment_counter(state, inc); + memcpy(m, block, BLAKE2S_BLOCK_SIZE); + le32_to_cpu_array(m, ARRAY_SIZE(m)); + memcpy(v, state->h, 32); + v[ 8] = BLAKE2S_IV0; + v[ 9] = BLAKE2S_IV1; + v[10] = BLAKE2S_IV2; + v[11] = BLAKE2S_IV3; + v[12] = BLAKE2S_IV4 ^ state->t[0]; + v[13] = BLAKE2S_IV5 ^ state->t[1]; + v[14] = BLAKE2S_IV6 ^ state->f[0]; + v[15] = BLAKE2S_IV7 ^ state->f[1]; + +#define G(r, i, a, b, c, d) do { \ + a += b + m[blake2s_sigma[r][2 * i + 0]]; \ + d = ror32(d ^ a, 16); \ + c += d; \ + b = ror32(b ^ c, 12); \ + a += b + m[blake2s_sigma[r][2 * i + 1]]; \ + d = ror32(d ^ a, 8); \ + c += d; \ + b = ror32(b ^ c, 7); \ +} while (0) + +#define ROUND(r) do { \ + G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ + G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ + G(r, 2, v[2], v[ 6], v[10], v[14]); \ + G(r, 3, v[3], v[ 7], v[11], v[15]); \ + G(r, 4, v[0], v[ 5], v[10], v[15]); \ + G(r, 5, v[1], v[ 6], v[11], v[12]); \ + G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ + G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ +} while (0) + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + +#undef G +#undef ROUND + + for (i = 0; i < 8; ++i) + state->h[i] ^= v[i] ^ v[i + 8]; + + block += BLAKE2S_BLOCK_SIZE; + --nblocks; + } +} +EXPORT_SYMBOL(blake2s_compress_generic); +#endif /* CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC */ + static inline void blake2s_set_lastblock(struct blake2s_state *state) { state->f[0] = -1; -- cgit v1.2.3 From 39ee3970f26d55b57343da392d45117d7f893205 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:30 -0700 Subject: lib/crypto: blake2s: Consolidate into single C translation unit As was done with the other algorithms, reorganize the BLAKE2s code so that the generic implementation and the arch-specific "glue" code is consolidated into a single translation unit, so that the compiler will inline the functions and automatically decide whether to include the generic code in the resulting binary or not. Similarly, also consolidate the build rules into lib/crypto/{Makefile,Kconfig}. This removes the last uses of lib/crypto/{arm,x86}/{Makefile,Kconfig}, so remove those too. Don't keep the !KMSAN dependency. It was needed only for other algorithms such as ChaCha that initialize memory from assembly code. 
Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-12-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 29 ++++-------------- lib/crypto/Makefile | 13 ++++---- lib/crypto/arm/Kconfig | 14 --------- lib/crypto/arm/Makefile | 4 --- lib/crypto/arm/blake2s-core.S | 5 +++- lib/crypto/arm/blake2s-glue.c | 7 ----- lib/crypto/arm/blake2s.h | 5 ++++ lib/crypto/blake2s.c | 29 +++++++++++------- lib/crypto/x86/Kconfig | 13 -------- lib/crypto/x86/Makefile | 4 --- lib/crypto/x86/blake2s-glue.c | 70 ------------------------------------------- lib/crypto/x86/blake2s.h | 64 +++++++++++++++++++++++++++++++++++++++ 12 files changed, 106 insertions(+), 151 deletions(-) delete mode 100644 lib/crypto/arm/Kconfig delete mode 100644 lib/crypto/arm/Makefile delete mode 100644 lib/crypto/arm/blake2s-glue.c create mode 100644 lib/crypto/arm/blake2s.h delete mode 100644 lib/crypto/x86/Kconfig delete mode 100644 lib/crypto/x86/Makefile delete mode 100644 lib/crypto/x86/blake2s-glue.c create mode 100644 lib/crypto/x86/blake2s.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index c1db483bc230..37d85e0c9b97 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -28,21 +28,13 @@ config CRYPTO_LIB_ARC4 config CRYPTO_LIB_GF128MUL tristate -config CRYPTO_ARCH_HAVE_LIB_BLAKE2S - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the Blake2s library interface, - either builtin or as a module. +# BLAKE2s support is always built-in, so there's no CRYPTO_LIB_BLAKE2S option. -config CRYPTO_LIB_BLAKE2S_GENERIC - def_bool !CRYPTO_ARCH_HAVE_LIB_BLAKE2S - help - This symbol can be depended upon by arch implementations of the - Blake2s library interface that require the generic code as a - fallback, e.g., for SIMD implementations. If no arch specific - implementation is enabled, this implementation serves the users - of CRYPTO_LIB_BLAKE2S. 
+config CRYPTO_LIB_BLAKE2S_ARCH + bool + depends on !UML + default y if ARM + default y if X86_64 config CRYPTO_LIB_CHACHA tristate @@ -208,13 +200,4 @@ config CRYPTO_LIB_SM3 source "lib/crypto/tests/Kconfig" -if !KMSAN # avoid false positives from assembly -if ARM -source "lib/crypto/arm/Kconfig" -endif -if X86 -source "lib/crypto/x86/Kconfig" -endif -endif - endmenu diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index c9193f0604d9..ad27c5bf99e1 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -29,9 +29,15 @@ libarc4-y := arc4.o obj-$(CONFIG_CRYPTO_LIB_GF128MUL) += gf128mul.o +################################################################################ + # blake2s is used by the /dev/random driver which is always builtin -obj-y += libblake2s.o -libblake2s-y := blake2s.o +obj-y += blake2s.o +ifeq ($(CONFIG_CRYPTO_LIB_BLAKE2S_ARCH),y) +CFLAGS_blake2s.o += -I$(src)/$(SRCARCH) +obj-$(CONFIG_ARM) += arm/blake2s-core.o +obj-$(CONFIG_X86) += x86/blake2s-core.o +endif ################################################################################ @@ -256,9 +262,6 @@ obj-$(CONFIG_CRYPTO_SELFTESTS_FULL) += simd.o obj-$(CONFIG_CRYPTO_LIB_SM3) += libsm3.o libsm3-y := sm3.o -obj-$(CONFIG_ARM) += arm/ -obj-$(CONFIG_X86) += x86/ - # clean-files must be defined unconditionally clean-files += arm/sha256-core.S arm/sha512-core.S clean-files += arm64/sha256-core.S arm64/sha512-core.S diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig deleted file mode 100644 index a5607ad079c4..000000000000 --- a/lib/crypto/arm/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_BLAKE2S_ARM - def_bool y - select CRYPTO_ARCH_HAVE_LIB_BLAKE2S - help - BLAKE2s cryptographic hash function (RFC 7693) - - Architecture: arm - - This is faster than the generic implementations of BLAKE2s and - BLAKE2b, but slower than the NEON implementation of BLAKE2b. - There is no NEON implementation of BLAKE2s, since NEON doesn't - really help with it. diff --git a/lib/crypto/arm/Makefile b/lib/crypto/arm/Makefile deleted file mode 100644 index 0574b0e9739e..000000000000 --- a/lib/crypto/arm/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o -libblake2s-arm-y := blake2s-core.o blake2s-glue.o diff --git a/lib/crypto/arm/blake2s-core.S b/lib/crypto/arm/blake2s-core.S index df40e46601f1..293f44fa8f31 100644 --- a/lib/crypto/arm/blake2s-core.S +++ b/lib/crypto/arm/blake2s-core.S @@ -1,6 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * BLAKE2s digest algorithm, ARM scalar implementation + * BLAKE2s digest algorithm, ARM scalar implementation. This is faster + * than the generic implementations of BLAKE2s and BLAKE2b, but slower + * than the NEON implementation of BLAKE2b. There is no NEON + * implementation of BLAKE2s, since NEON doesn't really help with it. 
* * Copyright 2020 Google LLC * diff --git a/lib/crypto/arm/blake2s-glue.c b/lib/crypto/arm/blake2s-glue.c deleted file mode 100644 index 0238a70d9581..000000000000 --- a/lib/crypto/arm/blake2s-glue.c +++ /dev/null @@ -1,7 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -#include -#include - -/* defined in blake2s-core.S */ -EXPORT_SYMBOL(blake2s_compress); diff --git a/lib/crypto/arm/blake2s.h b/lib/crypto/arm/blake2s.h new file mode 100644 index 000000000000..aa7a97139ea7 --- /dev/null +++ b/lib/crypto/arm/blake2s.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* defined in blake2s-core.S */ +void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, u32 inc); diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index b5b75ade4658..5638ed9d882d 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -8,7 +8,7 @@ * */ -#include +#include #include #include #include @@ -16,7 +16,6 @@ #include #include -#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC static const u8 blake2s_sigma[10][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, @@ -37,12 +36,9 @@ static inline void blake2s_increment_counter(struct blake2s_state *state, state->t[1] += (state->t[0] < inc); } -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) - __weak __alias(blake2s_compress_generic); - -void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) +static void __maybe_unused +blake2s_compress_generic(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) { u32 m[16]; u32 v[16]; @@ -107,8 +103,12 @@ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, --nblocks; } } -EXPORT_SYMBOL(blake2s_compress_generic); -#endif /* CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC */ + +#ifdef CONFIG_CRYPTO_LIB_BLAKE2S_ARCH +#include "blake2s.h" /* $(SRCARCH)/blake2s.h */ +#else +#define blake2s_compress blake2s_compress_generic +#endif static inline void blake2s_set_lastblock(struct blake2s_state *state) { @@ -152,5 +152,14 @@ void blake2s_final(struct blake2s_state *state, u8 *out) } EXPORT_SYMBOL(blake2s_final); +#ifdef blake2s_mod_init_arch +static int __init blake2s_mod_init(void) +{ + blake2s_mod_init_arch(); + return 0; +} +subsys_initcall(blake2s_mod_init); +#endif + MODULE_DESCRIPTION("BLAKE2s hash function"); MODULE_AUTHOR("Jason A. 
Donenfeld "); diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig deleted file mode 100644 index ffa718321369..000000000000 --- a/lib/crypto/x86/Kconfig +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -config CRYPTO_BLAKE2S_X86 - def_bool y - depends on 64BIT - select CRYPTO_LIB_BLAKE2S_GENERIC - select CRYPTO_ARCH_HAVE_LIB_BLAKE2S - help - BLAKE2s cryptographic hash function (RFC 7693) - - Architecture: x86_64 using: - - SSSE3 (Supplemental SSE3) - - AVX-512 (Advanced Vector Extensions-512) diff --git a/lib/crypto/x86/Makefile b/lib/crypto/x86/Makefile deleted file mode 100644 index 4454556b243e..000000000000 --- a/lib/crypto/x86/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only - -obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o -libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o diff --git a/lib/crypto/x86/blake2s-glue.c b/lib/crypto/x86/blake2s-glue.c deleted file mode 100644 index adc296cd17c9..000000000000 --- a/lib/crypto/x86/blake2s-glue.c +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, - const u8 *block, const size_t nblocks, - const u32 inc); -asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, - const u8 *block, const size_t nblocks, - const u32 inc); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); -static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); - -void blake2s_compress(struct blake2s_state *state, const u8 *block, - size_t nblocks, const u32 inc) -{ - /* SIMD disables preemption, so relax after processing each page. */ - BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); - - if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) { - blake2s_compress_generic(state, block, nblocks, inc); - return; - } - - do { - const size_t blocks = min_t(size_t, nblocks, - SZ_4K / BLAKE2S_BLOCK_SIZE); - - kernel_fpu_begin(); - if (static_branch_likely(&blake2s_use_avx512)) - blake2s_compress_avx512(state, block, blocks, inc); - else - blake2s_compress_ssse3(state, block, blocks, inc); - kernel_fpu_end(); - - nblocks -= blocks; - block += blocks * BLAKE2S_BLOCK_SIZE; - } while (nblocks); -} -EXPORT_SYMBOL(blake2s_compress); - -static int __init blake2s_mod_init(void) -{ - if (boot_cpu_has(X86_FEATURE_SSSE3)) - static_branch_enable(&blake2s_use_ssse3); - - if (boot_cpu_has(X86_FEATURE_AVX) && - boot_cpu_has(X86_FEATURE_AVX2) && - boot_cpu_has(X86_FEATURE_AVX512F) && - boot_cpu_has(X86_FEATURE_AVX512VL) && - cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | - XFEATURE_MASK_AVX512, NULL)) - static_branch_enable(&blake2s_use_avx512); - - return 0; -} - -subsys_initcall(blake2s_mod_init); diff --git a/lib/crypto/x86/blake2s.h b/lib/crypto/x86/blake2s.h new file mode 100644 index 000000000000..b6d30d2fa045 --- /dev/null +++ b/lib/crypto/x86/blake2s.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); +asmlinkage void blake2s_compress_avx512(struct blake2s_state *state, + const u8 *block, const size_t nblocks, + const u32 inc); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); +static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); + +static void blake2s_compress(struct blake2s_state *state, const u8 *block, + size_t nblocks, const u32 inc) +{ + /* SIMD disables preemption, so relax after processing each page. */ + BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); + + if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) { + blake2s_compress_generic(state, block, nblocks, inc); + return; + } + + do { + const size_t blocks = min_t(size_t, nblocks, + SZ_4K / BLAKE2S_BLOCK_SIZE); + + kernel_fpu_begin(); + if (static_branch_likely(&blake2s_use_avx512)) + blake2s_compress_avx512(state, block, blocks, inc); + else + blake2s_compress_ssse3(state, block, blocks, inc); + kernel_fpu_end(); + + nblocks -= blocks; + block += blocks * BLAKE2S_BLOCK_SIZE; + } while (nblocks); +} + +#define blake2s_mod_init_arch blake2s_mod_init_arch +static void blake2s_mod_init_arch(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + static_branch_enable(&blake2s_use_ssse3); + + if (boot_cpu_has(X86_FEATURE_AVX) && + boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_AVX512F) && + boot_cpu_has(X86_FEATURE_AVX512VL) && + cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | + XFEATURE_MASK_AVX512, NULL)) + static_branch_enable(&blake2s_use_avx512); +} -- cgit v1.2.3 From 362f92286065d9f8282da5def89e173a12191568 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 27 Aug 2025 08:11:31 -0700 Subject: lib/crypto: tests: Add KUnit tests for BLAKE2s Add a KUnit test suite for BLAKE2s. Most of the core test logic is in the previously-added hash-test-template.h. This commit just adds the actual KUnit suite, commits the generated test vectors to the tree so that gen-hash-testvecs.py won't have to be run at build time, and adds a few BLAKE2s-specific test cases. This is the replacement for blake2s-selftest, which an earlier commit removed. Improvements over blake2s-selftest include integration with KUnit, more comprehensive test cases, and support for benchmarking. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20250827151131.27733-13-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/Kconfig | 10 ++ lib/crypto/tests/Makefile | 1 + lib/crypto/tests/blake2s-testvecs.h | 238 ++++++++++++++++++++++++++++++++++++ lib/crypto/tests/blake2s_kunit.c | 134 ++++++++++++++++++++ 4 files changed, 383 insertions(+) create mode 100644 lib/crypto/tests/blake2s-testvecs.h create mode 100644 lib/crypto/tests/blake2s_kunit.c (limited to 'lib') diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig index c21d53fd4b0c..fd341aa12f15 100644 --- a/lib/crypto/tests/Kconfig +++ b/lib/crypto/tests/Kconfig @@ -1,5 +1,15 @@ # SPDX-License-Identifier: GPL-2.0-or-later +config CRYPTO_LIB_BLAKE2S_KUNIT_TEST + tristate "KUnit tests for BLAKE2s" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS + select CRYPTO_LIB_BENCHMARK_VISIBLE + # No need to select CRYPTO_LIB_BLAKE2S here, as that option doesn't + # exist; the BLAKE2s code is always built-in for the /dev/random driver. 
+ help + KUnit tests for the BLAKE2s cryptographic hash function. + config CRYPTO_LIB_MD5_KUNIT_TEST tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index f6f82c6f9cb5..be7de929af2c 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later +obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST) += blake2s_kunit.o obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o diff --git a/lib/crypto/tests/blake2s-testvecs.h b/lib/crypto/tests/blake2s-testvecs.h new file mode 100644 index 000000000000..6f978b79a59b --- /dev/null +++ b/lib/crypto/tests/blake2s-testvecs.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* This file was generated by: ./scripts/crypto/gen-hash-testvecs.py blake2s */ + +static const struct { + size_t data_len; + u8 digest[BLAKE2S_HASH_SIZE]; +} hash_testvecs[] = { + { + .data_len = 0, + .digest = { + 0x69, 0x21, 0x7a, 0x30, 0x79, 0x90, 0x80, 0x94, + 0xe1, 0x11, 0x21, 0xd0, 0x42, 0x35, 0x4a, 0x7c, + 0x1f, 0x55, 0xb6, 0x48, 0x2c, 0xa1, 0xa5, 0x1e, + 0x1b, 0x25, 0x0d, 0xfd, 0x1e, 0xd0, 0xee, 0xf9, + }, + }, + { + .data_len = 1, + .digest = { + 0x7c, 0xab, 0x53, 0xe2, 0x48, 0x87, 0xdf, 0x64, + 0x98, 0x6a, 0xc1, 0x7e, 0xf0, 0x01, 0x4d, 0xc9, + 0x07, 0x4f, 0xb8, 0x2f, 0x46, 0xd7, 0xee, 0xa9, + 0xad, 0xe5, 0xf8, 0x21, 0xac, 0xfe, 0x17, 0x58, + }, + }, + { + .data_len = 2, + .digest = { + 0x5e, 0x63, 0x2c, 0xd0, 0xf8, 0x7b, 0xf5, 0xae, + 0x61, 0x97, 0x94, 0x57, 0xc8, 0x76, 0x22, 0xd9, + 0x8b, 0x04, 0x5e, 0xf1, 0x5d, 0xd0, 0xfc, 0xd9, + 0x0c, 0x19, 0x2e, 0xe2, 0xc5, 0xd9, 0x73, 0x51, + }, + }, + { + .data_len = 3, + .digest = { + 0x33, 0x65, 0xa6, 0x37, 0xbf, 0xf8, 0x4f, 0x15, + 0x4c, 0xac, 0x9e, 0xa4, 0x3b, 0x02, 0x07, 0x0c, + 0x80, 0x86, 0x0d, 0x6c, 0xe4, 0xaf, 0x1c, 0xbc, + 0x0b, 0x9c, 0x0a, 0x98, 0xc2, 0x99, 0x71, 0xcd, + }, + }, + { + .data_len = 16, + .digest = { + 0x59, 0xd2, 0x10, 0xd3, 0x75, 0xac, 0x48, 0x32, + 0xb1, 0xea, 0xee, 0xcf, 0x0a, 0xd2, 0x8b, 0x15, + 0x5d, 0x72, 0x71, 0x4c, 0xa7, 0x29, 0xb0, 0x7a, + 0x44, 0x48, 0x8a, 0x54, 0x54, 0x54, 0x41, 0xf5, + }, + }, + { + .data_len = 32, + .digest = { + 0xdc, 0xfc, 0x46, 0x81, 0xc6, 0x1b, 0x2b, 0x47, + 0x8b, 0xed, 0xe0, 0x73, 0x34, 0x38, 0x53, 0x92, + 0x97, 0x2f, 0xfb, 0x51, 0xab, 0x4f, 0x2d, 0x9d, + 0x69, 0x04, 0xa9, 0x5d, 0x33, 0xef, 0xcb, 0x1c, + }, + }, + { + .data_len = 48, + .digest = { + 0xd6, 0x2a, 0x7f, 0x96, 0x04, 0x4d, 0x16, 0xc8, + 0x49, 0xe0, 0x37, 0x33, 0xe3, 0x7b, 0x34, 0x56, + 0x99, 0xc5, 0x78, 0x57, 0x06, 0x02, 0xb4, 0xea, + 0x80, 0xc4, 0xf8, 0x8f, 0x8d, 0x2b, 0xe4, 0x05, + }, + }, + { + .data_len = 49, + .digest = { + 0x8b, 0x58, 0x62, 0xb5, 0x85, 0xf6, 0x83, 0x36, + 0xf5, 0x34, 0xb8, 0xd4, 0xbc, 0x5c, 0x8b, 0x38, + 0xfd, 0x15, 0xcd, 0x44, 0x83, 0x25, 0x71, 0xe1, + 0xd5, 0xe8, 0xa1, 0xa4, 0x36, 0x98, 0x7e, 0x68, + }, + }, + { + .data_len = 63, + .digest = { + 0x7e, 0xeb, 0x06, 0x87, 0xdf, 0x1a, 0xdc, 0xe5, + 0xfb, 0x64, 0xd4, 0xd1, 0x5d, 0x9e, 0x75, 0xc0, + 0xb9, 0xad, 0x55, 0x6c, 0xe6, 0xba, 0x4d, 0x98, + 0x2f, 0xbf, 0x72, 0xad, 0x61, 0x37, 0xf6, 0x11, + }, + }, + { + .data_len = 64, + .digest = { + 0x72, 0xdb, 0x43, 0x16, 0x57, 0x8e, 0x3a, 0x96, + 0xf3, 0x98, 0x19, 0x24, 0x17, 0x3b, 0xe8, 0xad, + 0xa1, 0x9b, 0xa4, 0x1b, 0x74, 0x85, 0x2e, 0x24, + 0x70, 0xea, 0x31, 0x5a, 0x1c, 0xbe, 0x43, 0xb5, + }, + }, + { + 
.data_len = 65, + .digest = { + 0x32, 0x48, 0xb0, 0xf0, 0x3f, 0xbb, 0xd2, 0xa3, + 0xfd, 0xf6, 0x28, 0x4a, 0x2a, 0xc5, 0xbe, 0x4b, + 0x73, 0x50, 0x63, 0xd6, 0x16, 0x00, 0xef, 0xed, + 0xfe, 0x97, 0x41, 0x29, 0xb2, 0x84, 0xc4, 0xa3, + }, + }, + { + .data_len = 127, + .digest = { + 0x17, 0xda, 0x6b, 0x96, 0x6a, 0xa6, 0xa4, 0xa6, + 0xa6, 0xf3, 0x9d, 0x18, 0x19, 0x8d, 0x98, 0x7c, + 0x66, 0x38, 0xe8, 0x99, 0xe7, 0x0a, 0x50, 0x92, + 0xaf, 0x11, 0x80, 0x05, 0x66, 0xed, 0xab, 0x74, + }, + }, + { + .data_len = 128, + .digest = { + 0x13, 0xd5, 0x8b, 0x22, 0xae, 0x90, 0x7b, 0x67, + 0x87, 0x4e, 0x3c, 0x35, 0x4e, 0x01, 0xf0, 0xb1, + 0xd3, 0xd1, 0x67, 0xbb, 0x43, 0xdb, 0x7c, 0x75, + 0xa4, 0xc7, 0x64, 0x83, 0x1e, 0x9b, 0x98, 0xad, + }, + }, + { + .data_len = 129, + .digest = { + 0x6f, 0xe0, 0x5d, 0x9d, 0xd5, 0x78, 0x29, 0xfb, + 0xd0, 0x77, 0xd1, 0x8a, 0xf0, 0x80, 0xcb, 0x81, + 0x71, 0x9e, 0x4d, 0x49, 0xde, 0x74, 0x2a, 0x37, + 0xc0, 0xd5, 0xf0, 0xfa, 0x50, 0xe6, 0x23, 0xfe, + }, + }, + { + .data_len = 256, + .digest = { + 0x89, 0xac, 0xf6, 0xe7, 0x5e, 0xba, 0x53, 0xf4, + 0x92, 0x32, 0xd5, 0x64, 0xfb, 0xc4, 0x08, 0xac, + 0x2c, 0x19, 0x6e, 0x63, 0x13, 0x75, 0xd0, 0x60, + 0x54, 0x35, 0x82, 0xc4, 0x6d, 0x03, 0x1a, 0x05, + }, + }, + { + .data_len = 511, + .digest = { + 0x1c, 0xaf, 0x94, 0x7d, 0x9c, 0xce, 0x57, 0x64, + 0xf8, 0xa8, 0x25, 0x45, 0x32, 0x86, 0x2b, 0x04, + 0xb3, 0x2e, 0x67, 0xca, 0x73, 0x04, 0x2f, 0xab, + 0xcc, 0xda, 0x9e, 0x42, 0xa1, 0xaf, 0x83, 0x5a, + }, + }, + { + .data_len = 513, + .digest = { + 0x21, 0xdf, 0xdc, 0x29, 0xd9, 0xfc, 0x7b, 0xe7, + 0x3a, 0xc4, 0xe1, 0x61, 0xc5, 0xb5, 0xe1, 0xee, + 0x7a, 0x9d, 0x0c, 0x66, 0x36, 0x63, 0xe4, 0x12, + 0x62, 0xe2, 0xf5, 0x68, 0x72, 0xfc, 0x1e, 0x18, + }, + }, + { + .data_len = 1000, + .digest = { + 0x6e, 0xc7, 0x2e, 0xac, 0xd0, 0xbb, 0x22, 0xe0, + 0xc2, 0x40, 0xb2, 0xfe, 0x8c, 0xaf, 0x9e, 0xcf, + 0x32, 0x06, 0xc6, 0x45, 0x29, 0xbd, 0xe0, 0x7f, + 0x53, 0x32, 0xc3, 0x2b, 0x2f, 0x68, 0x12, 0xcd, + }, + }, + { + .data_len = 3333, + .digest = { + 0x76, 0xba, 0x52, 0xb5, 0x09, 0xf5, 0x19, 0x09, + 0x70, 0x1c, 0x09, 0x28, 0xb4, 0xaa, 0x98, 0x6a, + 0x79, 0xe7, 0x5e, 0xcd, 0xe8, 0xa4, 0x73, 0x69, + 0x1f, 0xf8, 0x05, 0x0a, 0xb4, 0xfe, 0xf9, 0x63, + }, + }, + { + .data_len = 4096, + .digest = { + 0xf7, 0xad, 0xf9, 0xc8, 0x0e, 0x04, 0x2f, 0xdf, + 0xbe, 0x39, 0x79, 0x07, 0x0d, 0xd8, 0x1b, 0x06, + 0x42, 0x3a, 0x43, 0x93, 0xf6, 0x7c, 0xc4, 0xe5, + 0xc2, 0xd5, 0xd0, 0xa6, 0x35, 0x6c, 0xbd, 0x17, + }, + }, + { + .data_len = 4128, + .digest = { + 0x38, 0xd7, 0xab, 0x7e, 0x08, 0xdc, 0x1e, 0xab, + 0x55, 0xbb, 0x3b, 0x7b, 0x6a, 0x17, 0xcc, 0x79, + 0xa7, 0x02, 0x62, 0x66, 0x9b, 0xca, 0xee, 0xc0, + 0x3d, 0x75, 0x34, 0x2e, 0x55, 0x82, 0x26, 0x3c, + }, + }, + { + .data_len = 4160, + .digest = { + 0xf7, 0xeb, 0x2f, 0x24, 0x98, 0x54, 0x04, 0x5a, + 0x19, 0xe4, 0x12, 0x9d, 0x97, 0xbc, 0x87, 0xa5, + 0x0b, 0x85, 0x29, 0xa1, 0x36, 0x89, 0xc9, 0xba, + 0xa0, 0xe0, 0xac, 0x99, 0x7d, 0xa4, 0x51, 0x9f, + }, + }, + { + .data_len = 4224, + .digest = { + 0x8f, 0xe8, 0xa7, 0x79, 0x02, 0xbb, 0x4a, 0x56, + 0x66, 0x91, 0xef, 0x22, 0xd1, 0x09, 0x26, 0x6c, + 0xa9, 0x13, 0xd7, 0x44, 0xc7, 0x19, 0x9c, 0x0b, + 0xfb, 0x4f, 0xca, 0x72, 0x8f, 0x34, 0xf7, 0x82, + }, + }, + { + .data_len = 16384, + .digest = { + 0xaa, 0x21, 0xbb, 0x25, 0x4b, 0x66, 0x6e, 0x29, + 0x71, 0xc1, 0x44, 0x67, 0x19, 0xed, 0xe6, 0xe6, + 0x61, 0x13, 0xf4, 0xb7, 0x02, 0x94, 0x81, 0x0f, + 0xa7, 0x4d, 0xbb, 0x2c, 0xb8, 0xeb, 0x41, 0x0e, + }, + }, +}; + +static const u8 hash_testvec_consolidated[BLAKE2S_HASH_SIZE] = { + 0x84, 0x21, 0xbb, 
0x73, 0x64, 0x47, 0x45, 0xe0, + 0xc1, 0x83, 0x78, 0xf1, 0xea, 0xe5, 0xfd, 0xdb, + 0x01, 0xda, 0xb7, 0x86, 0x70, 0x3b, 0x83, 0xb3, + 0xbc, 0xd9, 0xfd, 0x96, 0xbd, 0x50, 0x06, 0x67, +}; + +static const u8 blake2s_keyed_testvec_consolidated[BLAKE2S_HASH_SIZE] = { + 0xa6, 0xad, 0xcd, 0xb8, 0xd9, 0xdd, 0xc7, 0x70, + 0x07, 0x09, 0x7f, 0x9f, 0x41, 0xa9, 0x70, 0xa4, + 0x1c, 0xca, 0x61, 0xbb, 0x58, 0xb5, 0xb2, 0x1d, + 0xd1, 0x71, 0x16, 0xb0, 0x49, 0x4f, 0x9e, 0x1b, +}; diff --git a/lib/crypto/tests/blake2s_kunit.c b/lib/crypto/tests/blake2s_kunit.c new file mode 100644 index 000000000000..057c40132246 --- /dev/null +++ b/lib/crypto/tests/blake2s_kunit.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2025 Google LLC + */ +#include +#include "blake2s-testvecs.h" + +/* + * The following are compatibility functions that present BLAKE2s as an unkeyed + * hash function that produces hashes of fixed length BLAKE2S_HASH_SIZE, so that + * hash-test-template.h can be reused to test it. + */ + +static void blake2s_default(const u8 *data, size_t len, + u8 out[BLAKE2S_HASH_SIZE]) +{ + blake2s(out, data, NULL, BLAKE2S_HASH_SIZE, len, 0); +} + +static void blake2s_init_default(struct blake2s_state *state) +{ + blake2s_init(state, BLAKE2S_HASH_SIZE); +} + +/* + * Generate the HASH_KUNIT_CASES using hash-test-template.h. These test BLAKE2s + * with a key length of 0 and a hash length of BLAKE2S_HASH_SIZE. + */ +#define HASH blake2s_default +#define HASH_CTX blake2s_state +#define HASH_SIZE BLAKE2S_HASH_SIZE +#define HASH_INIT blake2s_init_default +#define HASH_UPDATE blake2s_update +#define HASH_FINAL blake2s_final +#include "hash-test-template.h" + +/* + * BLAKE2s specific test case which tests all possible combinations of key + * length and hash length. + */ +static void test_blake2s_all_key_and_hash_lens(struct kunit *test) +{ + const size_t data_len = 100; + u8 *data = &test_buf[0]; + u8 *key = data + data_len; + u8 *hash = key + BLAKE2S_KEY_SIZE; + struct blake2s_state main_state; + u8 main_hash[BLAKE2S_HASH_SIZE]; + + rand_bytes_seeded_from_len(data, data_len); + blake2s_init(&main_state, BLAKE2S_HASH_SIZE); + for (int key_len = 0; key_len <= BLAKE2S_KEY_SIZE; key_len++) { + rand_bytes_seeded_from_len(key, key_len); + for (int out_len = 1; out_len <= BLAKE2S_HASH_SIZE; out_len++) { + blake2s(hash, data, key, out_len, data_len, key_len); + blake2s_update(&main_state, hash, out_len); + } + } + blake2s_final(&main_state, main_hash); + KUNIT_ASSERT_MEMEQ(test, main_hash, blake2s_keyed_testvec_consolidated, + BLAKE2S_HASH_SIZE); +} + +/* + * BLAKE2s specific test case which tests using a guarded buffer for all allowed + * key lengths. Also tests both blake2s() and blake2s_init_key(). 
+ */ +static void test_blake2s_with_guarded_key_buf(struct kunit *test) +{ + const size_t data_len = 100; + + rand_bytes(test_buf, data_len); + for (int key_len = 0; key_len <= BLAKE2S_KEY_SIZE; key_len++) { + u8 key[BLAKE2S_KEY_SIZE]; + u8 *guarded_key = &test_buf[TEST_BUF_LEN - key_len]; + u8 hash1[BLAKE2S_HASH_SIZE]; + u8 hash2[BLAKE2S_HASH_SIZE]; + struct blake2s_state state; + + rand_bytes(key, key_len); + memcpy(guarded_key, key, key_len); + + blake2s(hash1, test_buf, key, + BLAKE2S_HASH_SIZE, data_len, key_len); + blake2s(hash2, test_buf, guarded_key, + BLAKE2S_HASH_SIZE, data_len, key_len); + KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2S_HASH_SIZE); + + blake2s_init_key(&state, BLAKE2S_HASH_SIZE, + guarded_key, key_len); + blake2s_update(&state, test_buf, data_len); + blake2s_final(&state, hash2); + KUNIT_ASSERT_MEMEQ(test, hash1, hash2, BLAKE2S_HASH_SIZE); + } +} + +/* + * BLAKE2s specific test case which tests using a guarded output buffer for all + * allowed output lengths. + */ +static void test_blake2s_with_guarded_out_buf(struct kunit *test) +{ + const size_t data_len = 100; + + rand_bytes(test_buf, data_len); + for (int out_len = 1; out_len <= BLAKE2S_HASH_SIZE; out_len++) { + u8 hash[BLAKE2S_HASH_SIZE]; + u8 *guarded_hash = &test_buf[TEST_BUF_LEN - out_len]; + + blake2s(hash, test_buf, NULL, out_len, data_len, 0); + blake2s(guarded_hash, test_buf, NULL, out_len, data_len, 0); + KUNIT_ASSERT_MEMEQ(test, hash, guarded_hash, out_len); + } +} + +static struct kunit_case blake2s_test_cases[] = { + HASH_KUNIT_CASES, + KUNIT_CASE(test_blake2s_all_key_and_hash_lens), + KUNIT_CASE(test_blake2s_with_guarded_key_buf), + KUNIT_CASE(test_blake2s_with_guarded_out_buf), + KUNIT_CASE(benchmark_hash), + {}, +}; + +static struct kunit_suite blake2s_test_suite = { + .name = "blake2s", + .test_cases = blake2s_test_cases, + .suite_init = hash_suite_init, + .suite_exit = hash_suite_exit, +}; +kunit_test_suite(blake2s_test_suite); + +MODULE_DESCRIPTION("KUnit tests and benchmark for BLAKE2s"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From afc4e4a5f122183b38095daba2264123cc86d8ab Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:18 -0700 Subject: lib/crypto: tests: Migrate Curve25519 self-test to KUnit Move the Curve25519 test from an ad-hoc self-test to a KUnit test. Generally keep the same test logic for now, just translated to KUnit. There's one exception, which is that I dropped the incomplete test of curve25519_generic(). The approach I'm taking to cover the different implementations with the KUnit tests is to just rely on booting kernels in QEMU with different '-cpu' options, rather than try to make the tests (incompletely) test multiple implementations on one CPU. This way, both the test and the library API are simpler. This commit makes the file lib/crypto/curve25519.c no longer needed, as its only purpose was to call the self-test. However, keep it for now, since a later commit will add code to it again. Temporarily omit the default value of CRYPTO_SELFTESTS that the other lib/crypto/ KUnit tests have. It would cause a recursive kconfig dependency, since the Curve25519 code is still entangled with CRYPTO. A later commit will fix that. 
Link: https://lore.kernel.org/r/20250906213523.84915-8-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Makefile | 1 - lib/crypto/curve25519-selftest.c | 1321 ---------------------------------- lib/crypto/curve25519.c | 3 - lib/crypto/tests/Kconfig | 9 + lib/crypto/tests/Makefile | 1 + lib/crypto/tests/curve25519_kunit.c | 1332 +++++++++++++++++++++++++++++++++++ 6 files changed, 1342 insertions(+), 1325 deletions(-) delete mode 100644 lib/crypto/curve25519-selftest.c create mode 100644 lib/crypto/tests/curve25519_kunit.c (limited to 'lib') diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index ad27c5bf99e1..6c3be971ace0 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -87,7 +87,6 @@ endif obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o libcurve25519-y += curve25519.o -libcurve25519-$(CONFIG_CRYPTO_SELFTESTS) += curve25519-selftest.o obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o libdes-y := des.o diff --git a/lib/crypto/curve25519-selftest.c b/lib/crypto/curve25519-selftest.c deleted file mode 100644 index c85e85381e78..000000000000 --- a/lib/crypto/curve25519-selftest.c +++ /dev/null @@ -1,1321 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - */ - -#include - -struct curve25519_test_vector { - u8 private[CURVE25519_KEY_SIZE]; - u8 public[CURVE25519_KEY_SIZE]; - u8 result[CURVE25519_KEY_SIZE]; - bool valid; -}; -static const struct curve25519_test_vector curve25519_test_vectors[] __initconst = { - { - .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, - 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, - 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, - 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, - .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, - 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, - 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, - 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, - .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, - 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, - 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, - 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, - .valid = true - }, - { - .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, - 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, - 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, - 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, - .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, - 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, - 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, - 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, - .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, - 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, - 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, - 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, - .valid = true - }, - { - .private = { 1 }, - .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, - 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, - 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, - 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, - .valid = true - }, - { - .private = { 1 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff }, - .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, - 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, - 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, - 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, - .valid = true - }, - { - .private = { 0xa5, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, - 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, - 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, - 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, - .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, - 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, - 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, - 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, - .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, - 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, - 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, - 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, - .valid = true - }, - { - .private = { 1, 2, 3, 4 }, - .public = { 0 }, - .result = { 0 }, - .valid = false - }, - { - .private = { 2, 4, 6, 8 }, - .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, - 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, - 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, - 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 }, - .result = { 0 }, - .valid = false - }, - { - .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, - .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, - 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, - 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, - 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, - .valid = true - }, - { - .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, - .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, - 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, - 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, - 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, - .valid = true - }, - /* wycheproof - normal case */ - { - .private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, - 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, - 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, - 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, - .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, - 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, - 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, - 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, - .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, - 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, - 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, - 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, - 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, - 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, - 
0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, - .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, - 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, - 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, - 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, - .result = { 0xb1, 0xa7, 0x07, 0x51, 0x94, 0x95, 0xff, 0xff, - 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, - 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, - 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, - 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, - 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, - 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, - .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, - 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, - 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, - 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, - .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, - 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, - 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0, - 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, - 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, - 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, - 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, - .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, - 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, - 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, - 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, - .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, - 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, - 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, - 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, - 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, - 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, - 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, - .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, - 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, - 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, - 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, - .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, - 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, - 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, - 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, - .valid = true - }, - /* wycheproof - public key on twist */ - { - .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, - 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, - 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, - 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, - .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, - 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, - 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, - 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, - .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, - 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, - 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, - 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, - .valid = true - }, - /* wycheproof - public key = 0 */ - { - .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11, - 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac, - 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b, - 0xfd, 0x46, 
0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc }, - .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key = 1 */ - { - .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61, - 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea, - 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f, - 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab }, - .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, - 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, - 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, - 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 }, - .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, - 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, - 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, - 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, - 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, - 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, - 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, - .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, - 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b, - 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, - 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, - 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, - 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, - 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, - .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, - 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, - 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, - .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, - 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, - 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, - 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3, - 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, - 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, - 0xa7, 0xbe, 0x2f, 0x6a, 
0x87, 0x2e, 0x9f, 0xb0 }, - .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, - 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, - 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, - .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, - 0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, - 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, - 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, - 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, - 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, - 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, - .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, - 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, - 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, - 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, - .valid = true - }, - /* wycheproof - edge case on twist */ - { - .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, - 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, - 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, - 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, - .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, - 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, - 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65, - 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, - 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, - 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, - 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, - .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7, - 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, - 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, - 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, - 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, - 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, - 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, - 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, - .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, - 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, - 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, - 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, - 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, - 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, - 0x3a, 0x6f, 0x8f, 
0x40, 0x76, 0xb0, 0x1e, 0x83 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, - .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 0xf8, - 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, - 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, - 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c, - 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, - 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, - 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, - .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, - 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, - 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, - 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, - .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, - 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, - 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, - 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, - 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, - 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, - 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, - 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, - 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, - 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, - 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, - 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, - 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, - 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, - 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, - 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, - .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, - 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, - 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, - 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, - .valid = true - }, - /* wycheproof - edge case for public key */ - { - .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, - 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, - 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, - 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, - .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, - 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, - 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, - 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, - .valid = true - }, - /* wycheproof - public key with low order */ - { - .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30, - 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69, - 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14, - 
0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 }, - .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, - 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, - 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, - 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3, - 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b, - 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef, - 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 }, - .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, - 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, - 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, - 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20, - 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf, - 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43, - 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 }, - .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f, - 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65, - 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06, - 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 }, - .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe, - 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9, - 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f, - 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f }, - .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8, - 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85, - 0x3d, 0x77, 0xf3, 0x51, 
0xf1, 0xc2, 0xca, 0x6c, - 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c }, - .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8, - 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d, - 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0, - 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 }, - .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a, - 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b, - 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67, - 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 }, - .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, - 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, - 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, - 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46, - 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02, - 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3, - 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 }, - .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, - 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, - 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, - 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30, - 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1, - 0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6, - 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe }, - .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f, - 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77, 
- 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0, - 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c }, - .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key with low order */ - { - .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e, - 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f, - 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77, - 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b }, - .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = false - }, - /* wycheproof - public key >= p */ - { - .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, - 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, - 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, - 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, - .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, - 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, - 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, - 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, - 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, - 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, - 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, - .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, - 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, - 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, - 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, - 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, - 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, - 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, - .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, - 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, - 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, - 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, - 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, - 0xed, 0x69, 
0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, - 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, - 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, - 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, - 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, - 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, - 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, - 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, - .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, - 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, - 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, - 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, - 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, - 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, - 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, - .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, - 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, - 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, - 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, - 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, - 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, - 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, - .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, - .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, - 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, - 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, - 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, - 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, - 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, - 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, - .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, - 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, - 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, - 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, - 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, - 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 
0x5b, - 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, - .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x54, 0x99, 0x8e, 0xe4, 0x3a, 0x5b, 0x00, 0x7b, - 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, - 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, - 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, - 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, - 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, - 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, - .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, - 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, - 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, - 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, - 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, - 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, - 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, - .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, - 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, - 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, - 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, - 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, - 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, - 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, - .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, - 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, - 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, - 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, - 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, - 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, - 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, - .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, - 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, - 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, - 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, - 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, - 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, - 0xfb, 0xd7, 0xe8, 
0xe2, 0xce, 0x5f, 0x63, 0x9d }, - .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, - 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, - 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95, - 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, - 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, - 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, - 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, - .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, - 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, - 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, - 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, - .valid = true - }, - /* wycheproof - public key >= p */ - { - .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, - 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, - 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, - 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, - .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, - .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, - 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, - 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, - 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, - .valid = true - }, - /* wycheproof - RFC 7748 */ - { - .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, - 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, - 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, - 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, - .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, - 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, - 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, - 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, - .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, - 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, - 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, - 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, - .valid = true - }, - /* wycheproof - RFC 7748 */ - { - .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, - 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, - 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, - 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, - .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, - 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, - 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, - 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, - .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, - 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, - 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, - 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, 
- .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, - 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, - 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, - 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, - .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, - 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, - 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, - 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, - .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, - 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, - 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, - 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, - .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, - 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, - 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, - 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, - .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, - 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, - 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, - 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, - .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 
0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, - 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, - 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, - 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, - .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, - 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, - 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, - 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, - .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, - 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, - 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, - 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, - .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, - 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, - 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, - 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, - .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, - 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, - 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb, - 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, - .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 
0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, - 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, - 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, - 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, - .result = { 0xdd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, - 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, - 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, - 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, - .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, - 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, - 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, - 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a, - 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, - .valid = true - }, - /* wycheproof - edge case for shared secret */ - { - .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, - 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, - 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, - 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, - .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, - 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, - 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, - 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, - .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, - 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, - 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, - 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, - .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, - 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, - 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, - 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 
0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, - 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, - 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, - 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, - .result = { 0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, - 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, - 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, - 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, - 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, - 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, - 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, - .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, - 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, - 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, - 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee, - 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, - 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, - 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, - .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, - 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, - 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, - 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, - .valid = true - }, - /* wycheproof - checking for overflow */ - { - .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, - 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, - 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, - 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, - .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, - 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, - 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, - 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, - .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, - 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, - 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, - 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, - .valid = true - }, - /* wycheproof - private key == -1 (mod order) */ - { - .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, - 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, - .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, - 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, - 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, - 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, - .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, - 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, - 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, - 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, - .valid = true - }, - /* wycheproof - private key == 1 (mod order) on twist */ - { - .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, - 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 
0x82,
-                     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-                     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f },
-        .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
-                    0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
-                    0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
-                    0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
-        .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f,
-                    0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6,
-                    0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64,
-                    0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 },
-        .valid = true
-        }
-};
-
-bool __init curve25519_selftest(void)
-{
-        bool success = true, ret, ret2;
-        size_t i = 0, j;
-        u8 in[CURVE25519_KEY_SIZE];
-        u8 out[CURVE25519_KEY_SIZE], out2[CURVE25519_KEY_SIZE],
-           out3[CURVE25519_KEY_SIZE];
-
-        for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) {
-                memset(out, 0, CURVE25519_KEY_SIZE);
-                ret = curve25519(out, curve25519_test_vectors[i].private,
-                                 curve25519_test_vectors[i].public);
-                if (ret != curve25519_test_vectors[i].valid ||
-                    memcmp(out, curve25519_test_vectors[i].result,
-                           CURVE25519_KEY_SIZE)) {
-                        pr_err("curve25519 self-test %zu: FAIL\n", i + 1);
-                        success = false;
-                }
-        }
-
-        for (i = 0; i < 5; ++i) {
-                get_random_bytes(in, sizeof(in));
-                ret = curve25519_generate_public(out, in);
-                ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 });
-                curve25519_generic(out3, in, (u8[CURVE25519_KEY_SIZE]){ 9 });
-                if (ret != ret2 ||
-                    memcmp(out, out2, CURVE25519_KEY_SIZE) ||
-                    memcmp(out, out3, CURVE25519_KEY_SIZE)) {
-                        pr_err("curve25519 basepoint self-test %zu: FAIL: input - 0x",
-                               i + 1);
-                        for (j = CURVE25519_KEY_SIZE; j-- > 0;)
-                                printk(KERN_CONT "%02x", in[j]);
-                        printk(KERN_CONT "\n");
-                        success = false;
-                }
-        }
-
-        return success;
-}
diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c
index 6850b76a80c9..25f16777865b 100644
--- a/lib/crypto/curve25519.c
+++ b/lib/crypto/curve25519.c
@@ -15,9 +15,6 @@
 
 static int __init curve25519_init(void)
 {
-        if (IS_ENABLED(CONFIG_CRYPTO_SELFTESTS) &&
-            WARN_ON(!curve25519_selftest()))
-                return -ENODEV;
         return 0;
 }
 
diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig
index fd341aa12f15..eaca60d3e0a3 100644
--- a/lib/crypto/tests/Kconfig
+++ b/lib/crypto/tests/Kconfig
@@ -10,6 +10,15 @@ config CRYPTO_LIB_BLAKE2S_KUNIT_TEST
         help
           KUnit tests for the BLAKE2s cryptographic hash function.
 
+config CRYPTO_LIB_CURVE25519_KUNIT_TEST
+        tristate "KUnit tests for Curve25519" if !KUNIT_ALL_TESTS
+        depends on KUNIT
+        default KUNIT_ALL_TESTS
+        select CRYPTO_LIB_BENCHMARK_VISIBLE
+        select CRYPTO_LIB_CURVE25519
+        help
+          KUnit tests for the Curve25519 Diffie-Hellman function.
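
Taken together, the hunks above remove the boot-time curve25519_selftest() and wire the same coverage into a KUnit Kconfig entry; the new lib/crypto/tests/curve25519_kunit.c added below carries the Wycheproof and RFC 7748 vector table itself. The following is only a rough sketch of how such a table can be driven from KUnit, not the actual contents of that file: the case name, suite name, and exact assertions are illustrative assumptions, while curve25519(), KUNIT_EXPECT_EQ(), KUNIT_EXPECT_MEMEQ(), KUNIT_CASE(), and kunit_test_suite() are the real library and KUnit interfaces.

#include <crypto/curve25519.h>
#include <kunit/test.h>
#include <linux/module.h>

/*
 * Hypothetical KUnit case: walk a vector table shaped like the
 * curve25519_test_vectors[] array in the new file below and check both
 * the return value and the computed shared secret for each entry.
 */
static void curve25519_vectors_case(struct kunit *test)
{
        size_t i;

        for (i = 0; i < ARRAY_SIZE(curve25519_test_vectors); i++) {
                const struct curve25519_test_vector *v =
                        &curve25519_test_vectors[i];
                u8 out[CURVE25519_KEY_SIZE] = { 0 };

                /* curve25519() returns false when the output is all zero. */
                KUNIT_EXPECT_EQ(test, curve25519(out, v->private, v->public),
                                v->valid);
                KUNIT_EXPECT_MEMEQ(test, out, v->result, CURVE25519_KEY_SIZE);
        }
}

static struct kunit_case curve25519_test_cases[] = {
        KUNIT_CASE(curve25519_vectors_case),
        {}
};

static struct kunit_suite curve25519_test_suite = {
        .name = "curve25519",
        .test_cases = curve25519_test_cases,
};
kunit_test_suite(curve25519_test_suite);

With CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST=y (or CONFIG_KUNIT_ALL_TESTS=y), a suite like this runs through the regular KUnit machinery rather than at module init time, which is what allows the selftest call in curve25519_init() to be dropped above.
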
+ config CRYPTO_LIB_MD5_KUNIT_TEST tristate "KUnit tests for MD5" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/crypto/tests/Makefile b/lib/crypto/tests/Makefile index be7de929af2c..a71fad19922b 100644 --- a/lib/crypto/tests/Makefile +++ b/lib/crypto/tests/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-or-later obj-$(CONFIG_CRYPTO_LIB_BLAKE2S_KUNIT_TEST) += blake2s_kunit.o +obj-$(CONFIG_CRYPTO_LIB_CURVE25519_KUNIT_TEST) += curve25519_kunit.o obj-$(CONFIG_CRYPTO_LIB_MD5_KUNIT_TEST) += md5_kunit.o obj-$(CONFIG_CRYPTO_LIB_POLY1305_KUNIT_TEST) += poly1305_kunit.o obj-$(CONFIG_CRYPTO_LIB_SHA1_KUNIT_TEST) += sha1_kunit.o diff --git a/lib/crypto/tests/curve25519_kunit.c b/lib/crypto/tests/curve25519_kunit.c new file mode 100644 index 000000000000..68eab75d40dc --- /dev/null +++ b/lib/crypto/tests/curve25519_kunit.c @@ -0,0 +1,1332 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + */ + +#include +#include + +struct curve25519_test_vector { + u8 private[CURVE25519_KEY_SIZE]; + u8 public[CURVE25519_KEY_SIZE]; + u8 result[CURVE25519_KEY_SIZE]; + bool valid; +}; +static const struct curve25519_test_vector curve25519_test_vectors[] = { + { + .private = { 0x77, 0x07, 0x6d, 0x0a, 0x73, 0x18, 0xa5, 0x7d, + 0x3c, 0x16, 0xc1, 0x72, 0x51, 0xb2, 0x66, 0x45, + 0xdf, 0x4c, 0x2f, 0x87, 0xeb, 0xc0, 0x99, 0x2a, + 0xb1, 0x77, 0xfb, 0xa5, 0x1d, 0xb9, 0x2c, 0x2a }, + .public = { 0xde, 0x9e, 0xdb, 0x7d, 0x7b, 0x7d, 0xc1, 0xb4, + 0xd3, 0x5b, 0x61, 0xc2, 0xec, 0xe4, 0x35, 0x37, + 0x3f, 0x83, 0x43, 0xc8, 0x5b, 0x78, 0x67, 0x4d, + 0xad, 0xfc, 0x7e, 0x14, 0x6f, 0x88, 0x2b, 0x4f }, + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, + .valid = true + }, + { + .private = { 0x5d, 0xab, 0x08, 0x7e, 0x62, 0x4a, 0x8a, 0x4b, + 0x79, 0xe1, 0x7f, 0x8b, 0x83, 0x80, 0x0e, 0xe6, + 0x6f, 0x3b, 0xb1, 0x29, 0x26, 0x18, 0xb6, 0xfd, + 0x1c, 0x2f, 0x8b, 0x27, 0xff, 0x88, 0xe0, 0xeb }, + .public = { 0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, + 0x74, 0x8b, 0x7d, 0xdc, 0xb4, 0x3e, 0xf7, 0x5a, + 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38, 0x1a, 0xf4, + 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a }, + .result = { 0x4a, 0x5d, 0x9d, 0x5b, 0xa4, 0xce, 0x2d, 0xe1, + 0x72, 0x8e, 0x3b, 0xf4, 0x80, 0x35, 0x0f, 0x25, + 0xe0, 0x7e, 0x21, 0xc9, 0x47, 0xd1, 0x9e, 0x33, + 0x76, 0xf0, 0x9b, 0x3c, 0x1e, 0x16, 0x17, 0x42 }, + .valid = true + }, + { + .private = { 1 }, + .public = { 0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x3c, 0x77, 0x77, 0xca, 0xf9, 0x97, 0xb2, 0x64, + 0x41, 0x60, 0x77, 0x66, 0x5b, 0x4e, 0x22, 0x9d, + 0x0b, 0x95, 0x48, 0xdc, 0x0c, 0xd8, 0x19, 0x98, + 0xdd, 0xcd, 0xc5, 0xc8, 0x53, 0x3c, 0x79, 0x7f }, + .valid = true + }, + { + .private = { 1 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xb3, 0x2d, 0x13, 0x62, 0xc2, 0x48, 0xd6, 0x2f, + 0xe6, 0x26, 0x19, 0xcf, 0xf0, 0x4d, 0xd4, 0x3d, + 0xb7, 0x3f, 0xfc, 0x1b, 0x63, 0x08, 0xed, 0xe3, + 0x0b, 0x78, 0xd8, 0x73, 0x80, 0xf1, 0xe8, 0x34 }, + .valid = true + }, + { + .private = { 0xa5, 0x46, 0xe3, 
0x6b, 0xf0, 0x52, 0x7c, 0x9d, + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0xc4 }, + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, + .valid = true + }, + { + .private = { 1, 2, 3, 4 }, + .public = { 0 }, + .result = { 0 }, + .valid = false + }, + { + .private = { 2, 4, 6, 8 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8 }, + .result = { 0 }, + .valid = false + }, + { + .private = { 0xff, 0xff, 0xff, 0xff, 0x0a, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0x0a, 0x00, 0xfb, 0x9f }, + .result = { 0x77, 0x52, 0xb6, 0x18, 0xc1, 0x2d, 0x48, 0xd2, + 0xc6, 0x93, 0x46, 0x83, 0x81, 0x7c, 0xc6, 0x57, + 0xf3, 0x31, 0x03, 0x19, 0x49, 0x48, 0x20, 0x05, + 0x42, 0x2b, 0x4e, 0xae, 0x8d, 0x1d, 0x43, 0x23 }, + .valid = true + }, + { + .private = { 0x8e, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8e, 0x06 }, + .result = { 0x5a, 0xdf, 0xaa, 0x25, 0x86, 0x8e, 0x32, 0x3d, + 0xae, 0x49, 0x62, 0xc1, 0x01, 0x5c, 0xb3, 0x12, + 0xe1, 0xc5, 0xc7, 0x9e, 0x95, 0x3f, 0x03, 0x99, + 0xb0, 0xba, 0x16, 0x22, 0xf3, 0xb6, 0xf7, 0x0c }, + .valid = true + }, + /* wycheproof - normal case */ + { + .private = { 0x48, 0x52, 0x83, 0x4d, 0x9d, 0x6b, 0x77, 0xda, + 0xde, 0xab, 0xaa, 0xf2, 0xe1, 0x1d, 0xca, 0x66, + 0xd1, 0x9f, 0xe7, 0x49, 0x93, 0xa7, 0xbe, 0xc3, + 0x6c, 0x6e, 0x16, 0xa0, 0x98, 0x3f, 0xea, 0xba }, + .public = { 0x9c, 0x64, 0x7d, 0x9a, 0xe5, 0x89, 0xb9, 0xf5, + 0x8f, 0xdc, 0x3c, 0xa4, 0x94, 0x7e, 0xfb, 0xc9, + 0x15, 0xc4, 0xb2, 0xe0, 0x8e, 0x74, 0x4a, 0x0e, + 0xdf, 0x46, 0x9d, 0xac, 0x59, 0xc8, 0xf8, 0x5a }, + .result = { 0x87, 0xb7, 0xf2, 0x12, 0xb6, 0x27, 0xf7, 0xa5, + 0x4c, 0xa5, 0xe0, 0xbc, 0xda, 0xdd, 0xd5, 0x38, + 0x9d, 0x9d, 0xe6, 0x15, 0x6c, 0xdb, 0xcf, 0x8e, + 0xbe, 0x14, 0xff, 0xbc, 0xfb, 0x43, 0x65, 0x51 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x58, 0x8c, 0x06, 0x1a, 0x50, 0x80, 0x4a, 0xc4, + 0x88, 0xad, 0x77, 0x4a, 0xc7, 0x16, 0xc3, 0xf5, + 0xba, 0x71, 0x4b, 0x27, 0x12, 0xe0, 0x48, 0x49, + 0x13, 0x79, 0xa5, 0x00, 0x21, 0x19, 0x98, 0xa8 }, + .public = { 0x63, 0xaa, 0x40, 0xc6, 0xe3, 0x83, 0x46, 0xc5, + 0xca, 0xf2, 0x3a, 0x6d, 0xf0, 0xa5, 0xe6, 0xc8, + 0x08, 0x89, 0xa0, 0x86, 0x47, 0xe5, 0x51, 0xb3, + 0x56, 0x34, 0x49, 0xbe, 0xfc, 0xfc, 0x97, 0x33 }, + .result = { 0xb1, 0xa7, 0x07, 0x51, 
0x94, 0x95, 0xff, 0xff, + 0xb2, 0x98, 0xff, 0x94, 0x17, 0x16, 0xb0, 0x6d, + 0xfa, 0xb8, 0x7c, 0xf8, 0xd9, 0x11, 0x23, 0xfe, + 0x2b, 0xe9, 0xa2, 0x33, 0xdd, 0xa2, 0x22, 0x12 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0xb0, 0x5b, 0xfd, 0x32, 0xe5, 0x53, 0x25, 0xd9, + 0xfd, 0x64, 0x8c, 0xb3, 0x02, 0x84, 0x80, 0x39, + 0x00, 0x0b, 0x39, 0x0e, 0x44, 0xd5, 0x21, 0xe5, + 0x8a, 0xab, 0x3b, 0x29, 0xa6, 0x96, 0x0b, 0xa8 }, + .public = { 0x0f, 0x83, 0xc3, 0x6f, 0xde, 0xd9, 0xd3, 0x2f, + 0xad, 0xf4, 0xef, 0xa3, 0xae, 0x93, 0xa9, 0x0b, + 0xb5, 0xcf, 0xa6, 0x68, 0x93, 0xbc, 0x41, 0x2c, + 0x43, 0xfa, 0x72, 0x87, 0xdb, 0xb9, 0x97, 0x79 }, + .result = { 0x67, 0xdd, 0x4a, 0x6e, 0x16, 0x55, 0x33, 0x53, + 0x4c, 0x0e, 0x3f, 0x17, 0x2e, 0x4a, 0xb8, 0x57, + 0x6b, 0xca, 0x92, 0x3a, 0x5f, 0x07, 0xb2, 0xc0, + 0x69, 0xb4, 0xc3, 0x10, 0xff, 0x2e, 0x93, 0x5b }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x70, 0xe3, 0x4b, 0xcb, 0xe1, 0xf4, 0x7f, 0xbc, + 0x0f, 0xdd, 0xfd, 0x7c, 0x1e, 0x1a, 0xa5, 0x3d, + 0x57, 0xbf, 0xe0, 0xf6, 0x6d, 0x24, 0x30, 0x67, + 0xb4, 0x24, 0xbb, 0x62, 0x10, 0xbe, 0xd1, 0x9c }, + .public = { 0x0b, 0x82, 0x11, 0xa2, 0xb6, 0x04, 0x90, 0x97, + 0xf6, 0x87, 0x1c, 0x6c, 0x05, 0x2d, 0x3c, 0x5f, + 0xc1, 0xba, 0x17, 0xda, 0x9e, 0x32, 0xae, 0x45, + 0x84, 0x03, 0xb0, 0x5b, 0xb2, 0x83, 0x09, 0x2a }, + .result = { 0x4a, 0x06, 0x38, 0xcf, 0xaa, 0x9e, 0xf1, 0x93, + 0x3b, 0x47, 0xf8, 0x93, 0x92, 0x96, 0xa6, 0xb2, + 0x5b, 0xe5, 0x41, 0xef, 0x7f, 0x70, 0xe8, 0x44, + 0xc0, 0xbc, 0xc0, 0x0b, 0x13, 0x4d, 0xe6, 0x4a }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0x68, 0xc1, 0xf3, 0xa6, 0x53, 0xa4, 0xcd, 0xb1, + 0xd3, 0x7b, 0xba, 0x94, 0x73, 0x8f, 0x8b, 0x95, + 0x7a, 0x57, 0xbe, 0xb2, 0x4d, 0x64, 0x6e, 0x99, + 0x4d, 0xc2, 0x9a, 0x27, 0x6a, 0xad, 0x45, 0x8d }, + .public = { 0x34, 0x3a, 0xc2, 0x0a, 0x3b, 0x9c, 0x6a, 0x27, + 0xb1, 0x00, 0x81, 0x76, 0x50, 0x9a, 0xd3, 0x07, + 0x35, 0x85, 0x6e, 0xc1, 0xc8, 0xd8, 0xfc, 0xae, + 0x13, 0x91, 0x2d, 0x08, 0xd1, 0x52, 0xf4, 0x6c }, + .result = { 0x39, 0x94, 0x91, 0xfc, 0xe8, 0xdf, 0xab, 0x73, + 0xb4, 0xf9, 0xf6, 0x11, 0xde, 0x8e, 0xa0, 0xb2, + 0x7b, 0x28, 0xf8, 0x59, 0x94, 0x25, 0x0b, 0x0f, + 0x47, 0x5d, 0x58, 0x5d, 0x04, 0x2a, 0xc2, 0x07 }, + .valid = true + }, + /* wycheproof - public key on twist */ + { + .private = { 0xd8, 0x77, 0xb2, 0x6d, 0x06, 0xdf, 0xf9, 0xd9, + 0xf7, 0xfd, 0x4c, 0x5b, 0x37, 0x69, 0xf8, 0xcd, + 0xd5, 0xb3, 0x05, 0x16, 0xa5, 0xab, 0x80, 0x6b, + 0xe3, 0x24, 0xff, 0x3e, 0xb6, 0x9e, 0xa0, 0xb2 }, + .public = { 0xfa, 0x69, 0x5f, 0xc7, 0xbe, 0x8d, 0x1b, 0xe5, + 0xbf, 0x70, 0x48, 0x98, 0xf3, 0x88, 0xc4, 0x52, + 0xba, 0xfd, 0xd3, 0xb8, 0xea, 0xe8, 0x05, 0xf8, + 0x68, 0x1a, 0x8d, 0x15, 0xc2, 0xd4, 0xe1, 0x42 }, + .result = { 0x2c, 0x4f, 0xe1, 0x1d, 0x49, 0x0a, 0x53, 0x86, + 0x17, 0x76, 0xb1, 0x3b, 0x43, 0x54, 0xab, 0xd4, + 0xcf, 0x5a, 0x97, 0x69, 0x9d, 0xb6, 0xe6, 0xc6, + 0x8c, 0x16, 0x26, 0xd0, 0x76, 0x62, 0xf7, 0x58 }, + .valid = true + }, + /* wycheproof - public key = 0 */ + { + .private = { 0x20, 0x74, 0x94, 0x03, 0x8f, 0x2b, 0xb8, 0x11, + 0xd4, 0x78, 0x05, 0xbc, 0xdf, 0x04, 0xa2, 0xac, + 0x58, 0x5a, 0xda, 0x7f, 0x2f, 0x23, 0x38, 0x9b, + 0xfd, 0x46, 0x58, 0xf9, 0xdd, 0xd4, 0xde, 0xbc }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key = 1 */ + { + .private = { 0x20, 0x2e, 0x89, 0x72, 0xb6, 0x1c, 0x7e, 0x61, + 0x93, 0x0e, 0xb9, 0x45, 0x0b, 0x50, 0x70, 0xea, + 0xe1, 0xc6, 0x70, 0x47, 0x56, 0x85, 0x54, 0x1f, + 0x04, 0x76, 0x21, 0x7e, 0x48, 0x18, 0xcf, 0xab }, + .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x38, 0xdd, 0xe9, 0xf3, 0xe7, 0xb7, 0x99, 0x04, + 0x5f, 0x9a, 0xc3, 0x79, 0x3d, 0x4a, 0x92, 0x77, + 0xda, 0xde, 0xad, 0xc4, 0x1b, 0xec, 0x02, 0x90, + 0xf8, 0x1f, 0x74, 0x4f, 0x73, 0x77, 0x5f, 0x84 }, + .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x9a, 0x2c, 0xfe, 0x84, 0xff, 0x9c, 0x4a, 0x97, + 0x39, 0x62, 0x5c, 0xae, 0x4a, 0x3b, 0x82, 0xa9, + 0x06, 0x87, 0x7a, 0x44, 0x19, 0x46, 0xf8, 0xd7, + 0xb3, 0xd7, 0x95, 0xfe, 0x8f, 0x5d, 0x16, 0x39 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x98, 0x57, 0xa9, 0x14, 0xe3, 0xc2, 0x90, 0x36, + 0xfd, 0x9a, 0x44, 0x2b, 0xa5, 0x26, 0xb5, 0xcd, + 0xcd, 0xf2, 0x82, 0x16, 0x15, 0x3e, 0x63, 0x6c, + 0x10, 0x67, 0x7a, 0xca, 0xb6, 0xbd, 0x6a, 0xa5 }, + .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x4d, 0xa4, 0xe0, 0xaa, 0x07, 0x2c, 0x23, 0x2e, + 0xe2, 0xf0, 0xfa, 0x4e, 0x51, 0x9a, 0xe5, 0x0b, + 0x52, 0xc1, 0xed, 0xd0, 0x8a, 0x53, 0x4d, 0x4e, + 0xf3, 0x46, 0xc2, 0xe1, 0x06, 0xd2, 0x1d, 0x60 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x48, 0xe2, 0x13, 0x0d, 0x72, 0x33, 0x05, 0xed, + 0x05, 0xe6, 0xe5, 0x89, 0x4d, 0x39, 0x8a, 0x5e, + 0x33, 0x36, 0x7a, 0x8c, 0x6a, 0xac, 0x8f, 0xcd, + 0xf0, 0xa8, 0x8e, 0x4b, 0x42, 0x82, 0x0d, 0xb7 }, + .public = { 0xff, 0xff, 0xff, 0x03, 0x00, 0x00, 0xf8, 0xff, + 0xff, 0x1f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0xfe, 0xff, 0xff, 0x07, 0x00, + 0x00, 0xf0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00 }, + .result = { 0x9e, 0xd1, 0x0c, 0x53, 0x74, 0x7f, 0x64, 0x7f, + 0x82, 0xf4, 0x51, 0x25, 0xd3, 0xde, 0x15, 0xa1, + 0xe6, 0xb8, 0x24, 0x49, 0x6a, 0xb4, 0x04, 0x10, + 0xff, 0xcc, 0x3c, 0xfe, 0x95, 0x76, 0x0f, 0x3b }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x28, 0xf4, 0x10, 0x11, 0x69, 0x18, 0x51, 0xb3, + 0xa6, 0x2b, 0x64, 0x15, 0x53, 0xb3, 0x0d, 0x0d, + 0xfd, 0xdc, 0xb8, 0xff, 0xfc, 0xf5, 0x37, 0x00, + 0xa7, 0xbe, 0x2f, 0x6a, 0x87, 0x2e, 0x9f, 0xb0 }, + .public = { 0x00, 0x00, 0x00, 0xfc, 0xff, 0xff, 0x07, 0x00, + 0x00, 0xe0, 0xff, 0xff, 0x3f, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0x01, 0x00, 0x00, 0xf8, 0xff, + 0xff, 0x0f, 0x00, 0x00, 0xc0, 0xff, 0xff, 0x7f }, + .result = { 0xcf, 0x72, 0xb4, 0xaa, 0x6a, 0xa1, 0xc9, 0xf8, + 
0x94, 0xf4, 0x16, 0x5b, 0x86, 0x10, 0x9a, 0xa4, + 0x68, 0x51, 0x76, 0x48, 0xe1, 0xf0, 0xcc, 0x70, + 0xe1, 0xab, 0x08, 0x46, 0x01, 0x76, 0x50, 0x6b }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0x18, 0xa9, 0x3b, 0x64, 0x99, 0xb9, 0xf6, 0xb3, + 0x22, 0x5c, 0xa0, 0x2f, 0xef, 0x41, 0x0e, 0x0a, + 0xde, 0xc2, 0x35, 0x32, 0x32, 0x1d, 0x2d, 0x8e, + 0xf1, 0xa6, 0xd6, 0x02, 0xa8, 0xc6, 0x5b, 0x83 }, + .public = { 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x5d, 0x50, 0xb6, 0x28, 0x36, 0xbb, 0x69, 0x57, + 0x94, 0x10, 0x38, 0x6c, 0xf7, 0xbb, 0x81, 0x1c, + 0x14, 0xbf, 0x85, 0xb1, 0xc7, 0xb1, 0x7e, 0x59, + 0x24, 0xc7, 0xff, 0xea, 0x91, 0xef, 0x9e, 0x12 }, + .valid = true + }, + /* wycheproof - edge case on twist */ + { + .private = { 0xc0, 0x1d, 0x13, 0x05, 0xa1, 0x33, 0x8a, 0x1f, + 0xca, 0xc2, 0xba, 0x7e, 0x2e, 0x03, 0x2b, 0x42, + 0x7e, 0x0b, 0x04, 0x90, 0x31, 0x65, 0xac, 0xa9, + 0x57, 0xd8, 0xd0, 0x55, 0x3d, 0x87, 0x17, 0xb0 }, + .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x19, 0x23, 0x0e, 0xb1, 0x48, 0xd5, 0xd6, 0x7c, + 0x3c, 0x22, 0xab, 0x1d, 0xae, 0xff, 0x80, 0xa5, + 0x7e, 0xae, 0x42, 0x65, 0xce, 0x28, 0x72, 0x65, + 0x7b, 0x2c, 0x80, 0x99, 0xfc, 0x69, 0x8e, 0x50 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x38, 0x6f, 0x7f, 0x16, 0xc5, 0x07, 0x31, 0xd6, + 0x4f, 0x82, 0xe6, 0xa1, 0x70, 0xb1, 0x42, 0xa4, + 0xe3, 0x4f, 0x31, 0xfd, 0x77, 0x68, 0xfc, 0xb8, + 0x90, 0x29, 0x25, 0xe7, 0xd1, 0xe2, 0x1a, 0xbe }, + .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x0f, 0xca, 0xb5, 0xd8, 0x42, 0xa0, 0x78, 0xd7, + 0xa7, 0x1f, 0xc5, 0x9b, 0x57, 0xbf, 0xb4, 0xca, + 0x0b, 0xe6, 0x87, 0x3b, 0x49, 0xdc, 0xdb, 0x9f, + 0x44, 0xe1, 0x4a, 0xe8, 0xfb, 0xdf, 0xa5, 0x42 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xe0, 0x23, 0xa2, 0x89, 0xbd, 0x5e, 0x90, 0xfa, + 0x28, 0x04, 0xdd, 0xc0, 0x19, 0xa0, 0x5e, 0xf3, + 0xe7, 0x9d, 0x43, 0x4b, 0xb6, 0xea, 0x2f, 0x52, + 0x2e, 0xcb, 0x64, 0x3a, 0x75, 0x29, 0x6e, 0x95 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00 }, + .result = { 0x54, 0xce, 0x8f, 0x22, 0x75, 0xc0, 0x77, 0xe3, + 0xb1, 0x30, 0x6a, 0x39, 0x39, 0xc5, 0xe0, 0x3e, + 0xef, 0x6b, 0xbb, 0x88, 0x06, 0x05, 0x44, 0x75, + 0x8d, 0x9f, 0xef, 0x59, 0xb0, 0xbc, 0x3e, 0x4f }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x68, 0xf0, 0x10, 0xd6, 0x2e, 0xe8, 0xd9, 0x26, + 0x05, 0x3a, 0x36, 0x1c, 0x3a, 0x75, 0xc6, 0xea, + 0x4e, 0xbd, 0xc8, 0x60, 0x6a, 0xb2, 0x85, 0x00, + 0x3a, 0x6f, 0x8f, 0x40, 0x76, 0xb0, 0x1e, 0x83 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, + .result = { 0xf1, 0x36, 0x77, 0x5c, 0x5b, 0xeb, 0x0a, 
0xf8, + 0x11, 0x0a, 0xf1, 0x0b, 0x20, 0x37, 0x23, 0x32, + 0x04, 0x3c, 0xab, 0x75, 0x24, 0x19, 0x67, 0x87, + 0x75, 0xa2, 0x23, 0xdf, 0x57, 0xc9, 0xd3, 0x0d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x58, 0xeb, 0xcb, 0x35, 0xb0, 0xf8, 0x84, 0x5c, + 0xaf, 0x1e, 0xc6, 0x30, 0xf9, 0x65, 0x76, 0xb6, + 0x2c, 0x4b, 0x7b, 0x6c, 0x36, 0xb2, 0x9d, 0xeb, + 0x2c, 0xb0, 0x08, 0x46, 0x51, 0x75, 0x5c, 0x96 }, + .public = { 0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xfb, 0xff, + 0xff, 0xdf, 0xff, 0xff, 0xdf, 0xff, 0xff, 0xff, + 0xfe, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xf7, 0xff, + 0xff, 0xf7, 0xff, 0xff, 0xbf, 0xff, 0xff, 0x3f }, + .result = { 0xbf, 0x9a, 0xff, 0xd0, 0x6b, 0x84, 0x40, 0x85, + 0x58, 0x64, 0x60, 0x96, 0x2e, 0xf2, 0x14, 0x6f, + 0xf3, 0xd4, 0x53, 0x3d, 0x94, 0x44, 0xaa, 0xb0, + 0x06, 0xeb, 0x88, 0xcc, 0x30, 0x54, 0x40, 0x7d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0x18, 0x8c, 0x4b, 0xc5, 0xb9, 0xc4, 0x4b, 0x38, + 0xbb, 0x65, 0x8b, 0x9b, 0x2a, 0xe8, 0x2d, 0x5b, + 0x01, 0x01, 0x5e, 0x09, 0x31, 0x84, 0xb1, 0x7c, + 0xb7, 0x86, 0x35, 0x03, 0xa7, 0x83, 0xe1, 0xbb }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .result = { 0xd4, 0x80, 0xde, 0x04, 0xf6, 0x99, 0xcb, 0x3b, + 0xe0, 0x68, 0x4a, 0x9c, 0xc2, 0xe3, 0x12, 0x81, + 0xea, 0x0b, 0xc5, 0xa9, 0xdc, 0xc1, 0x57, 0xd3, + 0xd2, 0x01, 0x58, 0xd4, 0x6c, 0xa5, 0x24, 0x6d }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xe0, 0x6c, 0x11, 0xbb, 0x2e, 0x13, 0xce, 0x3d, + 0xc7, 0x67, 0x3f, 0x67, 0xf5, 0x48, 0x22, 0x42, + 0x90, 0x94, 0x23, 0xa9, 0xae, 0x95, 0xee, 0x98, + 0x6a, 0x98, 0x8d, 0x98, 0xfa, 0xee, 0x23, 0xa2 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f, + 0xff, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0x7f }, + .result = { 0x4c, 0x44, 0x01, 0xcc, 0xe6, 0xb5, 0x1e, 0x4c, + 0xb1, 0x8f, 0x27, 0x90, 0x24, 0x6c, 0x9b, 0xf9, + 0x14, 0xdb, 0x66, 0x77, 0x50, 0xa1, 0xcb, 0x89, + 0x06, 0x90, 0x92, 0xaf, 0x07, 0x29, 0x22, 0x76 }, + .valid = true + }, + /* wycheproof - edge case for public key */ + { + .private = { 0xc0, 0x65, 0x8c, 0x46, 0xdd, 0xe1, 0x81, 0x29, + 0x29, 0x38, 0x77, 0x53, 0x5b, 0x11, 0x62, 0xb6, + 0xf9, 0xf5, 0x41, 0x4a, 0x23, 0xcf, 0x4d, 0x2c, + 0xbc, 0x14, 0x0a, 0x4d, 0x99, 0xda, 0x2b, 0x8f }, + .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x57, 0x8b, 0xa8, 0xcc, 0x2d, 0xbd, 0xc5, 0x75, + 0xaf, 0xcf, 0x9d, 0xf2, 0xb3, 0xee, 0x61, 0x89, + 0xf5, 0x33, 0x7d, 0x68, 0x54, 0xc7, 0x9b, 0x4c, + 0xe1, 0x65, 0xea, 0x12, 0x29, 0x3b, 0x3a, 0x0f }, + .valid = true + }, + /* wycheproof - public key with low order */ + { + .private = { 0x10, 0x25, 0x5c, 0x92, 0x30, 0xa9, 0x7a, 0x30, + 0xa4, 0x58, 0xca, 0x28, 0x4a, 0x62, 0x96, 0x69, + 0x29, 0x3a, 0x31, 0x89, 0x0c, 0xda, 0x9d, 0x14, + 0x7f, 0xeb, 0xc7, 0xd1, 0xe2, 0x2d, 0x6b, 0xb1 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, + .result = { 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x78, 0xf1, 0xe8, 0xed, 0xf1, 0x44, 0x81, 0xb3, + 0x89, 0x44, 0x8d, 0xac, 0x8f, 0x59, 0xc7, 0x0b, + 0x03, 0x8e, 0x7c, 0xf9, 0x2e, 0xf2, 0xc7, 0xef, + 0xf5, 0x7a, 0x72, 0x46, 0x6e, 0x11, 0x52, 0x96 }, + .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xa0, 0xa0, 0x5a, 0x3e, 0x8f, 0x9f, 0x44, 0x20, + 0x4d, 0x5f, 0x80, 0x59, 0xa9, 0x4a, 0xc7, 0xdf, + 0xc3, 0x9a, 0x49, 0xac, 0x01, 0x6d, 0xd7, 0x43, + 0xdb, 0xfa, 0x43, 0xc5, 0xd6, 0x71, 0xfd, 0x88 }, + .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xd0, 0xdb, 0xb3, 0xed, 0x19, 0x06, 0x66, 0x3f, + 0x15, 0x42, 0x0a, 0xf3, 0x1f, 0x4e, 0xaf, 0x65, + 0x09, 0xd9, 0xa9, 0x94, 0x97, 0x23, 0x50, 0x06, + 0x05, 0xad, 0x7c, 0x1c, 0x6e, 0x74, 0x50, 0xa9 }, + .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xc0, 0xb1, 0xd0, 0xeb, 0x22, 0xb2, 0x44, 0xfe, + 0x32, 0x91, 0x14, 0x00, 0x72, 0xcd, 0xd9, 0xd9, + 0x89, 0xb5, 0xf0, 0xec, 0xd9, 0x6c, 0x10, 0x0f, + 0xeb, 0x5b, 0xca, 0x24, 0x1c, 0x1d, 0x9f, 0x8f }, + .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x48, 0x0b, 0xf4, 0x5f, 0x59, 0x49, 0x42, 0xa8, + 0xbc, 0x0f, 0x33, 0x53, 0xc6, 0xe8, 0xb8, 0x85, + 0x3d, 0x77, 0xf3, 0x51, 0xf1, 0xc2, 0xca, 0x6c, + 0x2d, 0x1a, 0xbf, 0x8a, 0x00, 0xb4, 0x22, 0x9c }, + .public = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = 
{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x30, 0xf9, 0x93, 0xfc, 0xf8, 0x51, 0x4f, 0xc8, + 0x9b, 0xd8, 0xdb, 0x14, 0xcd, 0x43, 0xba, 0x0d, + 0x4b, 0x25, 0x30, 0xe7, 0x3c, 0x42, 0x76, 0xa0, + 0x5e, 0x1b, 0x14, 0x5d, 0x42, 0x0c, 0xed, 0xb4 }, + .public = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0xc0, 0x49, 0x74, 0xb7, 0x58, 0x38, 0x0e, 0x2a, + 0x5b, 0x5d, 0xf6, 0xeb, 0x09, 0xbb, 0x2f, 0x6b, + 0x34, 0x34, 0xf9, 0x82, 0x72, 0x2a, 0x8e, 0x67, + 0x6d, 0x3d, 0xa2, 0x51, 0xd1, 0xb3, 0xde, 0x83 }, + .public = { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x50, 0x2a, 0x31, 0x37, 0x3d, 0xb3, 0x24, 0x46, + 0x84, 0x2f, 0xe5, 0xad, 0xd3, 0xe0, 0x24, 0x02, + 0x2e, 0xa5, 0x4f, 0x27, 0x41, 0x82, 0xaf, 0xc3, + 0xd9, 0xf1, 0xbb, 0x3d, 0x39, 0x53, 0x4e, 0xb5 }, + .public = { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x90, 0xfa, 0x64, 0x17, 0xb0, 0xe3, 0x70, 0x30, + 0xfd, 0x6e, 0x43, 0xef, 0xf2, 0xab, 0xae, 0xf1, + 0x4c, 0x67, 0x93, 0x11, 0x7a, 0x03, 0x9c, 0xf6, + 0x21, 0x31, 0x8b, 0xa9, 0x0f, 0x4e, 0x98, 0xbe }, + .public = { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x78, 0xad, 0x3f, 0x26, 0x02, 0x7f, 0x1c, 0x9f, + 0xdd, 0x97, 0x5a, 0x16, 0x13, 0xb9, 0x47, 0x77, + 0x9b, 0xad, 0x2c, 0xf2, 0xb7, 0x41, 0xad, 0xe0, + 0x18, 0x40, 0x88, 0x5a, 0x30, 0xbb, 0x97, 0x9c }, + .public = { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key with low order */ + { + .private = { 0x98, 0xe2, 0x3d, 0xe7, 0xb1, 0xe0, 0x92, 0x6e, + 0xd9, 0xc8, 0x7e, 0x7b, 0x14, 0xba, 0xf5, 0x5f, + 0x49, 0x7a, 0x1d, 0x70, 0x96, 0xf9, 0x39, 0x77, + 0x68, 0x0e, 0x44, 0xdc, 0x1c, 0x7b, 0x7b, 0x8b }, + .public = { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = false + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf0, 0x1e, 0x48, 0xda, 0xfa, 0xc9, 0xd7, 0xbc, + 0xf5, 0x89, 0xcb, 0xc3, 0x82, 0xc8, 0x78, 0xd1, + 0x8b, 0xda, 0x35, 0x50, 0x58, 0x9f, 0xfb, 0x5d, + 0x50, 0xb5, 0x23, 0xbe, 0xbe, 0x32, 0x9d, 0xae }, + .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0xbd, 0x36, 0xa0, 0x79, 0x0e, 0xb8, 0x83, 0x09, + 0x8c, 0x98, 0x8b, 0x21, 0x78, 0x67, 0x73, 0xde, + 0x0b, 0x3a, 0x4d, 0xf1, 0x62, 0x28, 0x2c, 0xf1, + 0x10, 0xde, 0x18, 0xdd, 0x48, 0x4c, 0xe7, 0x4b }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x28, 0x87, 0x96, 0xbc, 0x5a, 0xff, 0x4b, 0x81, + 0xa3, 0x75, 0x01, 0x75, 0x7b, 0xc0, 0x75, 0x3a, + 0x3c, 0x21, 0x96, 0x47, 0x90, 0xd3, 0x86, 0x99, + 0x30, 0x8d, 0xeb, 0xc1, 0x7a, 0x6e, 0xaf, 0x8d }, + .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0xb4, 0xe0, 0xdd, 0x76, 0xda, 0x7b, 0x07, 0x17, + 0x28, 0xb6, 0x1f, 0x85, 0x67, 0x71, 0xaa, 0x35, + 0x6e, 0x57, 0xed, 0xa7, 0x8a, 0x5b, 0x16, 0x55, + 0xcc, 0x38, 0x20, 0xfb, 0x5f, 0x85, 0x4c, 0x5c }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x98, 0xdf, 0x84, 0x5f, 0x66, 0x51, 0xbf, 0x11, + 0x38, 0x22, 0x1f, 0x11, 0x90, 0x41, 0xf7, 0x2b, + 0x6d, 0xbc, 0x3c, 0x4a, 0xce, 0x71, 0x43, 0xd9, + 0x9f, 0xd5, 0x5a, 0xd8, 0x67, 0x48, 0x0d, 0xa8 }, + .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .result = { 0x6f, 0xdf, 0x6c, 0x37, 0x61, 0x1d, 0xbd, 0x53, + 0x04, 0xdc, 0x0f, 0x2e, 0xb7, 0xc9, 0x51, 0x7e, + 0xb3, 0xc5, 0x0e, 0x12, 0xfd, 0x05, 0x0a, 0xc6, + 0xde, 0xc2, 0x70, 0x71, 0xd4, 0xbf, 0xc0, 0x34 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf0, 0x94, 0x98, 0xe4, 0x6f, 0x02, 0xf8, 0x78, + 0x82, 0x9e, 0x78, 0xb8, 0x03, 0xd3, 0x16, 0xa2, + 0xed, 0x69, 0x5d, 0x04, 0x98, 0xa0, 0x8a, 0xbd, + 0xf8, 0x27, 0x69, 0x30, 0xe2, 0x4e, 0xdc, 0xb0 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + 
.result = { 0x4c, 0x8f, 0xc4, 0xb1, 0xc6, 0xab, 0x88, 0xfb, + 0x21, 0xf1, 0x8f, 0x6d, 0x4c, 0x81, 0x02, 0x40, + 0xd4, 0xe9, 0x46, 0x51, 0xba, 0x44, 0xf7, 0xa2, + 0xc8, 0x63, 0xce, 0xc7, 0xdc, 0x56, 0x60, 0x2d }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x18, 0x13, 0xc1, 0x0a, 0x5c, 0x7f, 0x21, 0xf9, + 0x6e, 0x17, 0xf2, 0x88, 0xc0, 0xcc, 0x37, 0x60, + 0x7c, 0x04, 0xc5, 0xf5, 0xae, 0xa2, 0xdb, 0x13, + 0x4f, 0x9e, 0x2f, 0xfc, 0x66, 0xbd, 0x9d, 0xb8 }, + .public = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x1c, 0xd0, 0xb2, 0x82, 0x67, 0xdc, 0x54, 0x1c, + 0x64, 0x2d, 0x6d, 0x7d, 0xca, 0x44, 0xa8, 0xb3, + 0x8a, 0x63, 0x73, 0x6e, 0xef, 0x5c, 0x4e, 0x65, + 0x01, 0xff, 0xbb, 0xb1, 0x78, 0x0c, 0x03, 0x3c }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x78, 0x57, 0xfb, 0x80, 0x86, 0x53, 0x64, 0x5a, + 0x0b, 0xeb, 0x13, 0x8a, 0x64, 0xf5, 0xf4, 0xd7, + 0x33, 0xa4, 0x5e, 0xa8, 0x4c, 0x3c, 0xda, 0x11, + 0xa9, 0xc0, 0x6f, 0x7e, 0x71, 0x39, 0x14, 0x9e }, + .public = { 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x87, 0x55, 0xbe, 0x01, 0xc6, 0x0a, 0x7e, 0x82, + 0x5c, 0xff, 0x3e, 0x0e, 0x78, 0xcb, 0x3a, 0xa4, + 0x33, 0x38, 0x61, 0x51, 0x6a, 0xa5, 0x9b, 0x1c, + 0x51, 0xa8, 0xb2, 0xa5, 0x43, 0xdf, 0xa8, 0x22 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xe0, 0x3a, 0xa8, 0x42, 0xe2, 0xab, 0xc5, 0x6e, + 0x81, 0xe8, 0x7b, 0x8b, 0x9f, 0x41, 0x7b, 0x2a, + 0x1e, 0x59, 0x13, 0xc7, 0x23, 0xee, 0xd2, 0x8d, + 0x75, 0x2f, 0x8d, 0x47, 0xa5, 0x9f, 0x49, 0x8f }, + .public = { 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 }, + .result = { 0x54, 0xc9, 0xa1, 0xed, 0x95, 0xe5, 0x46, 0xd2, + 0x78, 0x22, 0xa3, 0x60, 0x93, 0x1d, 0xda, 0x60, + 0xa1, 0xdf, 0x04, 0x9d, 0xa6, 0xf9, 0x04, 0x25, + 0x3c, 0x06, 0x12, 0xbb, 0xdc, 0x08, 0x74, 0x76 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xf8, 0xf7, 0x07, 0xb7, 0x99, 0x9b, 0x18, 0xcb, + 0x0d, 0x6b, 0x96, 0x12, 0x4f, 0x20, 0x45, 0x97, + 0x2c, 0xa2, 0x74, 0xbf, 0xc1, 0x54, 0xad, 0x0c, + 0x87, 0x03, 0x8c, 0x24, 0xc6, 0xd0, 0xd4, 0xb2 }, + .public = { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xcc, 0x1f, 0x40, 0xd7, 0x43, 0xcd, 0xc2, 0x23, + 0x0e, 0x10, 0x43, 0xda, 0xba, 0x8b, 0x75, 0xe8, + 0x10, 0xf1, 0xfb, 0xab, 0x7f, 0x25, 0x52, 0x69, + 0xbd, 0x9e, 0xbb, 0x29, 0xe6, 0xbf, 0x49, 0x4f }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xa0, 0x34, 0xf6, 0x84, 0xfa, 0x63, 0x1e, 0x1a, + 0x34, 0x81, 0x18, 0xc1, 0xce, 0x4c, 0x98, 0x23, + 0x1f, 0x2d, 0x9e, 0xec, 0x9b, 0xa5, 0x36, 0x5b, + 0x4a, 0x05, 0xd6, 0x9a, 0x78, 0x5b, 0x07, 0x96 }, + .public = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x54, 0x99, 0x8e, 
0xe4, 0x3a, 0x5b, 0x00, 0x7b, + 0xf4, 0x99, 0xf0, 0x78, 0xe7, 0x36, 0x52, 0x44, + 0x00, 0xa8, 0xb5, 0xc7, 0xe9, 0xb9, 0xb4, 0x37, + 0x71, 0x74, 0x8c, 0x7c, 0xdf, 0x88, 0x04, 0x12 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x30, 0xb6, 0xc6, 0xa0, 0xf2, 0xff, 0xa6, 0x80, + 0x76, 0x8f, 0x99, 0x2b, 0xa8, 0x9e, 0x15, 0x2d, + 0x5b, 0xc9, 0x89, 0x3d, 0x38, 0xc9, 0x11, 0x9b, + 0xe4, 0xf7, 0x67, 0xbf, 0xab, 0x6e, 0x0c, 0xa5 }, + .public = { 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xea, 0xd9, 0xb3, 0x8e, 0xfd, 0xd7, 0x23, 0x63, + 0x79, 0x34, 0xe5, 0x5a, 0xb7, 0x17, 0xa7, 0xae, + 0x09, 0xeb, 0x86, 0xa2, 0x1d, 0xc3, 0x6a, 0x3f, + 0xee, 0xb8, 0x8b, 0x75, 0x9e, 0x39, 0x1e, 0x09 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x90, 0x1b, 0x9d, 0xcf, 0x88, 0x1e, 0x01, 0xe0, + 0x27, 0x57, 0x50, 0x35, 0xd4, 0x0b, 0x43, 0xbd, + 0xc1, 0xc5, 0x24, 0x2e, 0x03, 0x08, 0x47, 0x49, + 0x5b, 0x0c, 0x72, 0x86, 0x46, 0x9b, 0x65, 0x91 }, + .public = { 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x60, 0x2f, 0xf4, 0x07, 0x89, 0xb5, 0x4b, 0x41, + 0x80, 0x59, 0x15, 0xfe, 0x2a, 0x62, 0x21, 0xf0, + 0x7a, 0x50, 0xff, 0xc2, 0xc3, 0xfc, 0x94, 0xcf, + 0x61, 0xf1, 0x3d, 0x79, 0x04, 0xe8, 0x8e, 0x0e }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x80, 0x46, 0x67, 0x7c, 0x28, 0xfd, 0x82, 0xc9, + 0xa1, 0xbd, 0xb7, 0x1a, 0x1a, 0x1a, 0x34, 0xfa, + 0xba, 0x12, 0x25, 0xe2, 0x50, 0x7f, 0xe3, 0xf5, + 0x4d, 0x10, 0xbd, 0x5b, 0x0d, 0x86, 0x5f, 0x8e }, + .public = { 0xeb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0xe0, 0x0a, 0xe8, 0xb1, 0x43, 0x47, 0x12, 0x47, + 0xba, 0x24, 0xf1, 0x2c, 0x88, 0x55, 0x36, 0xc3, + 0xcb, 0x98, 0x1b, 0x58, 0xe1, 0xe5, 0x6b, 0x2b, + 0xaf, 0x35, 0xc1, 0x2a, 0xe1, 0xf7, 0x9c, 0x26 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x60, 0x2f, 0x7e, 0x2f, 0x68, 0xa8, 0x46, 0xb8, + 0x2c, 0xc2, 0x69, 0xb1, 0xd4, 0x8e, 0x93, 0x98, + 0x86, 0xae, 0x54, 0xfd, 0x63, 0x6c, 0x1f, 0xe0, + 0x74, 0xd7, 0x10, 0x12, 0x7d, 0x47, 0x24, 0x91 }, + .public = { 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x98, 0xcb, 0x9b, 0x50, 0xdd, 0x3f, 0xc2, 0xb0, + 0xd4, 0xf2, 0xd2, 0xbf, 0x7c, 0x5c, 0xfd, 0xd1, + 0x0c, 0x8f, 0xcd, 0x31, 0xfc, 0x40, 0xaf, 0x1a, + 0xd4, 0x4f, 0x47, 0xc1, 0x31, 0x37, 0x63, 0x62 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x60, 0x88, 0x7b, 0x3d, 0xc7, 0x24, 0x43, 0x02, + 0x6e, 0xbe, 0xdb, 0xbb, 0xb7, 0x06, 0x65, 0xf4, + 0x2b, 0x87, 0xad, 0xd1, 0x44, 0x0e, 0x77, 0x68, + 0xfb, 0xd7, 0xe8, 0xe2, 0xce, 0x5f, 0x63, 0x9d }, + .public = { 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x38, 0xd6, 0x30, 0x4c, 0x4a, 0x7e, 0x6d, 0x9f, 
+ 0x79, 0x59, 0x33, 0x4f, 0xb5, 0x24, 0x5b, 0xd2, + 0xc7, 0x54, 0x52, 0x5d, 0x4c, 0x91, 0xdb, 0x95, + 0x02, 0x06, 0x92, 0x62, 0x34, 0xc1, 0xf6, 0x33 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0x78, 0xd3, 0x1d, 0xfa, 0x85, 0x44, 0x97, 0xd7, + 0x2d, 0x8d, 0xef, 0x8a, 0x1b, 0x7f, 0xb0, 0x06, + 0xce, 0xc2, 0xd8, 0xc4, 0x92, 0x46, 0x47, 0xc9, + 0x38, 0x14, 0xae, 0x56, 0xfa, 0xed, 0xa4, 0x95 }, + .public = { 0xf1, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x78, 0x6c, 0xd5, 0x49, 0x96, 0xf0, 0x14, 0xa5, + 0xa0, 0x31, 0xec, 0x14, 0xdb, 0x81, 0x2e, 0xd0, + 0x83, 0x55, 0x06, 0x1f, 0xdb, 0x5d, 0xe6, 0x80, + 0xa8, 0x00, 0xac, 0x52, 0x1f, 0x31, 0x8e, 0x23 }, + .valid = true + }, + /* wycheproof - public key >= p */ + { + .private = { 0xc0, 0x4c, 0x5b, 0xae, 0xfa, 0x83, 0x02, 0xdd, + 0xde, 0xd6, 0xa4, 0xbb, 0x95, 0x77, 0x61, 0xb4, + 0xeb, 0x97, 0xae, 0xfa, 0x4f, 0xc3, 0xb8, 0x04, + 0x30, 0x85, 0xf9, 0x6a, 0x56, 0x59, 0xb3, 0xa5 }, + .public = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + .result = { 0x29, 0xae, 0x8b, 0xc7, 0x3e, 0x9b, 0x10, 0xa0, + 0x8b, 0x4f, 0x68, 0x1c, 0x43, 0xc3, 0xe0, 0xac, + 0x1a, 0x17, 0x1d, 0x31, 0xb3, 0x8f, 0x1a, 0x48, + 0xef, 0xba, 0x29, 0xae, 0x63, 0x9e, 0xa1, 0x34 }, + .valid = true + }, + /* wycheproof - RFC 7748 */ + { + .private = { 0xa0, 0x46, 0xe3, 0x6b, 0xf0, 0x52, 0x7c, 0x9d, + 0x3b, 0x16, 0x15, 0x4b, 0x82, 0x46, 0x5e, 0xdd, + 0x62, 0x14, 0x4c, 0x0a, 0xc1, 0xfc, 0x5a, 0x18, + 0x50, 0x6a, 0x22, 0x44, 0xba, 0x44, 0x9a, 0x44 }, + .public = { 0xe6, 0xdb, 0x68, 0x67, 0x58, 0x30, 0x30, 0xdb, + 0x35, 0x94, 0xc1, 0xa4, 0x24, 0xb1, 0x5f, 0x7c, + 0x72, 0x66, 0x24, 0xec, 0x26, 0xb3, 0x35, 0x3b, + 0x10, 0xa9, 0x03, 0xa6, 0xd0, 0xab, 0x1c, 0x4c }, + .result = { 0xc3, 0xda, 0x55, 0x37, 0x9d, 0xe9, 0xc6, 0x90, + 0x8e, 0x94, 0xea, 0x4d, 0xf2, 0x8d, 0x08, 0x4f, + 0x32, 0xec, 0xcf, 0x03, 0x49, 0x1c, 0x71, 0xf7, + 0x54, 0xb4, 0x07, 0x55, 0x77, 0xa2, 0x85, 0x52 }, + .valid = true + }, + /* wycheproof - RFC 7748 */ + { + .private = { 0x48, 0x66, 0xe9, 0xd4, 0xd1, 0xb4, 0x67, 0x3c, + 0x5a, 0xd2, 0x26, 0x91, 0x95, 0x7d, 0x6a, 0xf5, + 0xc1, 0x1b, 0x64, 0x21, 0xe0, 0xea, 0x01, 0xd4, + 0x2c, 0xa4, 0x16, 0x9e, 0x79, 0x18, 0xba, 0x4d }, + .public = { 0xe5, 0x21, 0x0f, 0x12, 0x78, 0x68, 0x11, 0xd3, + 0xf4, 0xb7, 0x95, 0x9d, 0x05, 0x38, 0xae, 0x2c, + 0x31, 0xdb, 0xe7, 0x10, 0x6f, 0xc0, 0x3c, 0x3e, + 0xfc, 0x4c, 0xd5, 0x49, 0xc7, 0x15, 0xa4, 0x13 }, + .result = { 0x95, 0xcb, 0xde, 0x94, 0x76, 0xe8, 0x90, 0x7d, + 0x7a, 0xad, 0xe4, 0x5c, 0xb4, 0xb8, 0x73, 0xf8, + 0x8b, 0x59, 0x5a, 0x68, 0x79, 0x9f, 0xa1, 0x52, + 0xe6, 0xf8, 0xf7, 0x64, 0x7a, 0xac, 0x79, 0x57 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x0a, 0xb4, 0xe7, 0x63, 0x80, 0xd8, 0x4d, 0xde, + 0x4f, 0x68, 0x33, 0xc5, 0x8f, 0x2a, 0x9f, 0xb8, + 0xf8, 0x3b, 0xb0, 0x16, 0x9b, 0x17, 0x2b, 0xe4, + 0xb6, 0xe0, 0x59, 0x28, 0x87, 0x74, 0x1a, 0x36 }, + .result = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x89, 0xe1, 0x0d, 0x57, 0x01, 0xb4, 0x33, 0x7d, + 0x2d, 0x03, 0x21, 0x81, 0x53, 0x8b, 0x10, 0x64, + 0xbd, 0x40, 0x84, 0x40, 0x1c, 0xec, 0xa1, 0xfd, + 0x12, 0x66, 0x3a, 0x19, 0x59, 0x38, 0x80, 0x00 }, + .result = { 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x2b, 0x55, 0xd3, 0xaa, 0x4a, 0x8f, 0x80, 0xc8, + 0xc0, 0xb2, 0xae, 0x5f, 0x93, 0x3e, 0x85, 0xaf, + 0x49, 0xbe, 0xac, 0x36, 0xc2, 0xfa, 0x73, 0x94, + 0xba, 0xb7, 0x6c, 0x89, 0x33, 0xf8, 0xf8, 0x1d }, + .result = { 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x63, 0xe5, 0xb1, 0xfe, 0x96, 0x01, 0xfe, 0x84, + 0x38, 0x5d, 0x88, 0x66, 0xb0, 0x42, 0x12, 0x62, + 0xf7, 0x8f, 0xbf, 0xa5, 0xaf, 0xf9, 0x58, 0x5e, + 0x62, 0x66, 0x79, 0xb1, 0x85, 0x47, 0xd9, 0x59 }, + .result = { 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xe4, 0x28, 0xf3, 0xda, 0xc1, 0x78, 0x09, 0xf8, + 0x27, 0xa5, 0x22, 0xce, 0x32, 0x35, 0x50, 0x58, + 0xd0, 0x73, 0x69, 0x36, 0x4a, 0xa7, 0x89, 0x02, + 0xee, 0x10, 0x13, 0x9b, 0x9f, 0x9d, 0xd6, 0x53 }, + .result = { 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xb3, 0xb5, 0x0e, 0x3e, 0xd3, 0xa4, 0x07, 0xb9, + 0x5d, 0xe9, 0x42, 0xef, 0x74, 0x57, 0x5b, 0x5a, + 0xb8, 0xa1, 0x0c, 0x09, 0xee, 0x10, 0x35, 0x44, + 0xd6, 0x0b, 0xdf, 0xed, 0x81, 0x38, 0xab, 0x2b }, + .result = { 0xf9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x21, 0x3f, 0xff, 0xe9, 0x3d, 0x5e, 0xa8, 0xcd, + 0x24, 0x2e, 0x46, 0x28, 0x44, 0x02, 0x99, 0x22, + 0xc4, 0x3c, 0x77, 0xc9, 0xe3, 0xe4, 0x2f, 0x56, + 0x2f, 0x48, 0x5d, 0x24, 0xc5, 0x01, 0xa2, 0x0b }, + .result = { 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x91, 0xb2, 0x32, 0xa1, 0x78, 0xb3, 0xcd, 0x53, + 0x09, 0x32, 0x44, 0x1e, 0x61, 0x39, 0x41, 0x8f, + 0x72, 0x17, 0x22, 0x92, 0xf1, 0xda, 0x4c, 0x18, + 0x34, 0xfc, 0x5e, 0xbf, 0xef, 0xb5, 0x1e, 0x3f }, + .result = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x03 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x04, 0x5c, 0x6e, 0x11, 0xc5, 0xd3, 0x32, 0x55, + 0x6c, 0x78, 0x22, 0xfe, 0x94, 0xeb, 0xf8, 0x9b, + 0x56, 0xa3, 0x87, 0x8d, 0xc2, 0x7c, 0xa0, 0x79, + 0x10, 0x30, 0x58, 0x84, 0x9f, 0xab, 0xcb, 0x4f }, + .result = { 0xe5, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x1c, 0xa2, 0x19, 0x0b, 0x71, 0x16, 0x35, 0x39, + 0x06, 0x3c, 0x35, 0x77, 0x3b, 0xda, 0x0c, 0x9c, + 0x92, 0x8e, 0x91, 0x36, 0xf0, 0x62, 0x0a, 0xeb, + 0x09, 0x3f, 0x09, 0x91, 0x97, 0xb7, 0xf7, 0x4e }, + .result = { 0xe3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xf7, 0x6e, 0x90, 0x10, 0xac, 0x33, 0xc5, 0x04, + 0x3b, 0x2d, 0x3b, 0x76, 0xa8, 0x42, 0x17, 0x10, + 0x00, 0xc4, 0x91, 0x62, 0x22, 0xe9, 0xe8, 0x58, + 0x97, 0xa0, 0xae, 0xc7, 0xf6, 0x35, 0x0b, 0x3c }, + .result = { 0xdd, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0xbb, 0x72, 0x68, 0x8d, 0x8f, 0x8a, 0xa7, 0xa3, + 0x9c, 0xd6, 0x06, 0x0c, 0xd5, 0xc8, 0x09, 0x3c, + 0xde, 0xc6, 0xfe, 0x34, 0x19, 0x37, 0xc3, 0x88, + 0x6a, 0x99, 0x34, 0x6c, 0xd0, 0x7f, 0xaa, 0x55 }, + .result = { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x88, 0xfd, 0xde, 0xa1, 0x93, 0x39, 0x1c, 0x6a, + 0x59, 0x33, 0xef, 0x9b, 0x71, 0x90, 0x15, 0x49, + 0x44, 0x72, 0x05, 0xaa, 0xe9, 0xda, 0x92, 0x8a, + 0x6b, 0x91, 0xa3, 0x52, 0xba, 0x10, 0xf4, 0x1f }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 }, + .valid = true + }, + /* wycheproof - edge case for shared secret */ + { + .private = { 0xa0, 0xa4, 0xf1, 0x30, 0xb9, 0x8a, 0x5b, 0xe4, + 0xb1, 0xce, 0xdb, 0x7c, 0xb8, 0x55, 0x84, 0xa3, + 0x52, 0x0e, 0x14, 0x2d, 0x47, 0x4d, 0xc9, 0xcc, + 0xb9, 0x09, 0xa0, 0x73, 0xa9, 0x76, 0xbf, 0x63 }, + .public = { 0x30, 0x3b, 0x39, 0x2f, 0x15, 0x31, 0x16, 0xca, + 0xd9, 0xcc, 0x68, 0x2a, 0x00, 0xcc, 0xc4, 0x4c, + 0x95, 0xff, 0x0d, 0x3b, 0xbe, 0x56, 0x8b, 0xeb, + 0x6c, 0x4e, 0x73, 0x9b, 0xaf, 0xdc, 0x2c, 0x68 }, + .result = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xfd, 0x30, 0x0a, 0xeb, 0x40, 0xe1, 0xfa, 0x58, + 0x25, 0x18, 0x41, 0x2b, 0x49, 0xb2, 0x08, 0xa7, + 0x84, 0x2b, 0x1e, 0x1f, 0x05, 0x6a, 0x04, 0x01, + 0x78, 0xea, 0x41, 0x41, 0x53, 0x4f, 0x65, 0x2d }, + .result = { 0xb7, 0x34, 0x10, 0x5d, 0xc2, 0x57, 0x58, 0x5d, + 0x73, 0xb5, 0x66, 0xcc, 0xb7, 0x6f, 0x06, 0x27, + 0x95, 0xcc, 0xbe, 0xc8, 0x91, 0x28, 0xe5, 0x2b, + 0x02, 0xf3, 0xe5, 0x96, 0x39, 0xf1, 0x3c, 0x46 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xc8, 0xef, 0x79, 0xb5, 0x14, 0xd7, 0x68, 0x26, + 0x77, 0xbc, 0x79, 0x31, 0xe0, 0x6e, 0xe5, 0xc2, + 0x7c, 0x9b, 0x39, 0x2b, 0x4a, 0xe9, 0x48, 0x44, + 0x73, 0xf5, 0x54, 0xe6, 0x67, 0x8e, 0xcc, 0x2e }, + .result = { 
0x64, 0x7a, 0x46, 0xb6, 0xfc, 0x3f, 0x40, 0xd6, + 0x21, 0x41, 0xee, 0x3c, 0xee, 0x70, 0x6b, 0x4d, + 0x7a, 0x92, 0x71, 0x59, 0x3a, 0x7b, 0x14, 0x3e, + 0x8e, 0x2e, 0x22, 0x79, 0x88, 0x3e, 0x45, 0x50 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0x64, 0xae, 0xac, 0x25, 0x04, 0x14, 0x48, 0x61, + 0x53, 0x2b, 0x7b, 0xbc, 0xb6, 0xc8, 0x7d, 0x67, + 0xdd, 0x4c, 0x1f, 0x07, 0xeb, 0xc2, 0xe0, 0x6e, + 0xff, 0xb9, 0x5a, 0xec, 0xc6, 0x17, 0x0b, 0x2c }, + .result = { 0x4f, 0xf0, 0x3d, 0x5f, 0xb4, 0x3c, 0xd8, 0x65, + 0x7a, 0x3c, 0xf3, 0x7c, 0x13, 0x8c, 0xad, 0xce, + 0xcc, 0xe5, 0x09, 0xe4, 0xeb, 0xa0, 0x89, 0xd0, + 0xef, 0x40, 0xb4, 0xe4, 0xfb, 0x94, 0x61, 0x55 }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0xbf, 0x68, 0xe3, 0x5e, 0x9b, 0xdb, 0x7e, 0xee, + 0x1b, 0x50, 0x57, 0x02, 0x21, 0x86, 0x0f, 0x5d, + 0xcd, 0xad, 0x8a, 0xcb, 0xab, 0x03, 0x1b, 0x14, + 0x97, 0x4c, 0xc4, 0x90, 0x13, 0xc4, 0x98, 0x31 }, + .result = { 0x21, 0xce, 0xe5, 0x2e, 0xfd, 0xbc, 0x81, 0x2e, + 0x1d, 0x02, 0x1a, 0x4a, 0xf1, 0xe1, 0xd8, 0xbc, + 0x4d, 0xb3, 0xc4, 0x00, 0xe4, 0xd2, 0xa2, 0xc5, + 0x6a, 0x39, 0x26, 0xdb, 0x4d, 0x99, 0xc6, 0x5b }, + .valid = true + }, + /* wycheproof - checking for overflow */ + { + .private = { 0xc8, 0x17, 0x24, 0x70, 0x40, 0x00, 0xb2, 0x6d, + 0x31, 0x70, 0x3c, 0xc9, 0x7e, 0x3a, 0x37, 0x8d, + 0x56, 0xfa, 0xd8, 0x21, 0x93, 0x61, 0xc8, 0x8c, + 0xca, 0x8b, 0xd7, 0xc5, 0x71, 0x9b, 0x12, 0xb2 }, + .public = { 0x53, 0x47, 0xc4, 0x91, 0x33, 0x1a, 0x64, 0xb4, + 0x3d, 0xdc, 0x68, 0x30, 0x34, 0xe6, 0x77, 0xf5, + 0x3d, 0xc3, 0x2b, 0x52, 0xa5, 0x2a, 0x57, 0x7c, + 0x15, 0xa8, 0x3b, 0xf2, 0x98, 0xe9, 0x9f, 0x19 }, + .result = { 0x18, 0xcb, 0x89, 0xe4, 0xe2, 0x0c, 0x0c, 0x2b, + 0xd3, 0x24, 0x30, 0x52, 0x45, 0x26, 0x6c, 0x93, + 0x27, 0x69, 0x0b, 0xbe, 0x79, 0xac, 0xb8, 0x8f, + 0x5b, 0x8f, 0xb3, 0xf7, 0x4e, 0xca, 0x3e, 0x52 }, + .valid = true + }, + /* wycheproof - private key == -1 (mod order) */ + { + .private = { 0xa0, 0x23, 0xcd, 0xd0, 0x83, 0xef, 0x5b, 0xb8, + 0x2f, 0x10, 0xd6, 0x2e, 0x59, 0xe1, 0x5a, 0x68, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50 }, + .public = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, + .result = { 0x25, 0x8e, 0x04, 0x52, 0x3b, 0x8d, 0x25, 0x3e, + 0xe6, 0x57, 0x19, 0xfc, 0x69, 0x06, 0xc6, 0x57, + 0x19, 0x2d, 0x80, 0x71, 0x7e, 0xdc, 0x82, 0x8f, + 0xa0, 0xaf, 0x21, 0x68, 0x6e, 0x2f, 0xaa, 0x75 }, + .valid = true + }, + /* wycheproof - private key == 1 (mod order) on twist */ + { + .private = { 0x58, 0x08, 0x3d, 0xd2, 0x61, 0xad, 0x91, 0xef, + 0xf9, 0x52, 0x32, 0x2e, 0xc8, 0x24, 0xc6, 0x82, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5f }, + .public = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 
0xe0, 0x35 }, + .result = { 0x2e, 0xae, 0x5e, 0xc3, 0xdd, 0x49, 0x4e, 0x9f, + 0x2d, 0x37, 0xd2, 0x58, 0xf8, 0x73, 0xa8, 0xe6, + 0xe9, 0xd0, 0xdb, 0xd1, 0xe3, 0x83, 0xef, 0x64, + 0xd9, 0x8b, 0xb9, 0x1b, 0x3e, 0x0b, 0xe0, 0x35 }, + .valid = true + } +}; + +static void test_curve25519(struct kunit *test) +{ + for (size_t i = 0; i < ARRAY_SIZE(curve25519_test_vectors); ++i) { + const struct curve25519_test_vector *vec = + &curve25519_test_vectors[i]; + u8 out[CURVE25519_KEY_SIZE] = {}; + bool ret; + + ret = curve25519(out, vec->private, vec->public); + KUNIT_EXPECT_EQ_MSG(test, ret, vec->valid, + "Wrong return value with test vector %zu", + i); + KUNIT_EXPECT_MEMEQ_MSG(test, out, vec->result, sizeof(out), + "Wrong output with test vector %zu", i); + } +} + +static void test_curve25519_basepoint(struct kunit *test) +{ + for (size_t i = 0; i < 5; ++i) { + u8 in[CURVE25519_KEY_SIZE]; + u8 out[CURVE25519_KEY_SIZE]; + u8 out2[CURVE25519_KEY_SIZE]; + bool ret, ret2; + + get_random_bytes(in, sizeof(in)); + ret = curve25519_generate_public(out, in); + ret2 = curve25519(out2, in, (u8[CURVE25519_KEY_SIZE]){ 9 }); + KUNIT_EXPECT_EQ_MSG(test, ret, ret2, + "in=%*phN", CURVE25519_KEY_SIZE, in); + KUNIT_EXPECT_MEMEQ_MSG(test, out, out2, CURVE25519_KEY_SIZE, + "in=%*phN", CURVE25519_KEY_SIZE, in); + } +} + +static struct kunit_case curve25519_test_cases[] = { + KUNIT_CASE(test_curve25519), + KUNIT_CASE(test_curve25519_basepoint), + {}, +}; + +static struct kunit_suite curve25519_test_suite = { + .name = "curve25519", + .test_cases = curve25519_test_cases, +}; +kunit_test_suite(curve25519_test_suite); + +MODULE_DESCRIPTION("KUnit tests for Curve25519"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 643d79e531cc99da0dc5502dfccb6b3b305c65f3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:19 -0700 Subject: lib/crypto: tests: Add Curve25519 benchmark Add a benchmark to curve25519_kunit. This brings it in line with the other crypto KUnit tests and provides an easy way to measure performance. 
Link: https://lore.kernel.org/r/20250906213523.84915-9-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/tests/curve25519_kunit.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crypto/tests/curve25519_kunit.c b/lib/crypto/tests/curve25519_kunit.c index 68eab75d40dc..248d05f66b35 100644 --- a/lib/crypto/tests/curve25519_kunit.c +++ b/lib/crypto/tests/curve25519_kunit.c @@ -5,6 +5,7 @@ #include #include +#include struct curve25519_test_vector { u8 private[CURVE25519_KEY_SIZE]; @@ -1316,9 +1317,39 @@ static void test_curve25519_basepoint(struct kunit *test) } } +static void benchmark_curve25519(struct kunit *test) +{ + const u8 *private = curve25519_test_vectors[0].private; + const u8 *public = curve25519_test_vectors[0].public; + const size_t warmup_niter = 5000; + const size_t benchmark_niter = 1024; + u8 out[CURVE25519_KEY_SIZE]; + bool ok = true; + u64 t; + + if (!IS_ENABLED(CONFIG_CRYPTO_LIB_BENCHMARK)) + kunit_skip(test, "not enabled"); + + /* Warm-up */ + for (size_t i = 0; i < warmup_niter; i++) + ok &= curve25519(out, private, public); + + /* Benchmark */ + preempt_disable(); + t = ktime_get_ns(); + for (size_t i = 0; i < benchmark_niter; i++) + ok &= curve25519(out, private, public); + t = ktime_get_ns() - t; + preempt_enable(); + KUNIT_EXPECT_TRUE(test, ok); + kunit_info(test, "%llu ops/s", + div64_u64((u64)benchmark_niter * NSEC_PER_SEC, t ?: 1)); +} + static struct kunit_case curve25519_test_cases[] = { KUNIT_CASE(test_curve25519), KUNIT_CASE(test_curve25519_basepoint), + KUNIT_CASE(benchmark_curve25519), {}, }; @@ -1328,5 +1359,5 @@ static struct kunit_suite curve25519_test_suite = { }; kunit_test_suite(curve25519_test_suite); -MODULE_DESCRIPTION("KUnit tests for Curve25519"); +MODULE_DESCRIPTION("KUnit tests and benchmark for Curve25519"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 8c06b330e8f79834924305362227e38e4e2469ae Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:20 -0700 Subject: lib/crypto: curve25519: Move a couple functions out-of-line Move curve25519() and curve25519_generate_public() from curve25519.h to curve25519.c. There's no good reason for them to be inline. 
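For orientation before the diff: the sketch below is a hypothetical caller of the two functions being moved, not code from the patch. The prototypes match the ones visible in the diff that follows; the wrapper name and the error code are illustrative assumptions only. Both library calls return false when the computed point is all-zero, which a caller must treat as failure.

#include <crypto/curve25519.h>
#include <linux/errno.h>

/* Hypothetical usage sketch; not part of this patch. */
static int example_curve25519_agree(u8 shared[CURVE25519_KEY_SIZE],
				    u8 my_public[CURVE25519_KEY_SIZE],
				    const u8 my_secret[CURVE25519_KEY_SIZE],
				    const u8 peer_public[CURVE25519_KEY_SIZE])
{
	/* Derive our public key; returns false for an all-zero secret or result. */
	if (!curve25519_generate_public(my_public, my_secret))
		return -EKEYREJECTED;

	/*
	 * Compute the shared secret; returns false if the result is all-zero,
	 * e.g. when the peer supplied a low-order public key.
	 */
	if (!curve25519(shared, my_secret, peer_public))
		return -EKEYREJECTED;

	return 0;
}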
Link: https://lore.kernel.org/r/20250906213523.84915-10-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/curve25519.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c index 25f16777865b..1b786389d714 100644 --- a/lib/crypto/curve25519.c +++ b/lib/crypto/curve25519.c @@ -10,8 +10,40 @@ */ #include -#include +#include +#include #include +#include + +bool __must_check +curve25519(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) + curve25519_arch(mypublic, secret, basepoint); + else + curve25519_generic(mypublic, secret, basepoint); + return crypto_memneq(mypublic, curve25519_null_point, + CURVE25519_KEY_SIZE); +} +EXPORT_SYMBOL(curve25519); + +bool __must_check +curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + if (unlikely(!crypto_memneq(secret, curve25519_null_point, + CURVE25519_KEY_SIZE))) + return false; + + if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) + curve25519_base_arch(pub, secret); + else + curve25519_generic(pub, secret, curve25519_base_point); + return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); +} +EXPORT_SYMBOL(curve25519_generate_public); static int __init curve25519_init(void) { -- cgit v1.2.3 From 68546e5632c0b982663af575ae12cc5d81facc91 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Sep 2025 14:35:21 -0700 Subject: lib/crypto: curve25519: Consolidate into single module Reorganize the Curve25519 library code: - Build a single libcurve25519 module, instead of up to three modules: libcurve25519, libcurve25519-generic, and an arch-specific module. - Move the arch-specific Curve25519 code from arch/$(SRCARCH)/crypto/ to lib/crypto/$(SRCARCH)/. Centralize the build rules into lib/crypto/Makefile and lib/crypto/Kconfig. - Include the arch-specific code directly in lib/crypto/curve25519.c via a header, rather than using a separate .c file. - Eliminate the entanglement with CRYPTO. CRYPTO_LIB_CURVE25519 no longer selects CRYPTO, and the arch-specific Curve25519 code no longer depends on CRYPTO. This brings Curve25519 in line with the latest conventions for lib/crypto/, used by other algorithms. The exception is that I kept the generic code in separate translation units for now. (Some of the function names collide between the x86 and generic Curve25519 code. And the Curve25519 functions are very long anyway, so inlining doesn't matter as much for Curve25519 as it does for some other algorithms.) 
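Before the Kconfig and Makefile changes below, a hedged schematic of the resulting source layout may help. This illustrates the structure described above; it is not the literal contents of lib/crypto/curve25519.c, and the exact preprocessor guard used in the real file may differ.

/*
 * Schematic of lib/crypto/curve25519.c after this change (illustrative,
 * assuming a CONFIG_CRYPTO_LIB_CURVE25519_ARCH guard; see the actual diff).
 */
#include <crypto/curve25519.h>

#ifdef CONFIG_CRYPTO_LIB_CURVE25519_ARCH
/*
 * "curve25519.h" resolves to lib/crypto/<arch>/curve25519.h through the
 * -I$(src)/$(SRCARCH) include path added in lib/crypto/Makefile, pulling
 * the arch-specific implementation into this translation unit.
 */
#include "curve25519.h"
#endif

/* The generic fiat32/hacl64 code remains in separate translation units. */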
Link: https://lore.kernel.org/r/20250906213523.84915-11-ebiggers@kernel.org Signed-off-by: Eric Biggers --- lib/crypto/Kconfig | 37 +- lib/crypto/Makefile | 26 +- lib/crypto/arm/curve25519-core.S | 2062 +++++++++++++++++++++++++++ lib/crypto/arm/curve25519.h | 47 + lib/crypto/curve25519-generic.c | 25 - lib/crypto/curve25519.c | 50 +- lib/crypto/powerpc/curve25519-ppc64le_asm.S | 671 +++++++++ lib/crypto/powerpc/curve25519.h | 186 +++ lib/crypto/x86/curve25519.h | 1613 +++++++++++++++++++++ 9 files changed, 4645 insertions(+), 72 deletions(-) create mode 100644 lib/crypto/arm/curve25519-core.S create mode 100644 lib/crypto/arm/curve25519.h delete mode 100644 lib/crypto/curve25519-generic.c create mode 100644 lib/crypto/powerpc/curve25519-ppc64le_asm.S create mode 100644 lib/crypto/powerpc/curve25519.h create mode 100644 lib/crypto/x86/curve25519.h (limited to 'lib') diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 37d85e0c9b97..eea17e36a22b 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -54,35 +54,24 @@ config CRYPTO_LIB_CHACHA_ARCH default y if S390 default y if X86_64 -config CRYPTO_ARCH_HAVE_LIB_CURVE25519 - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the Curve25519 library interface, - either builtin or as a module. - -config CRYPTO_LIB_CURVE25519_GENERIC +config CRYPTO_LIB_CURVE25519 tristate select CRYPTO_LIB_UTILS help - This symbol can be depended upon by arch implementations of the - Curve25519 library interface that require the generic code as a - fallback, e.g., for SIMD implementations. If no arch specific - implementation is enabled, this implementation serves the users - of CRYPTO_LIB_CURVE25519. + The Curve25519 library functions. Select this if your module uses any + of the functions from . -config CRYPTO_LIB_CURVE25519_INTERNAL - tristate - select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n +config CRYPTO_LIB_CURVE25519_ARCH + bool + depends on CRYPTO_LIB_CURVE25519 && !UML && !KMSAN + default y if ARM && KERNEL_MODE_NEON + default y if PPC64 && CPU_LITTLE_ENDIAN + default y if X86_64 -config CRYPTO_LIB_CURVE25519 - tristate - select CRYPTO - select CRYPTO_LIB_CURVE25519_INTERNAL - help - Enable the Curve25519 library interface. This interface may be - fulfilled by either the generic implementation or an arch-specific - one, if one is available and enabled. 
+config CRYPTO_LIB_CURVE25519_GENERIC + bool + depends on CRYPTO_LIB_CURVE25519 + default y if !CRYPTO_LIB_CURVE25519_ARCH || ARM || X86_64 config CRYPTO_LIB_DES tristate diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index 6c3be971ace0..bded351aeace 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -76,17 +76,31 @@ obj-$(CONFIG_CRYPTO_LIB_CHACHA20POLY1305) += libchacha20poly1305.o libchacha20poly1305-y += chacha20poly1305.o libchacha20poly1305-$(CONFIG_CRYPTO_SELFTESTS) += chacha20poly1305-selftest.o -obj-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += libcurve25519-generic.o -libcurve25519-generic-y := curve25519-fiat32.o -libcurve25519-generic-$(CONFIG_ARCH_SUPPORTS_INT128) := curve25519-hacl64.o -libcurve25519-generic-y += curve25519-generic.o +################################################################################ + +obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o +libcurve25519-y := curve25519.o + +# Disable GCOV in odd or sensitive code +GCOV_PROFILE_curve25519.o := n + +ifeq ($(CONFIG_ARCH_SUPPORTS_INT128),y) +libcurve25519-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += curve25519-hacl64.o +else +libcurve25519-$(CONFIG_CRYPTO_LIB_CURVE25519_GENERIC) += curve25519-fiat32.o +endif # clang versions prior to 18 may blow out the stack with KASAN ifeq ($(call clang-min-version, 180000),) KASAN_SANITIZE_curve25519-hacl64.o := n endif -obj-$(CONFIG_CRYPTO_LIB_CURVE25519) += libcurve25519.o -libcurve25519-y += curve25519.o +ifeq ($(CONFIG_CRYPTO_LIB_CURVE25519_ARCH),y) +CFLAGS_curve25519.o += -I$(src)/$(SRCARCH) +libcurve25519-$(CONFIG_ARM) += arm/curve25519-core.o +libcurve25519-$(CONFIG_PPC) += powerpc/curve25519-ppc64le_asm.o +endif + +################################################################################ obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o libdes-y := des.o diff --git a/lib/crypto/arm/curve25519-core.S b/lib/crypto/arm/curve25519-core.S new file mode 100644 index 000000000000..b697fa5d059a --- /dev/null +++ b/lib/crypto/arm/curve25519-core.S @@ -0,0 +1,2062 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. + */ + +#include + +.text +.arch armv7-a +.fpu neon +.align 4 + +ENTRY(curve25519_neon) + push {r4-r11, lr} + mov ip, sp + sub r3, sp, #704 + and r3, r3, #0xfffffff0 + mov sp, r3 + movw r4, #0 + movw r5, #254 + vmov.i32 q0, #1 + vshr.u64 q1, q0, #7 + vshr.u64 q0, q0, #8 + vmov.i32 d4, #19 + vmov.i32 d5, #38 + add r6, sp, #480 + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128] + add r6, r3, #0 + vmov.i32 q2, #0 + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 {d4-d5}, [r6, : 128]! + vst1.8 d4, [r6, : 64] + add r6, r3, #0 + movw r7, #960 + sub r7, r7, #2 + neg r7, r7 + sub r7, r7, r7, LSL #7 + str r7, [r6] + add r6, sp, #672 + vld1.8 {d4-d5}, [r1]! + vld1.8 {d6-d7}, [r1] + vst1.8 {d4-d5}, [r6, : 128]! 
+ vst1.8 {d6-d7}, [r6, : 128] + sub r1, r6, #16 + ldrb r6, [r1] + and r6, r6, #248 + strb r6, [r1] + ldrb r6, [r1, #31] + and r6, r6, #127 + orr r6, r6, #64 + strb r6, [r1, #31] + vmov.i64 q2, #0xffffffff + vshr.u64 q3, q2, #7 + vshr.u64 q2, q2, #6 + vld1.8 {d8}, [r2] + vld1.8 {d10}, [r2] + add r2, r2, #6 + vld1.8 {d12}, [r2] + vld1.8 {d14}, [r2] + add r2, r2, #6 + vld1.8 {d16}, [r2] + add r2, r2, #4 + vld1.8 {d18}, [r2] + vld1.8 {d20}, [r2] + add r2, r2, #6 + vld1.8 {d22}, [r2] + add r2, r2, #2 + vld1.8 {d24}, [r2] + vld1.8 {d26}, [r2] + vshr.u64 q5, q5, #26 + vshr.u64 q6, q6, #3 + vshr.u64 q7, q7, #29 + vshr.u64 q8, q8, #6 + vshr.u64 q10, q10, #25 + vshr.u64 q11, q11, #3 + vshr.u64 q12, q12, #12 + vshr.u64 q13, q13, #38 + vand q4, q4, q2 + vand q6, q6, q2 + vand q8, q8, q2 + vand q10, q10, q2 + vand q2, q12, q2 + vand q5, q5, q3 + vand q7, q7, q3 + vand q9, q9, q3 + vand q11, q11, q3 + vand q3, q13, q3 + add r2, r3, #48 + vadd.i64 q12, q4, q1 + vadd.i64 q13, q10, q1 + vshr.s64 q12, q12, #26 + vshr.s64 q13, q13, #26 + vadd.i64 q5, q5, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q14, q5, q0 + vadd.i64 q11, q11, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q11, q0 + vsub.i64 q4, q4, q12 + vshr.s64 q12, q14, #25 + vsub.i64 q10, q10, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q14, q6, q1 + vadd.i64 q2, q2, q13 + vsub.i64 q5, q5, q12 + vshr.s64 q12, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q1 + vadd.i64 q7, q7, q12 + vshl.i64 q12, q12, #26 + vadd.i64 q15, q7, q0 + vsub.i64 q11, q11, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q12 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q3, q0 + vadd.i64 q8, q8, q12 + vshl.i64 q12, q12, #25 + vadd.i64 q15, q8, q1 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q7, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q9, q9, q12 + vtrn.32 d12, d14 + vshl.i64 q12, q12, #26 + vtrn.32 d13, d15 + vadd.i64 q0, q9, q0 + vadd.i64 q4, q4, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q6, q13, #4 + vsub.i64 q7, q8, q12 + vshr.s64 q0, q0, #25 + vadd.i64 q4, q4, q6 + vadd.i64 q6, q10, q0 + vshl.i64 q0, q0, #25 + vadd.i64 q8, q6, q1 + vadd.i64 q4, q4, q13 + vshl.i64 q10, q13, #25 + vadd.i64 q1, q4, q1 + vsub.i64 q0, q9, q0 + vshr.s64 q8, q8, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d14, d0 + vshr.s64 q1, q1, #26 + vtrn.32 d15, d1 + vadd.i64 q0, q11, q8 + vst1.8 d14, [r2, : 64] + vshl.i64 q7, q8, #26 + vadd.i64 q5, q5, q1 + vtrn.32 d4, d6 + vshl.i64 q1, q1, #26 + vtrn.32 d5, d7 + vsub.i64 q3, q6, q7 + add r2, r2, #16 + vsub.i64 q1, q4, q1 + vst1.8 d4, [r2, : 64] + vtrn.32 d6, d0 + vtrn.32 d7, d1 + sub r2, r2, #8 + vtrn.32 d2, d10 + vtrn.32 d3, d11 + vst1.8 d6, [r2, : 64] + sub r2, r2, #24 + vst1.8 d2, [r2, : 64] + add r2, r3, #96 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #144 + vmov.i32 q0, #0 + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #240 + vmov.i32 q0, #0 + vmov.i64 d2, #0xff + vmov.i64 d3, #0 + vshr.u32 q1, q1, #7 + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 d0, [r2, : 64] + add r2, r3, #48 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! 
+ vst1.8 d4, [r6, : 64] +.Lmainloop: + mov r2, r5, LSR #3 + and r6, r5, #7 + ldrb r2, [r1, r2] + mov r2, r2, LSR r6 + and r2, r2, #1 + str r5, [sp, #456] + eor r4, r4, r2 + str r2, [sp, #460] + neg r2, r4 + add r4, r3, #96 + add r5, r3, #192 + add r6, r3, #144 + vld1.8 {d8-d9}, [r4, : 128]! + add r7, r3, #240 + vld1.8 {d10-d11}, [r5, : 128]! + veor q6, q4, q5 + vld1.8 {d14-d15}, [r6, : 128]! + vdup.i32 q8, r2 + vld1.8 {d18-d19}, [r7, : 128]! + veor q10, q7, q9 + vld1.8 {d22-d23}, [r4, : 128]! + vand q6, q6, q8 + vld1.8 {d24-d25}, [r5, : 128]! + vand q10, q10, q8 + vld1.8 {d26-d27}, [r6, : 128]! + veor q4, q4, q6 + vld1.8 {d28-d29}, [r7, : 128]! + veor q5, q5, q6 + vld1.8 {d0}, [r4, : 64] + veor q6, q7, q10 + vld1.8 {d2}, [r5, : 64] + veor q7, q9, q10 + vld1.8 {d4}, [r6, : 64] + veor q9, q11, q12 + vld1.8 {d6}, [r7, : 64] + veor q10, q0, q1 + sub r2, r4, #32 + vand q9, q9, q8 + sub r4, r5, #32 + vand q10, q10, q8 + sub r5, r6, #32 + veor q11, q11, q9 + sub r6, r7, #32 + veor q0, q0, q10 + veor q9, q12, q9 + veor q1, q1, q10 + veor q10, q13, q14 + veor q12, q2, q3 + vand q10, q10, q8 + vand q8, q12, q8 + veor q12, q13, q10 + veor q2, q2, q8 + veor q10, q14, q10 + veor q3, q3, q8 + vadd.i32 q8, q4, q6 + vsub.i32 q4, q4, q6 + vst1.8 {d16-d17}, [r2, : 128]! + vadd.i32 q6, q11, q12 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q4, q11, q12 + vst1.8 {d12-d13}, [r2, : 128]! + vadd.i32 q6, q0, q2 + vst1.8 {d8-d9}, [r5, : 128]! + vsub.i32 q0, q0, q2 + vst1.8 d12, [r2, : 64] + vadd.i32 q2, q5, q7 + vst1.8 d0, [r5, : 64] + vsub.i32 q0, q5, q7 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q9, q10 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q9, q10 + vst1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q2, q1, q3 + vst1.8 {d0-d1}, [r6, : 128]! + vsub.i32 q0, q1, q3 + vst1.8 d4, [r4, : 64] + vst1.8 d0, [r6, : 64] + add r2, sp, #512 + add r4, r3, #96 + add r5, r3, #144 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! 
+ vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! + vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #288 + vshl.i64 q12, q12, #25 + add r4, r3, #336 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! 
+ vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #240 + add r4, r3, #96 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #144 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #192 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! 
+ vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128] + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #144 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 
+ vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, r3, #288 + add r4, r3, #336 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vsub.i32 q1, q1, q2 + add r5, r3, #240 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vsub.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #144 + add r4, r3, #96 + add r5, r3, #144 + add r6, r3, #192 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vsub.i32 q2, q0, q1 + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d6-d7}, [r4, : 128]! + vsub.i32 q4, q1, q3 + vadd.i32 q1, q1, q3 + vld1.8 {d6}, [r2, : 64] + vld1.8 {d10}, [r4, : 64] + vsub.i32 q6, q3, q5 + vadd.i32 q3, q3, q5 + vst1.8 {d4-d5}, [r5, : 128]! + vst1.8 {d0-d1}, [r6, : 128]! + vst1.8 {d8-d9}, [r5, : 128]! + vst1.8 {d2-d3}, [r6, : 128]! + vst1.8 d12, [r5, : 64] + vst1.8 d6, [r6, : 64] + add r2, r3, #0 + add r4, r3, #240 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #336 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #288 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! + vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! 
+ vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 
+ vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #288 + vshl.i64 q7, q7, #25 + add r4, r3, #96 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + add r2, sp, #512 + add r4, r3, #144 + add r5, r3, #192 + vld1.8 {d0-d1}, [r2, : 128] + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4-d5}, [r5, : 128]! + vzip.i32 q1, q2 + vld1.8 {d6-d7}, [r4, : 128]! + vld1.8 {d8-d9}, [r5, : 128]! + vshl.i32 q5, q1, #1 + vzip.i32 q3, q4 + vshl.i32 q6, q2, #1 + vld1.8 {d14}, [r4, : 64] + vshl.i32 q8, q3, #1 + vld1.8 {d15}, [r5, : 64] + vshl.i32 q9, q4, #1 + vmul.i32 d21, d7, d1 + vtrn.32 d14, d15 + vmul.i32 q11, q4, q0 + vmul.i32 q0, q7, q0 + vmull.s32 q12, d2, d2 + vmlal.s32 q12, d11, d1 + vmlal.s32 q12, d12, d0 + vmlal.s32 q12, d13, d23 + vmlal.s32 q12, d16, d22 + vmlal.s32 q12, d7, d21 + vmull.s32 q10, d2, d11 + vmlal.s32 q10, d4, d1 + vmlal.s32 q10, d13, d0 + vmlal.s32 q10, d6, d23 + vmlal.s32 q10, d17, d22 + vmull.s32 q13, d10, d4 + vmlal.s32 q13, d11, d3 + vmlal.s32 q13, d13, d1 + vmlal.s32 q13, d16, d0 + vmlal.s32 q13, d17, d23 + vmlal.s32 q13, d8, d22 + vmull.s32 q1, d10, d5 + vmlal.s32 q1, d11, d4 + vmlal.s32 q1, d6, d1 + vmlal.s32 q1, d17, d0 + vmlal.s32 q1, d8, d23 + vmull.s32 q14, d10, d6 + vmlal.s32 q14, d11, d13 + vmlal.s32 q14, d4, d4 + vmlal.s32 q14, d17, d1 + vmlal.s32 q14, d18, d0 + vmlal.s32 q14, d9, d23 + vmull.s32 q11, d10, d7 + vmlal.s32 q11, d11, d6 + vmlal.s32 q11, d12, d5 + vmlal.s32 q11, d8, d1 + vmlal.s32 q11, d19, d0 + vmull.s32 q15, d10, d8 + vmlal.s32 q15, d11, d17 + vmlal.s32 q15, d12, d6 + vmlal.s32 q15, d13, d5 + vmlal.s32 q15, d19, d1 + vmlal.s32 q15, d14, d0 + vmull.s32 q2, d10, d9 + vmlal.s32 q2, d11, d8 + vmlal.s32 q2, d12, d7 + vmlal.s32 q2, d13, d6 + vmlal.s32 q2, d14, d1 + vmull.s32 q0, d15, d1 + vmlal.s32 q0, d10, d14 + vmlal.s32 q0, d11, d19 + vmlal.s32 q0, d12, d8 + vmlal.s32 q0, d13, d17 + vmlal.s32 q0, d6, d6 + add r2, sp, #480 + vld1.8 {d18-d19}, [r2, : 128]! 
+ vmull.s32 q3, d16, d7 + vmlal.s32 q3, d10, d15 + vmlal.s32 q3, d11, d14 + vmlal.s32 q3, d12, d9 + vmlal.s32 q3, d13, d8 + vld1.8 {d8-d9}, [r2, : 128] + vadd.i64 q5, q12, q9 + vadd.i64 q6, q15, q9 + vshr.s64 q5, q5, #26 + vshr.s64 q6, q6, #26 + vadd.i64 q7, q10, q5 + vshl.i64 q5, q5, #26 + vadd.i64 q8, q7, q4 + vadd.i64 q2, q2, q6 + vshl.i64 q6, q6, #26 + vadd.i64 q10, q2, q4 + vsub.i64 q5, q12, q5 + vshr.s64 q8, q8, #25 + vsub.i64 q6, q15, q6 + vshr.s64 q10, q10, #25 + vadd.i64 q12, q13, q8 + vshl.i64 q8, q8, #25 + vadd.i64 q13, q12, q9 + vadd.i64 q0, q0, q10 + vsub.i64 q7, q7, q8 + vshr.s64 q8, q13, #26 + vshl.i64 q10, q10, #25 + vadd.i64 q13, q0, q9 + vadd.i64 q1, q1, q8 + vshl.i64 q8, q8, #26 + vadd.i64 q15, q1, q4 + vsub.i64 q2, q2, q10 + vshr.s64 q10, q13, #26 + vsub.i64 q8, q12, q8 + vshr.s64 q12, q15, #25 + vadd.i64 q3, q3, q10 + vshl.i64 q10, q10, #26 + vadd.i64 q13, q3, q4 + vadd.i64 q14, q14, q12 + add r2, r3, #144 + vshl.i64 q12, q12, #25 + add r4, r3, #192 + vadd.i64 q15, q14, q9 + add r2, r2, #8 + vsub.i64 q0, q0, q10 + add r4, r4, #8 + vshr.s64 q10, q13, #25 + vsub.i64 q1, q1, q12 + vshr.s64 q12, q15, #26 + vadd.i64 q13, q10, q10 + vadd.i64 q11, q11, q12 + vtrn.32 d16, d2 + vshl.i64 q12, q12, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q11, q4 + vadd.i64 q4, q5, q13 + vst1.8 d16, [r2, : 64]! + vshl.i64 q5, q10, #4 + vst1.8 d17, [r4, : 64]! + vsub.i64 q8, q14, q12 + vshr.s64 q1, q1, #25 + vadd.i64 q4, q4, q5 + vadd.i64 q5, q6, q1 + vshl.i64 q1, q1, #25 + vadd.i64 q6, q5, q9 + vadd.i64 q4, q4, q10 + vshl.i64 q10, q10, #25 + vadd.i64 q9, q4, q9 + vsub.i64 q1, q11, q1 + vshr.s64 q6, q6, #26 + vsub.i64 q3, q3, q10 + vtrn.32 d16, d2 + vshr.s64 q9, q9, #26 + vtrn.32 d17, d3 + vadd.i64 q1, q2, q6 + vst1.8 d16, [r2, : 64] + vshl.i64 q2, q6, #26 + vst1.8 d17, [r4, : 64] + vadd.i64 q6, q7, q9 + vtrn.32 d0, d6 + vshl.i64 q7, q9, #26 + vtrn.32 d1, d7 + vsub.i64 q2, q5, q2 + add r2, r2, #16 + vsub.i64 q3, q4, q7 + vst1.8 d0, [r2, : 64] + add r4, r4, #16 + vst1.8 d1, [r4, : 64] + vtrn.32 d4, d2 + vtrn.32 d5, d3 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d6, d12 + vtrn.32 d7, d13 + vst1.8 d4, [r2, : 64] + vst1.8 d5, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d6, [r2, : 64] + vst1.8 d7, [r4, : 64] + add r2, r3, #336 + add r4, r3, #288 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vadd.i32 q0, q0, q1 + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4-d5}, [r4, : 128]! + vadd.i32 q1, q1, q2 + add r5, r3, #288 + vld1.8 {d4}, [r2, : 64] + vld1.8 {d6}, [r4, : 64] + vadd.i32 q2, q2, q3 + vst1.8 {d0-d1}, [r5, : 128]! + vst1.8 {d2-d3}, [r5, : 128]! + vst1.8 d4, [r5, : 64] + add r2, r3, #48 + add r4, r3, #144 + vld1.8 {d0-d1}, [r4, : 128]! + vld1.8 {d2-d3}, [r4, : 128]! + vld1.8 {d4}, [r4, : 64] + add r4, r3, #288 + vld1.8 {d6-d7}, [r4, : 128]! + vtrn.32 q0, q3 + vld1.8 {d8-d9}, [r4, : 128]! + vshl.i32 q5, q0, #4 + vtrn.32 q1, q4 + vshl.i32 q6, q3, #4 + vadd.i32 q5, q5, q0 + vadd.i32 q6, q6, q3 + vshl.i32 q7, q1, #4 + vld1.8 {d5}, [r4, : 64] + vshl.i32 q8, q4, #4 + vtrn.32 d4, d5 + vadd.i32 q7, q7, q1 + vadd.i32 q8, q8, q4 + vld1.8 {d18-d19}, [r2, : 128]! + vshl.i32 q10, q2, #4 + vld1.8 {d22-d23}, [r2, : 128]! + vadd.i32 q10, q10, q2 + vld1.8 {d24}, [r2, : 64] + vadd.i32 q5, q5, q0 + add r2, r3, #240 + vld1.8 {d26-d27}, [r2, : 128]! + vadd.i32 q6, q6, q3 + vld1.8 {d28-d29}, [r2, : 128]! 
+ vadd.i32 q8, q8, q4 + vld1.8 {d25}, [r2, : 64] + vadd.i32 q10, q10, q2 + vtrn.32 q9, q13 + vadd.i32 q7, q7, q1 + vadd.i32 q5, q5, q0 + vtrn.32 q11, q14 + vadd.i32 q6, q6, q3 + add r2, sp, #528 + vadd.i32 q10, q10, q2 + vtrn.32 d24, d25 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q6, q13, #1 + vst1.8 {d20-d21}, [r2, : 128]! + vshl.i32 q10, q14, #1 + vst1.8 {d12-d13}, [r2, : 128]! + vshl.i32 q15, q12, #1 + vadd.i32 q8, q8, q4 + vext.32 d10, d31, d30, #0 + vadd.i32 q7, q7, q1 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q8, d18, d5 + vmlal.s32 q8, d26, d4 + vmlal.s32 q8, d19, d9 + vmlal.s32 q8, d27, d3 + vmlal.s32 q8, d22, d8 + vmlal.s32 q8, d28, d2 + vmlal.s32 q8, d23, d7 + vmlal.s32 q8, d29, d1 + vmlal.s32 q8, d24, d6 + vmlal.s32 q8, d25, d0 + vst1.8 {d14-d15}, [r2, : 128]! + vmull.s32 q2, d18, d4 + vmlal.s32 q2, d12, d9 + vmlal.s32 q2, d13, d8 + vmlal.s32 q2, d19, d3 + vmlal.s32 q2, d22, d2 + vmlal.s32 q2, d23, d1 + vmlal.s32 q2, d24, d0 + vst1.8 {d20-d21}, [r2, : 128]! + vmull.s32 q7, d18, d9 + vmlal.s32 q7, d26, d3 + vmlal.s32 q7, d19, d8 + vmlal.s32 q7, d27, d2 + vmlal.s32 q7, d22, d7 + vmlal.s32 q7, d28, d1 + vmlal.s32 q7, d23, d6 + vmlal.s32 q7, d29, d0 + vst1.8 {d10-d11}, [r2, : 128]! + vmull.s32 q5, d18, d3 + vmlal.s32 q5, d19, d2 + vmlal.s32 q5, d22, d1 + vmlal.s32 q5, d23, d0 + vmlal.s32 q5, d12, d8 + vst1.8 {d16-d17}, [r2, : 128]! + vmull.s32 q4, d18, d8 + vmlal.s32 q4, d26, d2 + vmlal.s32 q4, d19, d7 + vmlal.s32 q4, d27, d1 + vmlal.s32 q4, d22, d6 + vmlal.s32 q4, d28, d0 + vmull.s32 q8, d18, d7 + vmlal.s32 q8, d26, d1 + vmlal.s32 q8, d19, d6 + vmlal.s32 q8, d27, d0 + add r2, sp, #544 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q7, d24, d21 + vmlal.s32 q7, d25, d20 + vmlal.s32 q4, d23, d21 + vmlal.s32 q4, d29, d20 + vmlal.s32 q8, d22, d21 + vmlal.s32 q8, d28, d20 + vmlal.s32 q5, d24, d20 + vst1.8 {d14-d15}, [r2, : 128] + vmull.s32 q7, d18, d6 + vmlal.s32 q7, d26, d0 + add r2, sp, #624 + vld1.8 {d30-d31}, [r2, : 128] + vmlal.s32 q2, d30, d21 + vmlal.s32 q7, d19, d21 + vmlal.s32 q7, d27, d20 + add r2, sp, #592 + vld1.8 {d26-d27}, [r2, : 128] + vmlal.s32 q4, d25, d27 + vmlal.s32 q8, d29, d27 + vmlal.s32 q8, d25, d26 + vmlal.s32 q7, d28, d27 + vmlal.s32 q7, d29, d26 + add r2, sp, #576 + vld1.8 {d28-d29}, [r2, : 128] + vmlal.s32 q4, d24, d29 + vmlal.s32 q8, d23, d29 + vmlal.s32 q8, d24, d28 + vmlal.s32 q7, d22, d29 + vmlal.s32 q7, d23, d28 + vst1.8 {d8-d9}, [r2, : 128] + add r2, sp, #528 + vld1.8 {d8-d9}, [r2, : 128] + vmlal.s32 q7, d24, d9 + vmlal.s32 q7, d25, d31 + vmull.s32 q1, d18, d2 + vmlal.s32 q1, d19, d1 + vmlal.s32 q1, d22, d0 + vmlal.s32 q1, d24, d27 + vmlal.s32 q1, d23, d20 + vmlal.s32 q1, d12, d7 + vmlal.s32 q1, d13, d6 + vmull.s32 q6, d18, d1 + vmlal.s32 q6, d19, d0 + vmlal.s32 q6, d23, d27 + vmlal.s32 q6, d22, d20 + vmlal.s32 q6, d24, d26 + vmull.s32 q0, d18, d0 + vmlal.s32 q0, d22, d27 + vmlal.s32 q0, d23, d26 + vmlal.s32 q0, d24, d31 + vmlal.s32 q0, d19, d20 + add r2, sp, #608 + vld1.8 {d18-d19}, [r2, : 128] + vmlal.s32 q2, d18, d7 + vmlal.s32 q5, d18, d6 + vmlal.s32 q1, d18, d21 + vmlal.s32 q0, d18, d28 + vmlal.s32 q6, d18, d29 + vmlal.s32 q2, d19, d6 + vmlal.s32 q5, d19, d21 + vmlal.s32 q1, d19, d29 + vmlal.s32 q0, d19, d9 + vmlal.s32 q6, d19, d28 + add r2, sp, #560 + vld1.8 {d18-d19}, [r2, : 128] + add r2, sp, #480 + vld1.8 {d22-d23}, [r2, : 128] + vmlal.s32 q5, d19, d7 + vmlal.s32 q0, d18, d21 + vmlal.s32 q0, d19, d29 + vmlal.s32 q6, d18, d6 + add r2, sp, #496 + vld1.8 {d6-d7}, [r2, : 128] + vmlal.s32 q6, d19, d21 + add r2, sp, #544 + vld1.8 {d18-d19}, [r2, : 
128] + vmlal.s32 q0, d30, d8 + add r2, sp, #640 + vld1.8 {d20-d21}, [r2, : 128] + vmlal.s32 q5, d30, d29 + add r2, sp, #576 + vld1.8 {d24-d25}, [r2, : 128] + vmlal.s32 q1, d30, d28 + vadd.i64 q13, q0, q11 + vadd.i64 q14, q5, q11 + vmlal.s32 q6, d30, d9 + vshr.s64 q4, q13, #26 + vshr.s64 q13, q14, #26 + vadd.i64 q7, q7, q4 + vshl.i64 q4, q4, #26 + vadd.i64 q14, q7, q3 + vadd.i64 q9, q9, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q15, q9, q3 + vsub.i64 q0, q0, q4 + vshr.s64 q4, q14, #25 + vsub.i64 q5, q5, q13 + vshr.s64 q13, q15, #25 + vadd.i64 q6, q6, q4 + vshl.i64 q4, q4, #25 + vadd.i64 q14, q6, q11 + vadd.i64 q2, q2, q13 + vsub.i64 q4, q7, q4 + vshr.s64 q7, q14, #26 + vshl.i64 q13, q13, #25 + vadd.i64 q14, q2, q11 + vadd.i64 q8, q8, q7 + vshl.i64 q7, q7, #26 + vadd.i64 q15, q8, q3 + vsub.i64 q9, q9, q13 + vshr.s64 q13, q14, #26 + vsub.i64 q6, q6, q7 + vshr.s64 q7, q15, #25 + vadd.i64 q10, q10, q13 + vshl.i64 q13, q13, #26 + vadd.i64 q14, q10, q3 + vadd.i64 q1, q1, q7 + add r2, r3, #240 + vshl.i64 q7, q7, #25 + add r4, r3, #144 + vadd.i64 q15, q1, q11 + add r2, r2, #8 + vsub.i64 q2, q2, q13 + add r4, r4, #8 + vshr.s64 q13, q14, #25 + vsub.i64 q7, q8, q7 + vshr.s64 q8, q15, #26 + vadd.i64 q14, q13, q13 + vadd.i64 q12, q12, q8 + vtrn.32 d12, d14 + vshl.i64 q8, q8, #26 + vtrn.32 d13, d15 + vadd.i64 q3, q12, q3 + vadd.i64 q0, q0, q14 + vst1.8 d12, [r2, : 64]! + vshl.i64 q7, q13, #4 + vst1.8 d13, [r4, : 64]! + vsub.i64 q1, q1, q8 + vshr.s64 q3, q3, #25 + vadd.i64 q0, q0, q7 + vadd.i64 q5, q5, q3 + vshl.i64 q3, q3, #25 + vadd.i64 q6, q5, q11 + vadd.i64 q0, q0, q13 + vshl.i64 q7, q13, #25 + vadd.i64 q8, q0, q11 + vsub.i64 q3, q12, q3 + vshr.s64 q6, q6, #26 + vsub.i64 q7, q10, q7 + vtrn.32 d2, d6 + vshr.s64 q8, q8, #26 + vtrn.32 d3, d7 + vadd.i64 q3, q9, q6 + vst1.8 d2, [r2, : 64] + vshl.i64 q6, q6, #26 + vst1.8 d3, [r4, : 64] + vadd.i64 q1, q4, q8 + vtrn.32 d4, d14 + vshl.i64 q4, q8, #26 + vtrn.32 d5, d15 + vsub.i64 q5, q5, q6 + add r2, r2, #16 + vsub.i64 q0, q0, q4 + vst1.8 d4, [r2, : 64] + add r4, r4, #16 + vst1.8 d5, [r4, : 64] + vtrn.32 d10, d6 + vtrn.32 d11, d7 + sub r2, r2, #8 + sub r4, r4, #8 + vtrn.32 d0, d2 + vtrn.32 d1, d3 + vst1.8 d10, [r2, : 64] + vst1.8 d11, [r4, : 64] + sub r2, r2, #24 + sub r4, r4, #24 + vst1.8 d0, [r2, : 64] + vst1.8 d1, [r4, : 64] + ldr r2, [sp, #456] + ldr r4, [sp, #460] + subs r5, r2, #1 + bge .Lmainloop + add r1, r3, #144 + add r2, r3, #336 + vld1.8 {d0-d1}, [r1, : 128]! + vld1.8 {d2-d3}, [r1, : 128]! + vld1.8 {d4}, [r1, : 64] + vst1.8 {d0-d1}, [r2, : 128]! + vst1.8 {d2-d3}, [r2, : 128]! + vst1.8 d4, [r2, : 64] + movw r1, #0 +.Linvertloop: + add r2, r3, #144 + movw r4, #0 + movw r5, #2 + cmp r1, #1 + moveq r5, #1 + addeq r2, r3, #336 + addeq r4, r3, #48 + cmp r1, #2 + moveq r5, #1 + addeq r2, r3, #48 + cmp r1, #3 + moveq r5, #5 + addeq r4, r3, #336 + cmp r1, #4 + moveq r5, #10 + cmp r1, #5 + moveq r5, #20 + cmp r1, #6 + moveq r5, #10 + addeq r2, r3, #336 + addeq r4, r3, #336 + cmp r1, #7 + moveq r5, #50 + cmp r1, #8 + moveq r5, #100 + cmp r1, #9 + moveq r5, #50 + addeq r2, r3, #336 + cmp r1, #10 + moveq r5, #5 + addeq r2, r3, #48 + cmp r1, #11 + moveq r5, #0 + addeq r2, r3, #96 + add r6, r3, #144 + add r7, r3, #288 + vld1.8 {d0-d1}, [r6, : 128]! + vld1.8 {d2-d3}, [r6, : 128]! + vld1.8 {d4}, [r6, : 64] + vst1.8 {d0-d1}, [r7, : 128]! + vst1.8 {d2-d3}, [r7, : 128]! 
+ vst1.8 d4, [r7, : 64] + cmp r5, #0 + beq .Lskipsquaringloop +.Lsquaringloop: + add r6, r3, #288 + add r7, r3, #288 + add r8, r3, #288 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r7, : 128]! + vld1.8 {d6-d7}, [r7, : 128]! + vld1.8 {d9}, [r7, : 64] + vld1.8 {d10-d11}, [r6, : 128]! + add r7, sp, #384 + vld1.8 {d12-d13}, [r6, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r6, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r7, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r7, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r7, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r7, : 64]! + vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r7, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r6, r7, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r6, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r6, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r6, r6, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r6, : 64] + sub r6, r6, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r7, sp, #480 + vld1.8 {d14-d15}, [r7, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r6, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r6, : 64]! 
+ add r6, sp, #496 + vld1.8 {d20-d21}, [r6, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r6, r8, #8 + vst1.8 d0, [r6, : 64] + vsub.i64 d19, d19, d9 + add r6, r6, #16 + vst1.8 d16, [r6, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r6, r6, #8 + vst1.8 d6, [r6, : 64] + add r6, r6, #16 + vst1.8 d18, [r6, : 64] + vzip.i32 q2, q5 + sub r6, r6, #32 + vst1.8 d4, [r6, : 64] + subs r5, r5, #1 + bhi .Lsquaringloop +.Lskipsquaringloop: + mov r2, r2 + add r5, r3, #288 + add r6, r3, #144 + vmov.i32 q0, #19 + vmov.i32 q1, #0 + vmov.i32 q2, #1 + vzip.i32 q1, q2 + vld1.8 {d4-d5}, [r5, : 128]! + vld1.8 {d6-d7}, [r5, : 128]! + vld1.8 {d9}, [r5, : 64] + vld1.8 {d10-d11}, [r2, : 128]! + add r5, sp, #384 + vld1.8 {d12-d13}, [r2, : 128]! + vmul.i32 q7, q2, q0 + vld1.8 {d8}, [r2, : 64] + vext.32 d17, d11, d10, #1 + vmul.i32 q9, q3, q0 + vext.32 d16, d10, d8, #1 + vshl.u32 q10, q5, q1 + vext.32 d22, d14, d4, #1 + vext.32 d24, d18, d6, #1 + vshl.u32 q13, q6, q1 + vshl.u32 d28, d8, d2 + vrev64.i32 d22, d22 + vmul.i32 d1, d9, d1 + vrev64.i32 d24, d24 + vext.32 d29, d8, d13, #1 + vext.32 d0, d1, d9, #1 + vrev64.i32 d0, d0 + vext.32 d2, d9, d1, #1 + vext.32 d23, d15, d5, #1 + vmull.s32 q4, d20, d4 + vrev64.i32 d23, d23 + vmlal.s32 q4, d21, d1 + vrev64.i32 d2, d2 + vmlal.s32 q4, d26, d19 + vext.32 d3, d5, d15, #1 + vmlal.s32 q4, d27, d18 + vrev64.i32 d3, d3 + vmlal.s32 q4, d28, d15 + vext.32 d14, d12, d11, #1 + vmull.s32 q5, d16, d23 + vext.32 d15, d13, d12, #1 + vmlal.s32 q5, d17, d4 + vst1.8 d8, [r5, : 64]! + vmlal.s32 q5, d14, d1 + vext.32 d12, d9, d8, #0 + vmlal.s32 q5, d15, d19 + vmov.i64 d13, #0 + vmlal.s32 q5, d29, d18 + vext.32 d25, d19, d7, #1 + vmlal.s32 q6, d20, d5 + vrev64.i32 d25, d25 + vmlal.s32 q6, d21, d4 + vst1.8 d11, [r5, : 64]! + vmlal.s32 q6, d26, d1 + vext.32 d9, d10, d10, #0 + vmlal.s32 q6, d27, d19 + vmov.i64 d8, #0 + vmlal.s32 q6, d28, d18 + vmlal.s32 q4, d16, d24 + vmlal.s32 q4, d17, d5 + vmlal.s32 q4, d14, d4 + vst1.8 d12, [r5, : 64]! + vmlal.s32 q4, d15, d1 + vext.32 d10, d13, d12, #0 + vmlal.s32 q4, d29, d19 + vmov.i64 d11, #0 + vmlal.s32 q5, d20, d6 + vmlal.s32 q5, d21, d5 + vmlal.s32 q5, d26, d4 + vext.32 d13, d8, d8, #0 + vmlal.s32 q5, d27, d1 + vmov.i64 d12, #0 + vmlal.s32 q5, d28, d19 + vst1.8 d9, [r5, : 64]! 
+ vmlal.s32 q6, d16, d25 + vmlal.s32 q6, d17, d6 + vst1.8 d10, [r5, : 64] + vmlal.s32 q6, d14, d5 + vext.32 d8, d11, d10, #0 + vmlal.s32 q6, d15, d4 + vmov.i64 d9, #0 + vmlal.s32 q6, d29, d1 + vmlal.s32 q4, d20, d7 + vmlal.s32 q4, d21, d6 + vmlal.s32 q4, d26, d5 + vext.32 d11, d12, d12, #0 + vmlal.s32 q4, d27, d4 + vmov.i64 d10, #0 + vmlal.s32 q4, d28, d1 + vmlal.s32 q5, d16, d0 + sub r2, r5, #32 + vmlal.s32 q5, d17, d7 + vmlal.s32 q5, d14, d6 + vext.32 d30, d9, d8, #0 + vmlal.s32 q5, d15, d5 + vld1.8 {d31}, [r2, : 64]! + vmlal.s32 q5, d29, d4 + vmlal.s32 q15, d20, d0 + vext.32 d0, d6, d18, #1 + vmlal.s32 q15, d21, d25 + vrev64.i32 d0, d0 + vmlal.s32 q15, d26, d24 + vext.32 d1, d7, d19, #1 + vext.32 d7, d10, d10, #0 + vmlal.s32 q15, d27, d23 + vrev64.i32 d1, d1 + vld1.8 {d6}, [r2, : 64] + vmlal.s32 q15, d28, d22 + vmlal.s32 q3, d16, d4 + add r2, r2, #24 + vmlal.s32 q3, d17, d2 + vext.32 d4, d31, d30, #0 + vmov d17, d11 + vmlal.s32 q3, d14, d1 + vext.32 d11, d13, d13, #0 + vext.32 d13, d30, d30, #0 + vmlal.s32 q3, d15, d0 + vext.32 d1, d8, d8, #0 + vmlal.s32 q3, d29, d3 + vld1.8 {d5}, [r2, : 64] + sub r2, r2, #16 + vext.32 d10, d6, d6, #0 + vmov.i32 q1, #0xffffffff + vshl.i64 q4, q1, #25 + add r5, sp, #480 + vld1.8 {d14-d15}, [r5, : 128] + vadd.i64 q9, q2, q7 + vshl.i64 q1, q1, #26 + vshr.s64 q10, q9, #26 + vld1.8 {d0}, [r2, : 64]! + vadd.i64 q5, q5, q10 + vand q9, q9, q1 + vld1.8 {d16}, [r2, : 64]! + add r2, sp, #496 + vld1.8 {d20-d21}, [r2, : 128] + vadd.i64 q11, q5, q10 + vsub.i64 q2, q2, q9 + vshr.s64 q9, q11, #25 + vext.32 d12, d5, d4, #0 + vand q11, q11, q4 + vadd.i64 q0, q0, q9 + vmov d19, d7 + vadd.i64 q3, q0, q7 + vsub.i64 q5, q5, q11 + vshr.s64 q11, q3, #26 + vext.32 d18, d11, d10, #0 + vand q3, q3, q1 + vadd.i64 q8, q8, q11 + vadd.i64 q11, q8, q10 + vsub.i64 q0, q0, q3 + vshr.s64 q3, q11, #25 + vand q11, q11, q4 + vadd.i64 q3, q6, q3 + vadd.i64 q6, q3, q7 + vsub.i64 q8, q8, q11 + vshr.s64 q11, q6, #26 + vand q6, q6, q1 + vadd.i64 q9, q9, q11 + vadd.i64 d25, d19, d21 + vsub.i64 q3, q3, q6 + vshr.s64 d23, d25, #25 + vand q4, q12, q4 + vadd.i64 d21, d23, d23 + vshl.i64 d25, d23, #4 + vadd.i64 d21, d21, d23 + vadd.i64 d25, d25, d21 + vadd.i64 d4, d4, d25 + vzip.i32 q0, q8 + vadd.i64 d12, d4, d14 + add r2, r6, #8 + vst1.8 d0, [r2, : 64] + vsub.i64 d19, d19, d9 + add r2, r2, #16 + vst1.8 d16, [r2, : 64] + vshr.s64 d22, d12, #26 + vand q0, q6, q1 + vadd.i64 d10, d10, d22 + vzip.i32 q3, q9 + vsub.i64 d4, d4, d0 + sub r2, r2, #8 + vst1.8 d6, [r2, : 64] + add r2, r2, #16 + vst1.8 d18, [r2, : 64] + vzip.i32 q2, q5 + sub r2, r2, #32 + vst1.8 d4, [r2, : 64] + cmp r4, #0 + beq .Lskippostcopy + add r2, r3, #144 + mov r4, r4 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! + vst1.8 d4, [r4, : 64] +.Lskippostcopy: + cmp r1, #1 + bne .Lskipfinalcopy + add r2, r3, #288 + add r4, r3, #144 + vld1.8 {d0-d1}, [r2, : 128]! + vld1.8 {d2-d3}, [r2, : 128]! + vld1.8 {d4}, [r2, : 64] + vst1.8 {d0-d1}, [r4, : 128]! + vst1.8 {d2-d3}, [r4, : 128]! 
+ vst1.8 d4, [r4, : 64] +.Lskipfinalcopy: + add r1, r1, #1 + cmp r1, #12 + blo .Linvertloop + add r1, r3, #144 + ldr r2, [r1], #4 + ldr r3, [r1], #4 + ldr r4, [r1], #4 + ldr r5, [r1], #4 + ldr r6, [r1], #4 + ldr r7, [r1], #4 + ldr r8, [r1], #4 + ldr r9, [r1], #4 + ldr r10, [r1], #4 + ldr r1, [r1] + add r11, r1, r1, LSL #4 + add r11, r11, r1, LSL #1 + add r11, r11, #16777216 + mov r11, r11, ASR #25 + add r11, r11, r2 + mov r11, r11, ASR #26 + add r11, r11, r3 + mov r11, r11, ASR #25 + add r11, r11, r4 + mov r11, r11, ASR #26 + add r11, r11, r5 + mov r11, r11, ASR #25 + add r11, r11, r6 + mov r11, r11, ASR #26 + add r11, r11, r7 + mov r11, r11, ASR #25 + add r11, r11, r8 + mov r11, r11, ASR #26 + add r11, r11, r9 + mov r11, r11, ASR #25 + add r11, r11, r10 + mov r11, r11, ASR #26 + add r11, r11, r1 + mov r11, r11, ASR #25 + add r2, r2, r11 + add r2, r2, r11, LSL #1 + add r2, r2, r11, LSL #4 + mov r11, r2, ASR #26 + add r3, r3, r11 + sub r2, r2, r11, LSL #26 + mov r11, r3, ASR #25 + add r4, r4, r11 + sub r3, r3, r11, LSL #25 + mov r11, r4, ASR #26 + add r5, r5, r11 + sub r4, r4, r11, LSL #26 + mov r11, r5, ASR #25 + add r6, r6, r11 + sub r5, r5, r11, LSL #25 + mov r11, r6, ASR #26 + add r7, r7, r11 + sub r6, r6, r11, LSL #26 + mov r11, r7, ASR #25 + add r8, r8, r11 + sub r7, r7, r11, LSL #25 + mov r11, r8, ASR #26 + add r9, r9, r11 + sub r8, r8, r11, LSL #26 + mov r11, r9, ASR #25 + add r10, r10, r11 + sub r9, r9, r11, LSL #25 + mov r11, r10, ASR #26 + add r1, r1, r11 + sub r10, r10, r11, LSL #26 + mov r11, r1, ASR #25 + sub r1, r1, r11, LSL #25 + add r2, r2, r3, LSL #26 + mov r3, r3, LSR #6 + add r3, r3, r4, LSL #19 + mov r4, r4, LSR #13 + add r4, r4, r5, LSL #13 + mov r5, r5, LSR #19 + add r5, r5, r6, LSL #6 + add r6, r7, r8, LSL #25 + mov r7, r8, LSR #7 + add r7, r7, r9, LSL #19 + mov r8, r9, LSR #13 + add r8, r8, r10, LSL #12 + mov r9, r10, LSR #20 + add r1, r9, r1, LSL #6 + str r2, [r0] + str r3, [r0, #4] + str r4, [r0, #8] + str r5, [r0, #12] + str r6, [r0, #16] + str r7, [r0, #20] + str r8, [r0, #24] + str r1, [r0, #28] + movw r0, #0 + mov sp, ip + pop {r4-r11, pc} +ENDPROC(curve25519_neon) diff --git a/lib/crypto/arm/curve25519.h b/lib/crypto/arm/curve25519.h new file mode 100644 index 000000000000..f6d66494eb8f --- /dev/null +++ b/lib/crypto/arm/curve25519.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. + * + * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This + * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been + * manually reworked for use in kernel space. 
+ */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void curve25519_arch(u8 out[CURVE25519_KEY_SIZE], + const u8 scalar[CURVE25519_KEY_SIZE], + const u8 point[CURVE25519_KEY_SIZE]) +{ + if (static_branch_likely(&have_neon) && crypto_simd_usable()) { + kernel_neon_begin(); + curve25519_neon(out, scalar, point); + kernel_neon_end(); + } else { + curve25519_generic(out, scalar, point); + } +} + +static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + curve25519_arch(pub, secret, curve25519_base_point); +} + +#define curve25519_mod_init_arch curve25519_mod_init_arch +static void curve25519_mod_init_arch(void) +{ + if (elf_hwcap & HWCAP_NEON) + static_branch_enable(&have_neon); +} diff --git a/lib/crypto/curve25519-generic.c b/lib/crypto/curve25519-generic.c deleted file mode 100644 index f8aa70c9f559..000000000000 --- a/lib/crypto/curve25519-generic.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. - * - * This is an implementation of the Curve25519 ECDH algorithm, using either - * a 32-bit implementation or a 64-bit implementation with 128-bit integers, - * depending on what is supported by the target compiler. - * - * Information: https://cr.yp.to/ecdh.html - */ - -#include -#include -#include - -const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; -const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; - -EXPORT_SYMBOL(curve25519_null_point); -EXPORT_SYMBOL(curve25519_base_point); -EXPORT_SYMBOL(curve25519_generic); - -MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Curve25519 scalar multiplication"); -MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/curve25519.c b/lib/crypto/curve25519.c index 1b786389d714..01e265dfbcd9 100644 --- a/lib/crypto/curve25519.c +++ b/lib/crypto/curve25519.c @@ -2,8 +2,9 @@ /* * Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved. * - * This is an implementation of the Curve25519 ECDH algorithm, using either - * a 32-bit implementation or a 64-bit implementation with 128-bit integers, + * This is an implementation of the Curve25519 ECDH algorithm, using either an + * architecture-optimized implementation or a generic implementation. The + * generic implementation is either 32-bit, or 64-bit with 128-bit integers, * depending on what is supported by the target compiler. 
* * Information: https://cr.yp.to/ecdh.html @@ -15,15 +16,32 @@ #include #include +static const u8 curve25519_null_point[CURVE25519_KEY_SIZE] __aligned(32) = { 0 }; +static const u8 curve25519_base_point[CURVE25519_KEY_SIZE] __aligned(32) = { 9 }; + +#ifdef CONFIG_CRYPTO_LIB_CURVE25519_ARCH +#include "curve25519.h" /* $(SRCARCH)/curve25519.h */ +#else +static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE], + const u8 basepoint[CURVE25519_KEY_SIZE]) +{ + curve25519_generic(mypublic, secret, basepoint); +} + +static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], + const u8 secret[CURVE25519_KEY_SIZE]) +{ + curve25519_generic(pub, secret, curve25519_base_point); +} +#endif + bool __must_check curve25519(u8 mypublic[CURVE25519_KEY_SIZE], const u8 secret[CURVE25519_KEY_SIZE], const u8 basepoint[CURVE25519_KEY_SIZE]) { - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) - curve25519_arch(mypublic, secret, basepoint); - else - curve25519_generic(mypublic, secret, basepoint); + curve25519_arch(mypublic, secret, basepoint); return crypto_memneq(mypublic, curve25519_null_point, CURVE25519_KEY_SIZE); } @@ -36,27 +54,25 @@ curve25519_generate_public(u8 pub[CURVE25519_KEY_SIZE], if (unlikely(!crypto_memneq(secret, curve25519_null_point, CURVE25519_KEY_SIZE))) return false; - - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_CURVE25519)) - curve25519_base_arch(pub, secret); - else - curve25519_generic(pub, secret, curve25519_base_point); + curve25519_base_arch(pub, secret); return crypto_memneq(pub, curve25519_null_point, CURVE25519_KEY_SIZE); } EXPORT_SYMBOL(curve25519_generate_public); -static int __init curve25519_init(void) +#ifdef curve25519_mod_init_arch +static int __init curve25519_mod_init(void) { + curve25519_mod_init_arch(); return 0; } +subsys_initcall(curve25519_mod_init); -static void __exit curve25519_exit(void) +static void __exit curve25519_mod_exit(void) { } - -module_init(curve25519_init); -module_exit(curve25519_exit); +module_exit(curve25519_mod_exit); +#endif MODULE_LICENSE("GPL v2"); -MODULE_DESCRIPTION("Curve25519 scalar multiplication"); +MODULE_DESCRIPTION("Curve25519 algorithm"); MODULE_AUTHOR("Jason A. Donenfeld "); diff --git a/lib/crypto/powerpc/curve25519-ppc64le_asm.S b/lib/crypto/powerpc/curve25519-ppc64le_asm.S new file mode 100644 index 000000000000..06c1febe24b9 --- /dev/null +++ b/lib/crypto/powerpc/curve25519-ppc64le_asm.S @@ -0,0 +1,671 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +# +# This code is taken from CRYPTOGAMs[1] and is included here using the option +# in the license to distribute the code under the GPL. Therefore this program +# is free software; you can redistribute it and/or modify it under the terms of +# the GNU General Public License version 2 as published by the Free Software +# Foundation. +# +# [1] https://github.com/dot-asm/cryptogams/ + +# Copyright (c) 2006-2017, CRYPTOGAMS by +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain copyright notices, +# this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following +# disclaimer in the documentation and/or other materials +# provided with the distribution. 
+# +# * Neither the name of the CRYPTOGAMS nor the names of its +# copyright holder and contributors may be used to endorse or +# promote products derived from this software without specific +# prior written permission. +# +# ALTERNATIVELY, provided that this notice is retained in full, this +# product may be distributed under the terms of the GNU General Public +# License (GPL), in which case the provisions of the GPL apply INSTEAD OF +# those given above. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# ==================================================================== +# Written by Andy Polyakov for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see https://www.openssl.org/~appro/cryptogams/. +# ==================================================================== + +# +# ==================================================================== +# Written and Modified by Danny Tsen +# - Added x25519_fe51_sqr_times, x25519_fe51_frombytes, x25519_fe51_tobytes +# and x25519_cswap +# +# Copyright 2024- IBM Corp. +# +# X25519 lower-level primitives for PPC64. 
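For orientation, the fe51 primitives below treat a field element of GF(2^255 - 19) as five 51-bit limbs, value = h[0] + h[1]*2^51 + h[2]*2^102 + h[3]*2^153 + h[4]*2^204, each limb masked with 0x7ffffffffffff (the constant the assembly builds via li/srdi). The following is only an illustrative C sketch of the byte unpacking that x25519_fe51_frombytes performs, with the limb boundaries read off the shift/mask sequence in the assembly; the helper name, the MASK51 macro, and the little-endian memcpy are assumptions made for the sketch, not kernel code.

	#include <stdint.h>
	#include <string.h>

	typedef uint64_t fe51[5];             /* radix-2^51 limbs, least significant first */
	#define MASK51 0x7ffffffffffffULL     /* 2^51 - 1 */

	/* Unpack 32 little-endian bytes into five 51-bit limbs. */
	void fe51_frombytes_ref(fe51 h, const uint8_t s[32])
	{
		uint64_t w[4];

		memcpy(w, s, 32);             /* little-endian host assumed, as on ppc64le */
		h[0] = w[0] & MASK51;
		h[1] = ((w[0] >> 51) | (w[1] << 13)) & MASK51;
		h[2] = ((w[1] >> 38) | (w[2] << 26)) & MASK51;
		h[3] = ((w[2] >> 25) | (w[3] << 39)) & MASK51;
		h[4] = (w[3] >> 12) & MASK51; /* bit 255 of the input is discarded */
	}

x25519_fe51_tobytes performs the inverse packing after fully reducing the limbs modulo 2^255 - 19.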
+# + +#include + +.text + +.align 5 +SYM_FUNC_START(x25519_fe51_mul) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 6,0(5) + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mulld 22,7,6 + mulhdu 23,7,6 + + mulld 24,8,6 + mulhdu 25,8,6 + + mulld 30,11,6 + mulhdu 31,11,6 + ld 4,8(5) + mulli 11,11,19 + + mulld 26,9,6 + mulhdu 27,9,6 + + mulld 28,10,6 + mulhdu 29,10,6 + mulld 12,11,4 + mulhdu 21,11,4 + addc 22,22,12 + adde 23,23,21 + + mulld 12,7,4 + mulhdu 21,7,4 + addc 24,24,12 + adde 25,25,21 + + mulld 12,10,4 + mulhdu 21,10,4 + ld 6,16(5) + mulli 10,10,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,8,4 + mulhdu 21,8,4 + addc 26,26,12 + adde 27,27,21 + + mulld 12,9,4 + mulhdu 21,9,4 + addc 28,28,12 + adde 29,29,21 + mulld 12,10,6 + mulhdu 21,10,6 + addc 22,22,12 + adde 23,23,21 + + mulld 12,11,6 + mulhdu 21,11,6 + addc 24,24,12 + adde 25,25,21 + + mulld 12,9,6 + mulhdu 21,9,6 + ld 4,24(5) + mulli 9,9,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,7,6 + mulhdu 21,7,6 + addc 26,26,12 + adde 27,27,21 + + mulld 12,8,6 + mulhdu 21,8,6 + addc 28,28,12 + adde 29,29,21 + mulld 12,9,4 + mulhdu 21,9,4 + addc 22,22,12 + adde 23,23,21 + + mulld 12,10,4 + mulhdu 21,10,4 + addc 24,24,12 + adde 25,25,21 + + mulld 12,8,4 + mulhdu 21,8,4 + ld 6,32(5) + mulli 8,8,19 + addc 30,30,12 + adde 31,31,21 + + mulld 12,11,4 + mulhdu 21,11,4 + addc 26,26,12 + adde 27,27,21 + + mulld 12,7,4 + mulhdu 21,7,4 + addc 28,28,12 + adde 29,29,21 + mulld 12,8,6 + mulhdu 21,8,6 + addc 22,22,12 + adde 23,23,21 + + mulld 12,9,6 + mulhdu 21,9,6 + addc 24,24,12 + adde 25,25,21 + + mulld 12,10,6 + mulhdu 21,10,6 + addc 26,26,12 + adde 27,27,21 + + mulld 12,11,6 + mulhdu 21,11,6 + addc 28,28,12 + adde 29,29,21 + + mulld 12,7,6 + mulhdu 21,7,6 + addc 30,30,12 + adde 31,31,21 + +.Lfe51_reduce: + li 0,-1 + srdi 0,0,13 + + srdi 12,26,51 + and 9,26,0 + insrdi 12,27,51,0 + srdi 21,22,51 + and 7,22,0 + insrdi 21,23,51,0 + addc 28,28,12 + addze 29,29 + addc 24,24,21 + addze 25,25 + + srdi 12,28,51 + and 10,28,0 + insrdi 12,29,51,0 + srdi 21,24,51 + and 8,24,0 + insrdi 21,25,51,0 + addc 30,30,12 + addze 31,31 + add 9,9,21 + + srdi 12,30,51 + and 11,30,0 + insrdi 12,31,51,0 + mulli 12,12,19 + + add 7,7,12 + + srdi 21,9,51 + and 9,9,0 + add 10,10,21 + + srdi 12,7,51 + and 7,7,0 + add 8,8,12 + + std 9,16(3) + std 10,24(3) + std 11,32(3) + std 7,0(3) + std 8,8(3) + + ld 21,56(1) + ld 22,64(1) + ld 23,72(1) + ld 24,80(1) + ld 25,88(1) + ld 26,96(1) + ld 27,104(1) + ld 28,112(1) + ld 29,120(1) + ld 30,128(1) + ld 31,136(1) + addi 1,1,144 + blr +SYM_FUNC_END(x25519_fe51_mul) + +.align 5 +SYM_FUNC_START(x25519_fe51_sqr) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + add 6,7,7 + mulli 21,11,19 + + mulld 22,7,7 + mulhdu 23,7,7 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + add 6,8,8 + mulld 12,11,21 + mulhdu 11,11,21 + addc 28,28,12 + adde 29,29,11 + + mulli 5,10,19 + + mulld 12,8,8 + mulhdu 11,8,8 + addc 26,26,12 + adde 27,27,11 + mulld 12,9,6 + mulhdu 11,9,6 + addc 28,28,12 + adde 29,29,11 + mulld 12,10,6 + mulhdu 11,10,6 + addc 30,30,12 + adde 31,31,11 + mulld 12,21,6 + mulhdu 11,21,6 + add 
6,10,10 + addc 22,22,12 + adde 23,23,11 + mulld 12,10,5 + mulhdu 10,10,5 + addc 24,24,12 + adde 25,25,10 + mulld 12,6,21 + mulhdu 10,6,21 + add 6,9,9 + addc 26,26,12 + adde 27,27,10 + + mulld 12,9,9 + mulhdu 10,9,9 + addc 30,30,12 + adde 31,31,10 + mulld 12,5,6 + mulhdu 10,5,6 + addc 22,22,12 + adde 23,23,10 + mulld 12,21,6 + mulhdu 10,21,6 + addc 24,24,12 + adde 25,25,10 + + b .Lfe51_reduce +SYM_FUNC_END(x25519_fe51_sqr) + +.align 5 +SYM_FUNC_START(x25519_fe51_mul121666) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + lis 6,1 + ori 6,6,56130 + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mulld 22,7,6 + mulhdu 23,7,6 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + + b .Lfe51_reduce +SYM_FUNC_END(x25519_fe51_mul121666) + +.align 5 +SYM_FUNC_START(x25519_fe51_sqr_times) + + stdu 1,-144(1) + std 21,56(1) + std 22,64(1) + std 23,72(1) + std 24,80(1) + std 25,88(1) + std 26,96(1) + std 27,104(1) + std 28,112(1) + std 29,120(1) + std 30,128(1) + std 31,136(1) + + ld 7,0(4) + ld 8,8(4) + ld 9,16(4) + ld 10,24(4) + ld 11,32(4) + + mtctr 5 + +.Lsqr_times_loop: + add 6,7,7 + mulli 21,11,19 + + mulld 22,7,7 + mulhdu 23,7,7 + mulld 24,8,6 + mulhdu 25,8,6 + mulld 26,9,6 + mulhdu 27,9,6 + mulld 28,10,6 + mulhdu 29,10,6 + mulld 30,11,6 + mulhdu 31,11,6 + add 6,8,8 + mulld 12,11,21 + mulhdu 11,11,21 + addc 28,28,12 + adde 29,29,11 + + mulli 5,10,19 + + mulld 12,8,8 + mulhdu 11,8,8 + addc 26,26,12 + adde 27,27,11 + mulld 12,9,6 + mulhdu 11,9,6 + addc 28,28,12 + adde 29,29,11 + mulld 12,10,6 + mulhdu 11,10,6 + addc 30,30,12 + adde 31,31,11 + mulld 12,21,6 + mulhdu 11,21,6 + add 6,10,10 + addc 22,22,12 + adde 23,23,11 + mulld 12,10,5 + mulhdu 10,10,5 + addc 24,24,12 + adde 25,25,10 + mulld 12,6,21 + mulhdu 10,6,21 + add 6,9,9 + addc 26,26,12 + adde 27,27,10 + + mulld 12,9,9 + mulhdu 10,9,9 + addc 30,30,12 + adde 31,31,10 + mulld 12,5,6 + mulhdu 10,5,6 + addc 22,22,12 + adde 23,23,10 + mulld 12,21,6 + mulhdu 10,21,6 + addc 24,24,12 + adde 25,25,10 + + # fe51_reduce + li 0,-1 + srdi 0,0,13 + + srdi 12,26,51 + and 9,26,0 + insrdi 12,27,51,0 + srdi 21,22,51 + and 7,22,0 + insrdi 21,23,51,0 + addc 28,28,12 + addze 29,29 + addc 24,24,21 + addze 25,25 + + srdi 12,28,51 + and 10,28,0 + insrdi 12,29,51,0 + srdi 21,24,51 + and 8,24,0 + insrdi 21,25,51,0 + addc 30,30,12 + addze 31,31 + add 9,9,21 + + srdi 12,30,51 + and 11,30,0 + insrdi 12,31,51,0 + mulli 12,12,19 + + add 7,7,12 + + srdi 21,9,51 + and 9,9,0 + add 10,10,21 + + srdi 12,7,51 + and 7,7,0 + add 8,8,12 + + bdnz .Lsqr_times_loop + + std 9,16(3) + std 10,24(3) + std 11,32(3) + std 7,0(3) + std 8,8(3) + + ld 21,56(1) + ld 22,64(1) + ld 23,72(1) + ld 24,80(1) + ld 25,88(1) + ld 26,96(1) + ld 27,104(1) + ld 28,112(1) + ld 29,120(1) + ld 30,128(1) + ld 31,136(1) + addi 1,1,144 + blr +SYM_FUNC_END(x25519_fe51_sqr_times) + +.align 5 +SYM_FUNC_START(x25519_fe51_frombytes) + + li 12, -1 + srdi 12, 12, 13 # 0x7ffffffffffff + + ld 5, 0(4) + ld 6, 8(4) + ld 7, 16(4) + ld 8, 24(4) + + srdi 10, 5, 51 + and 5, 5, 12 # h0 + + sldi 11, 6, 13 + or 11, 10, 11 # h1t + srdi 10, 6, 38 + and 6, 11, 12 # h1 + + sldi 11, 7, 26 + or 10, 10, 11 # h2t + + srdi 11, 7, 25 + and 7, 10, 12 # h2 + sldi 10, 8, 39 + or 11, 11, 10 # h3t + + srdi 9, 8, 12 + and 8, 11, 12 # h3 + and 9, 9, 12 # h4 + + std 5, 0(3) + std 6, 8(3) + std 7, 16(3) + std 8, 
24(3)
+	std 9, 32(3)
+
+	blr
+SYM_FUNC_END(x25519_fe51_frombytes)
+
+.align 5
+SYM_FUNC_START(x25519_fe51_tobytes)
+
+	ld 5, 0(4)
+	ld 6, 8(4)
+	ld 7, 16(4)
+	ld 8, 24(4)
+	ld 9, 32(4)
+
+	li 12, -1
+	srdi 12, 12, 13	# 0x7ffffffffffff
+
+	# Full reduction
+	addi 10, 5, 19
+	srdi 10, 10, 51
+	add 10, 10, 6
+	srdi 10, 10, 51
+	add 10, 10, 7
+	srdi 10, 10, 51
+	add 10, 10, 8
+	srdi 10, 10, 51
+	add 10, 10, 9
+	srdi 10, 10, 51
+
+	mulli 10, 10, 19
+	add 5, 5, 10
+	srdi 11, 5, 51
+	add 6, 6, 11
+	srdi 11, 6, 51
+	add 7, 7, 11
+	srdi 11, 7, 51
+	add 8, 8, 11
+	srdi 11, 8, 51
+	add 9, 9, 11
+
+	and 5, 5, 12
+	and 6, 6, 12
+	and 7, 7, 12
+	and 8, 8, 12
+	and 9, 9, 12
+
+	sldi 10, 6, 51
+	or 5, 5, 10	# s0
+
+	srdi 11, 6, 13
+	sldi 10, 7, 38
+	or 6, 11, 10	# s1
+
+	srdi 11, 7, 26
+	sldi 10, 8, 25
+	or 7, 11, 10	# s2
+
+	srdi 11, 8, 39
+	sldi 10, 9, 12
+	or 8, 11, 10	# s4
+
+	std 5, 0(3)
+	std 6, 8(3)
+	std 7, 16(3)
+	std 8, 24(3)
+
+	blr
+SYM_FUNC_END(x25519_fe51_tobytes)
+
+.align 5
+SYM_FUNC_START(x25519_cswap)
+
+	li 7, 5
+	neg 6, 5
+	mtctr 7
+
+.Lswap_loop:
+	ld 8, 0(3)
+	ld 9, 0(4)
+	xor 10, 8, 9
+	and 10, 10, 6
+	xor 11, 8, 10
+	xor 12, 9, 10
+	std 11, 0(3)
+	addi 3, 3, 8
+	std 12, 0(4)
+	addi 4, 4, 8
+	bdnz .Lswap_loop
+
+	blr
+SYM_FUNC_END(x25519_cswap)
diff --git a/lib/crypto/powerpc/curve25519.h b/lib/crypto/powerpc/curve25519.h
new file mode 100644
index 000000000000..dee6234c48e9
--- /dev/null
+++ b/lib/crypto/powerpc/curve25519.h
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2024- IBM Corp.
+ *
+ * X25519 scalar multiplication with 51 bits limbs for PPC64le.
+ *   Based on RFC7748 and AArch64 optimized implementation for X25519
+ *     - Algorithm 1 Scalar multiplication of a variable point
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+
+typedef uint64_t fe51[5];
+
+asmlinkage void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
+asmlinkage void x25519_fe51_sqr(fe51 h, const fe51 f);
+asmlinkage void x25519_fe51_mul121666(fe51 h, fe51 f);
+asmlinkage void x25519_fe51_sqr_times(fe51 h, const fe51 f, int n);
+asmlinkage void x25519_fe51_frombytes(fe51 h, const uint8_t *s);
+asmlinkage void x25519_fe51_tobytes(uint8_t *s, const fe51 h);
+asmlinkage void x25519_cswap(fe51 p, fe51 q, unsigned int bit);
+
+#define fmul x25519_fe51_mul
+#define fsqr x25519_fe51_sqr
+#define fmul121666 x25519_fe51_mul121666
+#define fe51_tobytes x25519_fe51_tobytes
+
+static void fadd(fe51 h, const fe51 f, const fe51 g)
+{
+	h[0] = f[0] + g[0];
+	h[1] = f[1] + g[1];
+	h[2] = f[2] + g[2];
+	h[3] = f[3] + g[3];
+	h[4] = f[4] + g[4];
+}
+
+/*
+ * Prime = 2 ** 255 - 19, 255 bits
+ * (0x7fffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffed)
+ *
+ * Prime in 5 51-bit limbs
+ */
+static fe51 prime51 = { 0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff};
+
+static void fsub(fe51 h, const fe51 f, const fe51 g)
+{
+	h[0] = (f[0] + ((prime51[0] * 2))) - g[0];
+	h[1] = (f[1] + ((prime51[1] * 2))) - g[1];
+	h[2] = (f[2] + ((prime51[2] * 2))) - g[2];
+	h[3] = (f[3] + ((prime51[3] * 2))) - g[3];
+	h[4] = (f[4] + ((prime51[4] * 2))) - g[4];
+}
+
+static void fe51_frombytes(fe51 h, const uint8_t *s)
+{
+	/*
+	 * Make sure 64-bit aligned.
+	 */
+	unsigned char sbuf[32+8];
+	unsigned char *sb = PTR_ALIGN((void *)sbuf, 8);
+
+	memcpy(sb, s, 32);
+	x25519_fe51_frombytes(h, sb);
+}
+
+static void finv(fe51 o, const fe51 i)
+{
+	fe51 a0, b, c, t00;
+
+	fsqr(a0, i);
+	x25519_fe51_sqr_times(t00, a0, 2);
+
+	fmul(b, t00, i);
+	fmul(a0, b, a0);
+
+	fsqr(t00, a0);
+
+	fmul(b, t00, b);
+	x25519_fe51_sqr_times(t00, b, 5);
+
+	fmul(b, t00, b);
+	x25519_fe51_sqr_times(t00, b, 10);
+
+	fmul(c, t00, b);
+	x25519_fe51_sqr_times(t00, c, 20);
+
+	fmul(t00, t00, c);
+	x25519_fe51_sqr_times(t00, t00, 10);
+
+	fmul(b, t00, b);
+	x25519_fe51_sqr_times(t00, b, 50);
+
+	fmul(c, t00, b);
+	x25519_fe51_sqr_times(t00, c, 100);
+
+	fmul(t00, t00, c);
+	x25519_fe51_sqr_times(t00, t00, 50);
+
+	fmul(t00, t00, b);
+	x25519_fe51_sqr_times(t00, t00, 5);
+
+	fmul(o, t00, a0);
+}
+
+static void curve25519_fe51(uint8_t out[32], const uint8_t scalar[32],
+			    const uint8_t point[32])
+{
+	fe51 x1, x2, z2, x3, z3;
+	uint8_t s[32];
+	unsigned int swap = 0;
+	int i;
+
+	memcpy(s, scalar, 32);
+	s[0]  &= 0xf8;
+	s[31] &= 0x7f;
+	s[31] |= 0x40;
+	fe51_frombytes(x1, point);
+
+	z2[0] = z2[1] = z2[2] = z2[3] = z2[4] = 0;
+	x3[0] = x1[0];
+	x3[1] = x1[1];
+	x3[2] = x1[2];
+	x3[3] = x1[3];
+	x3[4] = x1[4];
+
+	x2[0] = z3[0] = 1;
+	x2[1] = z3[1] = 0;
+	x2[2] = z3[2] = 0;
+	x2[3] = z3[3] = 0;
+	x2[4] = z3[4] = 0;
+
+	for (i = 254; i >= 0; --i) {
+		unsigned int k_t = 1 & (s[i / 8] >> (i & 7));
+		fe51 a, b, c, d, e;
+		fe51 da, cb, aa, bb;
+		fe51 dacb_p, dacb_m;
+
+		swap ^= k_t;
+		x25519_cswap(x2, x3, swap);
+		x25519_cswap(z2, z3, swap);
+		swap = k_t;
+
+		fsub(b, x2, z2);	// B = x_2 - z_2
+		fadd(a, x2, z2);	// A = x_2 + z_2
+		fsub(d, x3, z3);	// D = x_3 - z_3
+		fadd(c, x3, z3);	// C = x_3 + z_3
+
+		fsqr(bb, b);		// BB = B^2
+		fsqr(aa, a);		// AA = A^2
+		fmul(da, d, a);		// DA = D * A
+		fmul(cb, c, b);		// CB = C * B
+
+		fsub(e, aa, bb);	// E = AA - BB
+		fmul(x2, aa, bb);	// x2 = AA * BB
+		fadd(dacb_p, da, cb);	// DA + CB
+		fsub(dacb_m, da, cb);	// DA - CB
+
+		fmul121666(z3, e);	// 121666 * E
+		fsqr(z2, dacb_m);	// (DA - CB)^2
+		fsqr(x3, dacb_p);	// x3 = (DA + CB)^2
+		fadd(b, bb, z3);	// BB + 121666 * E
+		fmul(z3, x1, z2);	// z3 = x1 * (DA - CB)^2
+		fmul(z2, e, b);		// z2 = e * (BB + (DA + CB)^2)
+	}
+
+	finv(z2, z2);
+	fmul(x2, x2, z2);
+	fe51_tobytes(out, x2);
+}
+
+static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
+			    const u8 secret[CURVE25519_KEY_SIZE],
+			    const u8 basepoint[CURVE25519_KEY_SIZE])
+{
+	curve25519_fe51(mypublic, secret, basepoint);
+}
+
+static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+				 const u8 secret[CURVE25519_KEY_SIZE])
+{
+	curve25519_fe51(pub, secret, curve25519_base_point);
+}
diff --git a/lib/crypto/x86/curve25519.h b/lib/crypto/x86/curve25519.h
new file mode 100644
index 000000000000..5c0b8408852d
--- /dev/null
+++ b/lib/crypto/x86/curve25519.h
@@ -0,0 +1,1613 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2020 Jason A. Donenfeld . All Rights Reserved.
+ * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + */ + +#include +#include +#include + +#include +#include + +static __always_inline u64 eq_mask(u64 a, u64 b) +{ + u64 x = a ^ b; + u64 minus_x = ~x + (u64)1U; + u64 x_or_minus_x = x | minus_x; + u64 xnx = x_or_minus_x >> (u32)63U; + return xnx - (u64)1U; +} + +static __always_inline u64 gte_mask(u64 a, u64 b) +{ + u64 x = a; + u64 y = b; + u64 x_xor_y = x ^ y; + u64 x_sub_y = x - y; + u64 x_sub_y_xor_y = x_sub_y ^ y; + u64 q = x_xor_y | x_sub_y_xor_y; + u64 x_xor_q = x ^ q; + u64 x_xor_q_ = x_xor_q >> (u32)63U; + return x_xor_q_ - (u64)1U; +} + +/* Computes the addition of four-element f1 with value in f2 + * and returns the carry (if any) */ +static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) +{ + u64 carry_r; + + asm volatile( + /* Clear registers to propagate the carry bit */ + " xor %%r8d, %%r8d;" + " xor %%r9d, %%r9d;" + " xor %%r10d, %%r10d;" + " xor %%r11d, %%r11d;" + " xor %k1, %k1;" + + /* Begin addition chain */ + " addq 0(%3), %0;" + " movq %0, 0(%2);" + " adcxq 8(%3), %%r8;" + " movq %%r8, 8(%2);" + " adcxq 16(%3), %%r9;" + " movq %%r9, 16(%2);" + " adcxq 24(%3), %%r10;" + " movq %%r10, 24(%2);" + + /* Return the carry bit in a register */ + " adcx %%r11, %1;" + : "+&r"(f2), "=&r"(carry_r) + : "r"(out), "r"(f1) + : "%r8", "%r9", "%r10", "%r11", "memory", "cc"); + + return carry_r; +} + +/* Computes the field addition of two field elements */ +static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw addition of f1 + f2 */ + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + /* Step 2: Add carry*38 to the original sum */ + " xor %%ecx, %%ecx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +/* Computes the field subtraction of two field elements */ +static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) +{ + asm volatile( + /* Compute the raw subtraction of f1-f2 */ + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + /* Step 2: Subtract carry*38 from the original difference */ + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + /* Store the result */ + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r"(out), "r"(f1), "r"(f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +/* Computes a field multiplication: out <- f1 * f2 + * Uses 
the 8-element buffer tmp for intermediate results */ +static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + + /* Compute the raw multiplication: tmp <- src1 * src2 */ + + /* Compute src1[0] * src2 */ + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /* Line up pointers */ + " mov %2, %0;" + " mov %3, %2;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); +} + +/* Computes two field multiplications: + * out[0] <- f1[0] * f2[0] + * out[1] <- f1[1] * f2[1] + * Uses the 16-element buffer tmp for intermediate results: */ +static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) +{ + asm volatile( + + 
/* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ + + /* Compute src1[0] * src2 */ + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ + + /* Compute src1[0] * src2 */ + " movq 32(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 64(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 72(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + /* Compute src1[1] * src2 */ + " movq 40(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 72(%2), %%r8;" + " movq %%r8, 72(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 80(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[2] * src2 */ + " movq 48(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 80(%2), %%r8;" + " movq %%r8, 80(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 88(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + /* Compute src1[3] * src2 */ 
+ " movq 56(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 88(%2), %%r8;" + " movq %%r8, 88(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 96(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 104(%2);" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 112(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 120(%2);" + + /* Line up pointers */ + " mov %2, %0;" + " mov %3, %2;" + + /* Wrap the results back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 40(%2);" + " adcx %1, %%r10;" + " movq %%r10, 48(%2);" + " adcx %1, %%r11;" + " movq %%r11, 56(%2);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); +} + +/* Computes the field multiplication of four-element f1 with value in f2 + * Requires f2 to be smaller than 2^17 */ +static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) +{ + register u64 f2_r asm("rdx") = f2; + + asm volatile( + /* Compute the raw multiplication of f1*f2 */ + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ + " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ + " adcx %%rbx, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute carry*38 */ + " mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " 
cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2_r) + : "r"(out), "r"(f1) + : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", + "memory", "cc"); +} + +/* Computes p1 <- bit ? p2 : p1 in constant time */ +static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) +{ + asm volatile( + /* Transfer bit into CF flag */ + " add $18446744073709551615, %0;" + + /* cswap p1[0], p2[0] */ + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + /* cswap p1[1], p2[1] */ + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + /* cswap p1[2], p2[2] */ + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + /* cswap p1[3], p2[3] */ + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + /* cswap p1[4], p2[4] */ + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + /* cswap p1[5], p2[5] */ + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + /* cswap p1[6], p2[6] */ + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + /* cswap p1[7], p2[7] */ + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r"(bit) + : "r"(p1), "r"(p2) + : "%r8", "%r9", "%r10", "memory", "cc"); +} + +/* Computes the square of a field element: out <- f * f + * Uses the 8-element buffer tmp for intermediate results */ +static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Compute the raw multiplication: tmp <- f * f */ + + /* Step 1: Compute all partial products */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx 
%%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + /* Line up pointers */ + " mov %1, %0;" + " mov %2, %1;" + + /* Wrap the result back into the field */ + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); +} + +/* Computes two field squarings: + * out[0] <- f[0] * f[0] + * out[1] <- f[1] * f[1] + * Uses the 16-element buffer tmp for intermediate results */ +static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) +{ + asm volatile( + /* Step 1: Compute all partial products */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + /* Step 1: Compute all partial products */ + " movq 32(%0), %%rdx;" /* f[0] */ + " mulxq 40(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 48(%0), %%r9, 
%%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 56(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 56(%0), %%rdx;" /* f[3] */ + " mulxq 40(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 40(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 48(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ + + /* Step 2: Compute two parallel carry chains */ + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + /* Step 3: Compute intermediate squares */ + " movq 32(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 64(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 72(%1);" + " movq 40(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 80(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 88(%1);" + " movq 48(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 96(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 104(%1);" + " movq 56(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 112(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 120(%1);" + + /* Line up pointers */ + " mov %1, %0;" + " mov %2, %1;" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + + /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + /* Step 2: Fold the carry back into dst */ + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%1);" + + /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); +} + +static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) +{ + u64 *nq = p01_tmp1; + u64 *nq_p1 = p01_tmp1 + 
(u32)8U; + u64 *tmp1 = p01_tmp1 + (u32)16U; + u64 *x1 = q; + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *z3 = nq_p1 + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + u64 *x3; + u64 *z31; + u64 *d0; + u64 *c0; + u64 *a1; + u64 *b1; + u64 *d; + u64 *c; + u64 *ab1; + u64 *dc1; + fadd(a, x2, z2); + fsub(b, x2, z2); + x3 = nq_p1; + z31 = nq_p1 + (u32)4U; + d0 = dc; + c0 = dc + (u32)4U; + fadd(c0, x3, z31); + fsub(d0, x3, z31); + fmul2(dc, dc, ab, tmp2); + fadd(x3, d0, c0); + fsub(z31, d0, c0); + a1 = tmp1; + b1 = tmp1 + (u32)4U; + d = tmp1 + (u32)8U; + c = tmp1 + (u32)12U; + ab1 = tmp1; + dc1 = tmp1 + (u32)8U; + fsqr2(dc1, ab1, tmp2); + fsqr2(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b1, c, (u64)121665U); + fadd(b1, b1, d); + fmul2(nq, dc1, ab1, tmp2); + fmul(z3, z3, x1, tmp2); +} + +static void point_double(u64 *nq, u64 *tmp1, u64 *tmp2) +{ + u64 *x2 = nq; + u64 *z2 = nq + (u32)4U; + u64 *a = tmp1; + u64 *b = tmp1 + (u32)4U; + u64 *d = tmp1 + (u32)8U; + u64 *c = tmp1 + (u32)12U; + u64 *ab = tmp1; + u64 *dc = tmp1 + (u32)8U; + fadd(a, x2, z2); + fsub(b, x2, z2); + fsqr2(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub(c, d, c); + fmul_scalar(b, c, (u64)121665U); + fadd(b, b, d); + fmul2(nq, dc, ab, tmp2); +} + +static void montgomery_ladder(u64 *out, const u8 *key, u64 *init1) +{ + u64 tmp2[16U] = { 0U }; + u64 p01_tmp1_swap[33U] = { 0U }; + u64 *p0 = p01_tmp1_swap; + u64 *p01 = p01_tmp1_swap; + u64 *p03 = p01; + u64 *p11 = p01 + (u32)8U; + u64 *x0; + u64 *z0; + u64 *p01_tmp1; + u64 *p01_tmp11; + u64 *nq10; + u64 *nq_p11; + u64 *swap1; + u64 sw0; + u64 *nq1; + u64 *tmp1; + memcpy(p11, init1, (u32)8U * sizeof(init1[0U])); + x0 = p03; + z0 = p03 + (u32)4U; + x0[0U] = (u64)1U; + x0[1U] = (u64)0U; + x0[2U] = (u64)0U; + x0[3U] = (u64)0U; + z0[0U] = (u64)0U; + z0[1U] = (u64)0U; + z0[2U] = (u64)0U; + z0[3U] = (u64)0U; + p01_tmp1 = p01_tmp1_swap; + p01_tmp11 = p01_tmp1_swap; + nq10 = p01_tmp1_swap; + nq_p11 = p01_tmp1_swap + (u32)8U; + swap1 = p01_tmp1_swap + (u32)32U; + cswap2((u64)1U, nq10, nq_p11); + point_add_and_double(init1, p01_tmp11, tmp2); + swap1[0U] = (u64)1U; + { + u32 i; + for (i = (u32)0U; i < (u32)251U; i = i + (u32)1U) { + u64 *p01_tmp12 = p01_tmp1_swap; + u64 *swap2 = p01_tmp1_swap + (u32)32U; + u64 *nq2 = p01_tmp12; + u64 *nq_p12 = p01_tmp12 + (u32)8U; + u64 bit = (u64)(key[((u32)253U - i) / (u32)8U] >> ((u32)253U - i) % (u32)8U & (u8)1U); + u64 sw = swap2[0U] ^ bit; + cswap2(sw, nq2, nq_p12); + point_add_and_double(init1, p01_tmp12, tmp2); + swap2[0U] = bit; + } + } + sw0 = swap1[0U]; + cswap2(sw0, nq10, nq_p11); + nq1 = p01_tmp1; + tmp1 = p01_tmp1 + (u32)16U; + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + point_double(nq1, tmp1, tmp2); + memcpy(out, p0, (u32)8U * sizeof(p0[0U])); + + memzero_explicit(tmp2, sizeof(tmp2)); + memzero_explicit(p01_tmp1_swap, sizeof(p01_tmp1_swap)); +} + +static void fsquare_times(u64 *o, const u64 *inp, u64 *tmp, u32 n1) +{ + u32 i; + fsqr(o, inp, tmp); + for (i = (u32)0U; i < n1 - (u32)1U; i = i + (u32)1U) + fsqr(o, o, tmp); +} + +static void finv(u64 *o, const u64 *i, u64 *tmp) +{ + u64 t1[16U] = { 0U }; + u64 *a0 = t1; + u64 *b = t1 + (u32)4U; + u64 *c = t1 + (u32)8U; + u64 *t00 = t1 + (u32)12U; + u64 *tmp1 = tmp; + u64 *a; + u64 *t0; + fsquare_times(a0, i, tmp1, (u32)1U); + fsquare_times(t00, a0, tmp1, (u32)2U); + fmul(b, t00, i, tmp); + fmul(a0, b, a0, 
tmp); + fsquare_times(t00, a0, tmp1, (u32)1U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)5U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)10U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)20U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)10U); + fmul(b, t00, b, tmp); + fsquare_times(t00, b, tmp1, (u32)50U); + fmul(c, t00, b, tmp); + fsquare_times(t00, c, tmp1, (u32)100U); + fmul(t00, t00, c, tmp); + fsquare_times(t00, t00, tmp1, (u32)50U); + fmul(t00, t00, b, tmp); + fsquare_times(t00, t00, tmp1, (u32)5U); + a = t1; + t0 = t1 + (u32)12U; + fmul(o, t0, a, tmp); +} + +static void store_felem(u64 *b, u64 *f) +{ + u64 f30 = f[3U]; + u64 top_bit0 = f30 >> (u32)63U; + u64 f31; + u64 top_bit; + u64 f0; + u64 f1; + u64 f2; + u64 f3; + u64 m0; + u64 m1; + u64 m2; + u64 m3; + u64 mask; + u64 f0_; + u64 f1_; + u64 f2_; + u64 f3_; + u64 o0; + u64 o1; + u64 o2; + u64 o3; + f[3U] = f30 & (u64)0x7fffffffffffffffU; + add_scalar(f, f, (u64)19U * top_bit0); + f31 = f[3U]; + top_bit = f31 >> (u32)63U; + f[3U] = f31 & (u64)0x7fffffffffffffffU; + add_scalar(f, f, (u64)19U * top_bit); + f0 = f[0U]; + f1 = f[1U]; + f2 = f[2U]; + f3 = f[3U]; + m0 = gte_mask(f0, (u64)0xffffffffffffffedU); + m1 = eq_mask(f1, (u64)0xffffffffffffffffU); + m2 = eq_mask(f2, (u64)0xffffffffffffffffU); + m3 = eq_mask(f3, (u64)0x7fffffffffffffffU); + mask = ((m0 & m1) & m2) & m3; + f0_ = f0 - (mask & (u64)0xffffffffffffffedU); + f1_ = f1 - (mask & (u64)0xffffffffffffffffU); + f2_ = f2 - (mask & (u64)0xffffffffffffffffU); + f3_ = f3 - (mask & (u64)0x7fffffffffffffffU); + o0 = f0_; + o1 = f1_; + o2 = f2_; + o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; +} + +static void encode_point(u8 *o, const u64 *i) +{ + const u64 *x = i; + const u64 *z = i + (u32)4U; + u64 tmp[4U] = { 0U }; + u64 tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul(tmp, tmp, x, tmp_w); + store_felem((u64 *)o, tmp); +} + +static void curve25519_ever64(u8 *out, const u8 *priv, const u8 *pub) +{ + u64 init1[8U] = { 0U }; + u64 tmp[4U] = { 0U }; + u64 tmp3; + u64 *x; + u64 *z; + { + u32 i; + for (i = (u32)0U; i < (u32)4U; i = i + (u32)1U) { + u64 *os = tmp; + const u8 *bj = pub + i * (u32)8U; + u64 u = *(u64 *)bj; + u64 r = u; + u64 x0 = r; + os[i] = x0; + } + } + tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (u64)0x7fffffffffffffffU; + x = init1; + z = init1 + (u32)4U; + z[0U] = (u64)1U; + z[1U] = (u64)0U; + z[2U] = (u64)0U; + z[3U] = (u64)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init1, priv, init1); + encode_point(out, init1); +} + +/* The below constants were generated using this sage script: + * + * #!/usr/bin/env sage + * import sys + * from sage.all import * + * def limbs(n): + * n = int(n) + * l = ((n >> 0) % 2^64, (n >> 64) % 2^64, (n >> 128) % 2^64, (n >> 192) % 2^64) + * return "0x%016xULL, 0x%016xULL, 0x%016xULL, 0x%016xULL" % l + * ec = EllipticCurve(GF(2^255 - 19), [0, 486662, 0, 1, 0]) + * p_minus_s = (ec.lift_x(9) - ec.lift_x(1))[0] + * print("static const u64 p_minus_s[] = { %s };\n" % limbs(p_minus_s)) + * print("static const u64 table_ladder[] = {") + * p = ec.lift_x(9) + * for i in range(252): + * l = (p[0] + p[2]) / (p[0] - p[2]) + * print(("\t%s" + ("," if i != 251 else "")) % limbs(l)) + * p = p * 2 + * print("};") + * + */ + +static const u64 p_minus_s[] = { 0x816b1e0137d48290ULL, 0x440f6a51eb4d1207ULL, 0x52385f46dca2b71dULL, 0x215132111d8354cbULL }; + +static const u64 table_ladder[] = { + 0xfffffffffffffff3ULL, 
0xffffffffffffffffULL, 0xffffffffffffffffULL, 0x5fffffffffffffffULL, + 0x6b8220f416aafe96ULL, 0x82ebeb2b4f566a34ULL, 0xd5a9a5b075a5950fULL, 0x5142b2cf4b2488f4ULL, + 0x6aaebc750069680cULL, 0x89cf7820a0f99c41ULL, 0x2a58d9183b56d0f4ULL, 0x4b5aca80e36011a4ULL, + 0x329132348c29745dULL, 0xf4a2e616e1642fd7ULL, 0x1e45bb03ff67bc34ULL, 0x306912d0f42a9b4aULL, + 0xff886507e6af7154ULL, 0x04f50e13dfeec82fULL, 0xaa512fe82abab5ceULL, 0x174e251a68d5f222ULL, + 0xcf96700d82028898ULL, 0x1743e3370a2c02c5ULL, 0x379eec98b4e86eaaULL, 0x0c59888a51e0482eULL, + 0xfbcbf1d699b5d189ULL, 0xacaef0d58e9fdc84ULL, 0xc1c20d06231f7614ULL, 0x2938218da274f972ULL, + 0xf6af49beff1d7f18ULL, 0xcc541c22387ac9c2ULL, 0x96fcc9ef4015c56bULL, 0x69c1627c690913a9ULL, + 0x7a86fd2f4733db0eULL, 0xfdb8c4f29e087de9ULL, 0x095e4b1a8ea2a229ULL, 0x1ad7a7c829b37a79ULL, + 0x342d89cad17ea0c0ULL, 0x67bedda6cced2051ULL, 0x19ca31bf2bb42f74ULL, 0x3df7b4c84980acbbULL, + 0xa8c6444dc80ad883ULL, 0xb91e440366e3ab85ULL, 0xc215cda00164f6d8ULL, 0x3d867c6ef247e668ULL, + 0xc7dd582bcc3e658cULL, 0xfd2c4748ee0e5528ULL, 0xa0fd9b95cc9f4f71ULL, 0x7529d871b0675ddfULL, + 0xb8f568b42d3cbd78ULL, 0x1233011b91f3da82ULL, 0x2dce6ccd4a7c3b62ULL, 0x75e7fc8e9e498603ULL, + 0x2f4f13f1fcd0b6ecULL, 0xf1a8ca1f29ff7a45ULL, 0xc249c1a72981e29bULL, 0x6ebe0dbb8c83b56aULL, + 0x7114fa8d170bb222ULL, 0x65a2dcd5bf93935fULL, 0xbdc41f68b59c979aULL, 0x2f0eef79a2ce9289ULL, + 0x42ecbf0c083c37ceULL, 0x2930bc09ec496322ULL, 0xf294b0c19cfeac0dULL, 0x3780aa4bedfabb80ULL, + 0x56c17d3e7cead929ULL, 0xe7cb4beb2e5722c5ULL, 0x0ce931732dbfe15aULL, 0x41b883c7621052f8ULL, + 0xdbf75ca0c3d25350ULL, 0x2936be086eb1e351ULL, 0xc936e03cb4a9b212ULL, 0x1d45bf82322225aaULL, + 0xe81ab1036a024cc5ULL, 0xe212201c304c9a72ULL, 0xc5d73fba6832b1fcULL, 0x20ffdb5a4d839581ULL, + 0xa283d367be5d0fadULL, 0x6c2b25ca8b164475ULL, 0x9d4935467caaf22eULL, 0x5166408eee85ff49ULL, + 0x3c67baa2fab4e361ULL, 0xb3e433c67ef35cefULL, 0x5259729241159b1cULL, 0x6a621892d5b0ab33ULL, + 0x20b74a387555cdcbULL, 0x532aa10e1208923fULL, 0xeaa17b7762281dd1ULL, 0x61ab3443f05c44bfULL, + 0x257a6c422324def8ULL, 0x131c6c1017e3cf7fULL, 0x23758739f630a257ULL, 0x295a407a01a78580ULL, + 0xf8c443246d5da8d9ULL, 0x19d775450c52fa5dULL, 0x2afcfc92731bf83dULL, 0x7d10c8e81b2b4700ULL, + 0xc8e0271f70baa20bULL, 0x993748867ca63957ULL, 0x5412efb3cb7ed4bbULL, 0x3196d36173e62975ULL, + 0xde5bcad141c7dffcULL, 0x47cc8cd2b395c848ULL, 0xa34cd942e11af3cbULL, 0x0256dbf2d04ecec2ULL, + 0x875ab7e94b0e667fULL, 0xcad4dd83c0850d10ULL, 0x47f12e8f4e72c79fULL, 0x5f1a87bb8c85b19bULL, + 0x7ae9d0b6437f51b8ULL, 0x12c7ce5518879065ULL, 0x2ade09fe5cf77aeeULL, 0x23a05a2f7d2c5627ULL, + 0x5908e128f17c169aULL, 0xf77498dd8ad0852dULL, 0x74b4c4ceab102f64ULL, 0x183abadd10139845ULL, + 0xb165ba8daa92aaacULL, 0xd5c5ef9599386705ULL, 0xbe2f8f0cf8fc40d1ULL, 0x2701e635ee204514ULL, + 0x629fa80020156514ULL, 0xf223868764a8c1ceULL, 0x5b894fff0b3f060eULL, 0x60d9944cf708a3faULL, + 0xaeea001a1c7a201fULL, 0xebf16a633ee2ce63ULL, 0x6f7709594c7a07e1ULL, 0x79b958150d0208cbULL, + 0x24b55e5301d410e7ULL, 0xe3a34edff3fdc84dULL, 0xd88768e4904032d8ULL, 0x131384427b3aaeecULL, + 0x8405e51286234f14ULL, 0x14dc4739adb4c529ULL, 0xb8a2b5b250634ffdULL, 0x2fe2a94ad8a7ff93ULL, + 0xec5c57efe843faddULL, 0x2843ce40f0bb9918ULL, 0xa4b561d6cf3d6305ULL, 0x743629bde8fb777eULL, + 0x343edd46bbaf738fULL, 0xed981828b101a651ULL, 0xa401760b882c797aULL, 0x1fc223e28dc88730ULL, + 0x48604e91fc0fba0eULL, 0xb637f78f052c6fa4ULL, 0x91ccac3d09e9239cULL, 0x23f7eed4437a687cULL, + 0x5173b1118d9bd800ULL, 0x29d641b63189d4a7ULL, 0xfdbf177988bbc586ULL, 0x2959894fcad81df5ULL, + 
0xaebc8ef3b4bbc899ULL, 0x4148995ab26992b9ULL, 0x24e20b0134f92cfbULL, 0x40d158894a05dee8ULL, + 0x46b00b1185af76f6ULL, 0x26bac77873187a79ULL, 0x3dc0bf95ab8fff5fULL, 0x2a608bd8945524d7ULL, + 0x26449588bd446302ULL, 0x7c4bc21c0388439cULL, 0x8e98a4f383bd11b2ULL, 0x26218d7bc9d876b9ULL, + 0xe3081542997c178aULL, 0x3c2d29a86fb6606fULL, 0x5c217736fa279374ULL, 0x7dde05734afeb1faULL, + 0x3bf10e3906d42babULL, 0xe4f7803e1980649cULL, 0xe6053bf89595bf7aULL, 0x394faf38da245530ULL, + 0x7a8efb58896928f4ULL, 0xfbc778e9cc6a113cULL, 0x72670ce330af596fULL, 0x48f222a81d3d6cf7ULL, + 0xf01fce410d72caa7ULL, 0x5a20ecc7213b5595ULL, 0x7bc21165c1fa1483ULL, 0x07f89ae31da8a741ULL, + 0x05d2c2b4c6830ff9ULL, 0xd43e330fc6316293ULL, 0xa5a5590a96d3a904ULL, 0x705edb91a65333b6ULL, + 0x048ee15e0bb9a5f7ULL, 0x3240cfca9e0aaf5dULL, 0x8f4b71ceedc4a40bULL, 0x621c0da3de544a6dULL, + 0x92872836a08c4091ULL, 0xce8375b010c91445ULL, 0x8a72eb524f276394ULL, 0x2667fcfa7ec83635ULL, + 0x7f4c173345e8752aULL, 0x061b47feee7079a5ULL, 0x25dd9afa9f86ff34ULL, 0x3780cef5425dc89cULL, + 0x1a46035a513bb4e9ULL, 0x3e1ef379ac575adaULL, 0xc78c5f1c5fa24b50ULL, 0x321a967634fd9f22ULL, + 0x946707b8826e27faULL, 0x3dca84d64c506fd0ULL, 0xc189218075e91436ULL, 0x6d9284169b3b8484ULL, + 0x3a67e840383f2ddfULL, 0x33eec9a30c4f9b75ULL, 0x3ec7c86fa783ef47ULL, 0x26ec449fbac9fbc4ULL, + 0x5c0f38cba09b9e7dULL, 0x81168cc762a3478cULL, 0x3e23b0d306fc121cULL, 0x5a238aa0a5efdcddULL, + 0x1ba26121c4ea43ffULL, 0x36f8c77f7c8832b5ULL, 0x88fbea0b0adcf99aULL, 0x5ca9938ec25bebf9ULL, + 0xd5436a5e51fccda0ULL, 0x1dbc4797c2cd893bULL, 0x19346a65d3224a08ULL, 0x0f5034e49b9af466ULL, + 0xf23c3967a1e0b96eULL, 0xe58b08fa867a4d88ULL, 0xfb2fabc6a7341679ULL, 0x2a75381eb6026946ULL, + 0xc80a3be4c19420acULL, 0x66b1f6c681f2b6dcULL, 0x7cf7036761e93388ULL, 0x25abbbd8a660a4c4ULL, + 0x91ea12ba14fd5198ULL, 0x684950fc4a3cffa9ULL, 0xf826842130f5ad28ULL, 0x3ea988f75301a441ULL, + 0xc978109a695f8c6fULL, 0x1746eb4a0530c3f3ULL, 0x444d6d77b4459995ULL, 0x75952b8c054e5cc7ULL, + 0xa3703f7915f4d6aaULL, 0x66c346202f2647d8ULL, 0xd01469df811d644bULL, 0x77fea47d81a5d71fULL, + 0xc5e9529ef57ca381ULL, 0x6eeeb4b9ce2f881aULL, 0xb6e91a28e8009bd6ULL, 0x4b80be3e9afc3fecULL, + 0x7e3773c526aed2c5ULL, 0x1b4afcb453c9a49dULL, 0xa920bdd7baffb24dULL, 0x7c54699f122d400eULL, + 0xef46c8e14fa94bc8ULL, 0xe0b074ce2952ed5eULL, 0xbea450e1dbd885d5ULL, 0x61b68649320f712cULL, + 0x8a485f7309ccbdd1ULL, 0xbd06320d7d4d1a2dULL, 0x25232973322dbef4ULL, 0x445dc4758c17f770ULL, + 0xdb0434177cc8933cULL, 0xed6fe82175ea059fULL, 0x1efebefdc053db34ULL, 0x4adbe867c65daf99ULL, + 0x3acd71a2a90609dfULL, 0xe5e991856dd04050ULL, 0x1ec69b688157c23cULL, 0x697427f6885cfe4dULL, + 0xd7be7b9b65e1a851ULL, 0xa03d28d522c536ddULL, 0x28399d658fd2b645ULL, 0x49e5b7e17c2641e1ULL, + 0x6f8c3a98700457a4ULL, 0x5078f0a25ebb6778ULL, 0xd13c3ccbc382960fULL, 0x2e003258a7df84b1ULL, + 0x8ad1f39be6296a1cULL, 0xc1eeaa652a5fbfb2ULL, 0x33ee0673fd26f3cbULL, 0x59256173a69d2cccULL, + 0x41ea07aa4e18fc41ULL, 0xd9fc19527c87a51eULL, 0xbdaacb805831ca6fULL, 0x445b652dc916694fULL, + 0xce92a3a7f2172315ULL, 0x1edc282de11b9964ULL, 0xa1823aafe04c314aULL, 0x790a2d94437cf586ULL, + 0x71c447fb93f6e009ULL, 0x8922a56722845276ULL, 0xbf70903b204f5169ULL, 0x2f7a89891ba319feULL, + 0x02a08eb577e2140cULL, 0xed9a4ed4427bdcf4ULL, 0x5253ec44e4323cd1ULL, 0x3e88363c14e9355bULL, + 0xaa66c14277110b8cULL, 0x1ae0391610a23390ULL, 0x2030bd12c93fc2a2ULL, 0x3ee141579555c7abULL, + 0x9214de3a6d6e7d41ULL, 0x3ccdd88607f17efeULL, 0x674f1288f8e11217ULL, 0x5682250f329f93d0ULL, + 0x6cf00b136d2e396eULL, 0x6e4cf86f1014debfULL, 0x5930b1b5bfcc4e83ULL, 
0x047069b48aba16b6ULL, + 0x0d4ce4ab69b20793ULL, 0xb24db91a97d0fb9eULL, 0xcdfa50f54e00d01dULL, 0x221b1085368bddb5ULL, + 0xe7e59468b1e3d8d2ULL, 0x53c56563bd122f93ULL, 0xeee8a903e0663f09ULL, 0x61efa662cbbe3d42ULL, + 0x2cf8ddddde6eab2aULL, 0x9bf80ad51435f231ULL, 0x5deadacec9f04973ULL, 0x29275b5d41d29b27ULL, + 0xcfde0f0895ebf14fULL, 0xb9aab96b054905a7ULL, 0xcae80dd9a1c420fdULL, 0x0a63bf2f1673bbc7ULL, + 0x092f6e11958fbc8cULL, 0x672a81e804822fadULL, 0xcac8351560d52517ULL, 0x6f3f7722c8f192f8ULL, + 0xf8ba90ccc2e894b7ULL, 0x2c7557a438ff9f0dULL, 0x894d1d855ae52359ULL, 0x68e122157b743d69ULL, + 0xd87e5570cfb919f3ULL, 0x3f2cdecd95798db9ULL, 0x2121154710c0a2ceULL, 0x3c66a115246dc5b2ULL, + 0xcbedc562294ecb72ULL, 0xba7143c36a280b16ULL, 0x9610c2efd4078b67ULL, 0x6144735d946a4b1eULL, + 0x536f111ed75b3350ULL, 0x0211db8c2041d81bULL, 0xf93cb1000e10413cULL, 0x149dfd3c039e8876ULL, + 0xd479dde46b63155bULL, 0xb66e15e93c837976ULL, 0xdafde43b1f13e038ULL, 0x5fafda1a2e4b0b35ULL, + 0x3600bbdf17197581ULL, 0x3972050bbe3cd2c2ULL, 0x5938906dbdd5be86ULL, 0x34fce5e43f9b860fULL, + 0x75a8a4cd42d14d02ULL, 0x828dabc53441df65ULL, 0x33dcabedd2e131d3ULL, 0x3ebad76fb814d25fULL, + 0xd4906f566f70e10fULL, 0x5d12f7aa51690f5aULL, 0x45adb16e76cefcf2ULL, 0x01f768aead232999ULL, + 0x2b6cc77b6248febdULL, 0x3cd30628ec3aaffdULL, 0xce1c0b80d4ef486aULL, 0x4c3bff2ea6f66c23ULL, + 0x3f2ec4094aeaeb5fULL, 0x61b19b286e372ca7ULL, 0x5eefa966de2a701dULL, 0x23b20565de55e3efULL, + 0xe301ca5279d58557ULL, 0x07b2d4ce27c2874fULL, 0xa532cd8a9dcf1d67ULL, 0x2a52fee23f2bff56ULL, + 0x8624efb37cd8663dULL, 0xbbc7ac20ffbd7594ULL, 0x57b85e9c82d37445ULL, 0x7b3052cb86a6ec66ULL, + 0x3482f0ad2525e91eULL, 0x2cb68043d28edca0ULL, 0xaf4f6d052e1b003aULL, 0x185f8c2529781b0aULL, + 0xaa41de5bd80ce0d6ULL, 0x9407b2416853e9d6ULL, 0x563ec36e357f4c3aULL, 0x4cc4b8dd0e297bceULL, + 0xa2fc1a52ffb8730eULL, 0x1811f16e67058e37ULL, 0x10f9a366cddf4ee1ULL, 0x72f4a0c4a0b9f099ULL, + 0x8c16c06f663f4ea7ULL, 0x693b3af74e970fbaULL, 0x2102e7f1d69ec345ULL, 0x0ba53cbc968a8089ULL, + 0xca3d9dc7fea15537ULL, 0x4c6824bb51536493ULL, 0xb9886314844006b1ULL, 0x40d2a72ab454cc60ULL, + 0x5936a1b712570975ULL, 0x91b9d648debda657ULL, 0x3344094bb64330eaULL, 0x006ba10d12ee51d0ULL, + 0x19228468f5de5d58ULL, 0x0eb12f4c38cc05b0ULL, 0xa1039f9dd5601990ULL, 0x4502d4ce4fff0e0bULL, + 0xeb2054106837c189ULL, 0xd0f6544c6dd3b93cULL, 0x40727064c416d74fULL, 0x6e15c6114b502ef0ULL, + 0x4df2a398cfb1a76bULL, 0x11256c7419f2f6b1ULL, 0x4a497962066e6043ULL, 0x705b3aab41355b44ULL, + 0x365ef536d797b1d8ULL, 0x00076bd622ddf0dbULL, 0x3bbf33b0e0575a88ULL, 0x3777aa05c8e4ca4dULL, + 0x392745c85578db5fULL, 0x6fda4149dbae5ae2ULL, 0xb1f0b00b8adc9867ULL, 0x09963437d36f1da3ULL, + 0x7e824e90a5dc3853ULL, 0xccb5f6641f135cbdULL, 0x6736d86c87ce8fccULL, 0x625f3ce26604249fULL, + 0xaf8ac8059502f63fULL, 0x0c05e70a2e351469ULL, 0x35292e9c764b6305ULL, 0x1a394360c7e23ac3ULL, + 0xd5c6d53251183264ULL, 0x62065abd43c2b74fULL, 0xb5fbf5d03b973f9bULL, 0x13a3da3661206e5eULL, + 0xc6bd5837725d94e5ULL, 0x18e30912205016c5ULL, 0x2088ce1570033c68ULL, 0x7fba1f495c837987ULL, + 0x5a8c7423f2f9079dULL, 0x1735157b34023fc5ULL, 0xe4f9b49ad2fab351ULL, 0x6691ff72c878e33cULL, + 0x122c2adedc5eff3eULL, 0xf8dd4bf1d8956cf4ULL, 0xeb86205d9e9e5bdaULL, 0x049b92b9d975c743ULL, + 0xa5379730b0f6c05aULL, 0x72a0ffacc6f3a553ULL, 0xb0032c34b20dcd6dULL, 0x470e9dbc88d5164aULL, + 0xb19cf10ca237c047ULL, 0xb65466711f6c81a2ULL, 0xb3321bd16dd80b43ULL, 0x48c14f600c5fbe8eULL, + 0x66451c264aa6c803ULL, 0xb66e3904a4fa7da6ULL, 0xd45f19b0b3128395ULL, 0x31602627c3c9bc10ULL, + 0x3120dc4832e4e10dULL, 0xeb20c46756c717f7ULL, 
0x00f52e3f67280294ULL, 0x566d4fc14730c509ULL, + 0x7e3a5d40fd837206ULL, 0xc1e926dc7159547aULL, 0x216730fba68d6095ULL, 0x22e8c3843f69cea7ULL, + 0x33d074e8930e4b2bULL, 0xb6e4350e84d15816ULL, 0x5534c26ad6ba2365ULL, 0x7773c12f89f1f3f3ULL, + 0x8cba404da57962aaULL, 0x5b9897a81999ce56ULL, 0x508e862f121692fcULL, 0x3a81907fa093c291ULL, + 0x0dded0ff4725a510ULL, 0x10d8cc10673fc503ULL, 0x5b9d151c9f1f4e89ULL, 0x32a5c1d5cb09a44cULL, + 0x1e0aa442b90541fbULL, 0x5f85eb7cc1b485dbULL, 0xbee595ce8a9df2e5ULL, 0x25e496c722422236ULL, + 0x5edf3c46cd0fe5b9ULL, 0x34e75a7ed2a43388ULL, 0xe488de11d761e352ULL, 0x0e878a01a085545cULL, + 0xba493c77e021bb04ULL, 0x2b4d1843c7df899aULL, 0x9ea37a487ae80d67ULL, 0x67a9958011e41794ULL, + 0x4b58051a6697b065ULL, 0x47e33f7d8d6ba6d4ULL, 0xbb4da8d483ca46c1ULL, 0x68becaa181c2db0dULL, + 0x8d8980e90b989aa5ULL, 0xf95eb14a2c93c99bULL, 0x51c6c7c4796e73a2ULL, 0x6e228363b5efb569ULL, + 0xc6bbc0b02dd624c8ULL, 0x777eb47dec8170eeULL, 0x3cde15a004cfafa9ULL, 0x1dc6bc087160bf9bULL, + 0x2e07e043eec34002ULL, 0x18e9fc677a68dc7fULL, 0xd8da03188bd15b9aULL, 0x48fbc3bb00568253ULL, + 0x57547d4cfb654ce1ULL, 0xd3565b82a058e2adULL, 0xf63eaf0bbf154478ULL, 0x47531ef114dfbb18ULL, + 0xe1ec630a4278c587ULL, 0x5507d546ca8e83f3ULL, 0x85e135c63adc0c2bULL, 0x0aa7efa85682844eULL, + 0x72691ba8b3e1f615ULL, 0x32b4e9701fbe3ffaULL, 0x97b6d92e39bb7868ULL, 0x2cfe53dea02e39e8ULL, + 0x687392cd85cd52b0ULL, 0x27ff66c910e29831ULL, 0x97134556a9832d06ULL, 0x269bb0360a84f8a0ULL, + 0x706e55457643f85cULL, 0x3734a48c9b597d1bULL, 0x7aee91e8c6efa472ULL, 0x5cd6abc198a9d9e0ULL, + 0x0e04de06cb3ce41aULL, 0xd8c6eb893402e138ULL, 0x904659bb686e3772ULL, 0x7215c371746ba8c8ULL, + 0xfd12a97eeae4a2d9ULL, 0x9514b7516394f2c5ULL, 0x266fd5809208f294ULL, 0x5c847085619a26b9ULL, + 0x52985410fed694eaULL, 0x3c905b934a2ed254ULL, 0x10bb47692d3be467ULL, 0x063b3d2d69e5e9e1ULL, + 0x472726eedda57debULL, 0xefb6c4ae10f41891ULL, 0x2b1641917b307614ULL, 0x117c554fc4f45b7cULL, + 0xc07cf3118f9d8812ULL, 0x01dbd82050017939ULL, 0xd7e803f4171b2827ULL, 0x1015e87487d225eaULL, + 0xc58de3fed23acc4dULL, 0x50db91c294a7be2dULL, 0x0b94d43d1c9cf457ULL, 0x6b1640fa6e37524aULL, + 0x692f346c5fda0d09ULL, 0x200b1c59fa4d3151ULL, 0xb8c46f760777a296ULL, 0x4b38395f3ffdfbcfULL, + 0x18d25e00be54d671ULL, 0x60d50582bec8aba6ULL, 0x87ad8f263b78b982ULL, 0x50fdf64e9cda0432ULL, + 0x90f567aac578dcf0ULL, 0xef1e9b0ef2a3133bULL, 0x0eebba9242d9de71ULL, 0x15473c9bf03101c7ULL, + 0x7c77e8ae56b78095ULL, 0xb678e7666e6f078eULL, 0x2da0b9615348ba1fULL, 0x7cf931c1ff733f0bULL, + 0x26b357f50a0a366cULL, 0xe9708cf42b87d732ULL, 0xc13aeea5f91cb2c0ULL, 0x35d90c991143bb4cULL, + 0x47c1c404a9a0d9dcULL, 0x659e58451972d251ULL, 0x3875a8c473b38c31ULL, 0x1fbd9ed379561f24ULL, + 0x11fabc6fd41ec28dULL, 0x7ef8dfe3cd2a2dcaULL, 0x72e73b5d8c404595ULL, 0x6135fa4954b72f27ULL, + 0xccfc32a2de24b69cULL, 0x3f55698c1f095d88ULL, 0xbe3350ed5ac3f929ULL, 0x5e9bf806ca477eebULL, + 0xe9ce8fb63c309f68ULL, 0x5376f63565e1f9f4ULL, 0xd1afcfb35a6393f1ULL, 0x6632a1ede5623506ULL, + 0x0b7d6c390c2ded4cULL, 0x56cb3281df04cb1fULL, 0x66305a1249ecc3c7ULL, 0x5d588b60a38ca72aULL, + 0xa6ecbf78e8e5f42dULL, 0x86eeb44b3c8a3eecULL, 0xec219c48fbd21604ULL, 0x1aaf1af517c36731ULL, + 0xc306a2836769bde7ULL, 0x208280622b1e2adbULL, 0x8027f51ffbff94a6ULL, 0x76cfa1ce1124f26bULL, + 0x18eb00562422abb6ULL, 0xf377c4d58f8c29c3ULL, 0x4dbbc207f531561aULL, 0x0253b7f082128a27ULL, + 0x3d1f091cb62c17e0ULL, 0x4860e1abd64628a9ULL, 0x52d17436309d4253ULL, 0x356f97e13efae576ULL, + 0xd351e11aa150535bULL, 0x3e6b45bb1dd878ccULL, 0x0c776128bed92c98ULL, 0x1d34ae93032885b8ULL, + 0x4ba0488ca85ba4c3ULL, 
0x985348c33c9ce6ceULL, 0x66124c6f97bda770ULL, 0x0f81a0290654124aULL,
+	0x9ed09ca6569b86fdULL, 0x811009fd18af9a2dULL, 0xff08d03f93d8c20aULL, 0x52a148199faef26bULL,
+	0x3e03f9dc2d8d1b73ULL, 0x4205801873961a70ULL, 0xc0d987f041a35970ULL, 0x07aa1f15a1c0d549ULL,
+	0xdfd46ce08cd27224ULL, 0x6d0a024f934e4239ULL, 0x808a7a6399897b59ULL, 0x0a4556e9e13d95a2ULL,
+	0xd21a991fe9c13045ULL, 0x9b0e8548fe7751b8ULL, 0x5da643cb4bf30035ULL, 0x77db28d63940f721ULL,
+	0xfc5eeb614adc9011ULL, 0x5229419ae8c411ebULL, 0x9ec3e7787d1dcf74ULL, 0x340d053e216e4cb5ULL,
+	0xcac7af39b48df2b4ULL, 0xc0faec2871a10a94ULL, 0x140a69245ca575edULL, 0x0cf1c37134273a4cULL,
+	0xc8ee306ac224b8a5ULL, 0x57eaee7ccb4930b0ULL, 0xa1e806bdaacbe74fULL, 0x7d9a62742eeb657dULL,
+	0x9eb6b6ef546c4830ULL, 0x885cca1fddb36e2eULL, 0xe6b9f383ef0d7105ULL, 0x58654fef9d2e0412ULL,
+	0xa905c4ffbe0e8e26ULL, 0x942de5df9b31816eULL, 0x497d723f802e88e1ULL, 0x30684dea602f408dULL,
+	0x21e5a278a3e6cb34ULL, 0xaefb6e6f5b151dc4ULL, 0xb30b8e049d77ca15ULL, 0x28c3c9cf53b98981ULL,
+	0x287fb721556cdd2aULL, 0x0d317ca897022274ULL, 0x7468c7423a543258ULL, 0x4a7f11464eb5642fULL,
+	0xa237a4774d193aa6ULL, 0xd865986ea92129a1ULL, 0x24c515ecf87c1a88ULL, 0x604003575f39f5ebULL,
+	0x47b9f189570a9b27ULL, 0x2b98cede465e4b78ULL, 0x026df551dbb85c20ULL, 0x74fcd91047e21901ULL,
+	0x13e2a90a23c1bfa3ULL, 0x0cb0074e478519f6ULL, 0x5ff1cbbe3af6cf44ULL, 0x67fe5438be812dbeULL,
+	0xd13cf64fa40f05b0ULL, 0x054dfb2f32283787ULL, 0x4173915b7f0d2aeaULL, 0x482f144f1f610d4eULL,
+	0xf6210201b47f8234ULL, 0x5d0ae1929e70b990ULL, 0xdcd7f455b049567cULL, 0x7e93d0f1f0916f01ULL,
+	0xdd79cbf18a7db4faULL, 0xbe8391bf6f74c62fULL, 0x027145d14b8291bdULL, 0x585a73ea2cbf1705ULL,
+	0x485ca03e928a0db2ULL, 0x10fc01a5742857e7ULL, 0x2f482edbd6d551a7ULL, 0x0f0433b5048fdb8aULL,
+	0x60da2e8dd7dc6247ULL, 0x88b4c9d38cd4819aULL, 0x13033ac001f66697ULL, 0x273b24fe3b367d75ULL,
+	0xc6e8f66a31b3b9d4ULL, 0x281514a494df49d5ULL, 0xd1726fdfc8b23da7ULL, 0x4b3ae7d103dee548ULL,
+	0xc6256e19ce4b9d7eULL, 0xff5c5cf186e3c61cULL, 0xacc63ca34b8ec145ULL, 0x74621888fee66574ULL,
+	0x956f409645290a1eULL, 0xef0bf8e3263a962eULL, 0xed6a50eb5ec2647bULL, 0x0694283a9dca7502ULL,
+	0x769b963643a2dcd1ULL, 0x42b7c8ea09fc5353ULL, 0x4f002aee13397eabULL, 0x63005e2c19b7d63aULL,
+	0xca6736da63023beaULL, 0x966c7f6db12a99b7ULL, 0xace09390c537c5e1ULL, 0x0b696063a1aa89eeULL,
+	0xebb03e97288c56e5ULL, 0x432a9f9f938c8be8ULL, 0xa6a5a93d5b717f71ULL, 0x1a5fb4c3e18f9d97ULL,
+	0x1c94e7ad1c60cdceULL, 0xee202a43fc02c4a0ULL, 0x8dafe4d867c46a20ULL, 0x0a10263c8ac27b58ULL,
+	0xd0dea9dfe4432a4aULL, 0x856af87bbe9277c5ULL, 0xce8472acc212c71aULL, 0x6f151b6d9bbb1e91ULL,
+	0x26776c527ceed56aULL, 0x7d211cb7fbf8faecULL, 0x37ae66a6fd4609ccULL, 0x1f81b702d2770c42ULL,
+	0x2fb0b057eac58392ULL, 0xe1dd89fe29744e9dULL, 0xc964f8eb17beb4f8ULL, 0x29571073c9a2d41eULL,
+	0xa948a18981c0e254ULL, 0x2df6369b65b22830ULL, 0xa33eb2d75fcfd3c6ULL, 0x078cd6ec4199a01fULL,
+	0x4a584a41ad900d2fULL, 0x32142b78e2c74c52ULL, 0x68c4e8338431c978ULL, 0x7f69ea9008689fc2ULL,
+	0x52f2c81e46a38265ULL, 0xfd78072d04a832fdULL, 0x8cd7d5fa25359e94ULL, 0x4de71b7454cc29d2ULL,
+	0x42eb60ad1eda6ac9ULL, 0x0aad37dfdbc09c3aULL, 0x81004b71e33cc191ULL, 0x44e6be345122803cULL,
+	0x03fe8388ba1920dbULL, 0xf5d57c32150db008ULL, 0x49c8c4281af60c29ULL, 0x21edb518de701aeeULL,
+	0x7fb63e418f06dc99ULL, 0xa4460d99c166d7b8ULL, 0x24dd5248ce520a83ULL, 0x5ec3ad712b928358ULL,
+	0x15022a5fbd17930fULL, 0xa4f64a77d82570e3ULL, 0x12bc8d6915783712ULL, 0x498194c0fc620abbULL,
+	0x38a2d9d255686c82ULL, 0x785c6bd9193e21f0ULL, 0xe4d5c81ab24a5484ULL, 0x56307860b2e20989ULL,
+	0x429d55f78b4d74c4ULL, 0x22f1834643350131ULL, 0x1e60c24598c71fffULL, 0x59f2f014979983efULL,
+	0x46a47d56eb494a44ULL, 0x3e22a854d636a18eULL, 0xb346e15274491c3bULL, 0x2ceafd4e5390cde7ULL,
+	0xba8a8538be0d6675ULL, 0x4b9074bb50818e23ULL, 0xcbdab89085d304c3ULL, 0x61a24fe0e56192c4ULL,
+	0xcb7615e6db525bcbULL, 0xdd7d8c35a567e4caULL, 0xe6b4153acafcdd69ULL, 0x2d668e097f3c9766ULL,
+	0xa57e7e265ce55ef0ULL, 0x5d9f4e527cd4b967ULL, 0xfbc83606492fd1e5ULL, 0x090d52beb7c3f7aeULL,
+	0x09b9515a1e7b4d7cULL, 0x1f266a2599da44c0ULL, 0xa1c49548e2c55504ULL, 0x7ef04287126f15ccULL,
+	0xfed1659dbd30ef15ULL, 0x8b4ab9eec4e0277bULL, 0x884d6236a5df3291ULL, 0x1fd96ea6bf5cf788ULL,
+	0x42a161981f190d9aULL, 0x61d849507e6052c1ULL, 0x9fe113bf285a2cd5ULL, 0x7c22d676dbad85d8ULL,
+	0x82e770ed2bfbd27dULL, 0x4c05b2ece996f5a5ULL, 0xcd40a9c2b0900150ULL, 0x5895319213d9bf64ULL,
+	0xe7cc5d703fea2e08ULL, 0xb50c491258e2188cULL, 0xcce30baa48205bf0ULL, 0x537c659ccfa32d62ULL,
+	0x37b6623a98cfc088ULL, 0xfe9bed1fa4d6aca4ULL, 0x04d29b8e56a8d1b0ULL, 0x725f71c40b519575ULL,
+	0x28c7f89cd0339ce6ULL, 0x8367b14469ddc18bULL, 0x883ada83a6a1652cULL, 0x585f1974034d6c17ULL,
+	0x89cfb266f1b19188ULL, 0xe63b4863e7c35217ULL, 0xd88c9da6b4c0526aULL, 0x3e035c9df0954635ULL,
+	0xdd9d5412fb45de9dULL, 0xdd684532e4cff40dULL, 0x4b5c999b151d671cULL, 0x2d8c2cc811e7f690ULL,
+	0x7f54be1d90055d40ULL, 0xa464c5df464aaf40ULL, 0x33979624f0e917beULL, 0x2c018dc527356b30ULL,
+	0xa5415024e330b3d4ULL, 0x73ff3d96691652d3ULL, 0x94ec42c4ef9b59f1ULL, 0x0747201618d08e5aULL,
+	0x4d6ca48aca411c53ULL, 0x66415f2fcfa66119ULL, 0x9c4dd40051e227ffULL, 0x59810bc09a02f7ebULL,
+	0x2a7eb171b3dc101dULL, 0x441c5ab99ffef68eULL, 0x32025c9b93b359eaULL, 0x5e8ce0a71e9d112fULL,
+	0xbfcccb92429503fdULL, 0xd271ba752f095d55ULL, 0x345ead5e972d091eULL, 0x18c8df11a83103baULL,
+	0x90cd949a9aed0f4cULL, 0xc5d1f4cb6660e37eULL, 0xb8cac52d56c52e0bULL, 0x6e42e400c5808e0dULL,
+	0xa3b46966eeaefd23ULL, 0x0c4f1f0be39ecdcaULL, 0x189dc8c9d683a51dULL, 0x51f27f054c09351bULL,
+	0x4c487ccd2a320682ULL, 0x587ea95bb3df1c96ULL, 0xc8ccf79e555cb8e8ULL, 0x547dc829a206d73dULL,
+	0xb822a6cd80c39b06ULL, 0xe96d54732000d4c6ULL, 0x28535b6f91463b4dULL, 0x228f4660e2486e1dULL,
+	0x98799538de8d3abfULL, 0x8cd8330045ebca6eULL, 0x79952a008221e738ULL, 0x4322e1a7535cd2bbULL,
+	0xb114c11819d1801cULL, 0x2016e4d84f3f5ec7ULL, 0xdd0e2df409260f4cULL, 0x5ec362c0ae5f7266ULL,
+	0xc0462b18b8b2b4eeULL, 0x7cc8d950274d1afbULL, 0xf25f7105436b02d2ULL, 0x43bbf8dcbff9ccd3ULL,
+	0xb6ad1767a039e9dfULL, 0xb0714da8f69d3583ULL, 0x5e55fa18b42931f5ULL, 0x4ed5558f33c60961ULL,
+	0x1fe37901c647a5ddULL, 0x593ddf1f8081d357ULL, 0x0249a4fd813fd7a6ULL, 0x69acca274e9caf61ULL,
+	0x047ba3ea330721c9ULL, 0x83423fc20e7e1ea0ULL, 0x1df4c0af01314a60ULL, 0x09a62dab89289527ULL,
+	0xa5b325a49cc6cb00ULL, 0xe94b5dc654b56cb6ULL, 0x3be28779adc994a0ULL, 0x4296e8f8ba3a4aadULL,
+	0x328689761e451eabULL, 0x2e4d598bff59594aULL, 0x49b96853d7a7084aULL, 0x4980a319601420a8ULL,
+	0x9565b9e12f552c42ULL, 0x8a5318db7100fe96ULL, 0x05c90b4d43add0d7ULL, 0x538b4cd66a5d4edaULL,
+	0xf4e94fc3e89f039fULL, 0x592c9af26f618045ULL, 0x08a36eb5fd4b9550ULL, 0x25fffaf6c2ed1419ULL,
+	0x34434459cc79d354ULL, 0xeeecbfb4b1d5476bULL, 0xddeb34a061615d99ULL, 0x5129cecceb64b773ULL,
+	0xee43215894993520ULL, 0x772f9c7cf14c0b3bULL, 0xd2e2fce306bedad5ULL, 0x715f42b546f06a97ULL,
+	0x434ecdceda5b5f1aULL, 0x0da17115a49741a9ULL, 0x680bd77c73edad2eULL, 0x487c02354edd9041ULL,
+	0xb8efeff3a70ed9c4ULL, 0x56a32aa3e857e302ULL, 0xdf3a68bd48a2a5a0ULL, 0x07f650b73176c444ULL,
+	0xe38b9b1626e0ccb1ULL, 0x79e053c18b09fb36ULL, 0x56d90319c9f94964ULL, 0x1ca941e7ac9ff5c4ULL,
+	0x49c4df29162fa0bbULL, 0x8488cf3282b33305ULL, 0x95dfda14cabb437dULL, 0x3391f78264d5ad86ULL,
+	0x729ae06ae2b5095dULL, 0xd58a58d73259a946ULL, 0xe9834262d13921edULL, 0x27fedafaa54bb592ULL,
+	0xa99dc5b829ad48bbULL, 0x5f025742499ee260ULL, 0x802c8ecd5d7513fdULL, 0x78ceb3ef3f6dd938ULL,
+	0xc342f44f8a135d94ULL, 0x7b9edb44828cdda3ULL, 0x9436d11a0537cfe7ULL, 0x5064b164ec1ab4c8ULL,
+	0x7020eccfd37eb2fcULL, 0x1f31ea3ed90d25fcULL, 0x1b930d7bdfa1bb34ULL, 0x5344467a48113044ULL,
+	0x70073170f25e6dfbULL, 0xe385dc1a50114cc8ULL, 0x2348698ac8fc4f00ULL, 0x2a77a55284dd40d8ULL,
+	0xfe06afe0c98c6ce4ULL, 0xc235df96dddfd6e4ULL, 0x1428d01e33bf1ed3ULL, 0x785768ec9300bdafULL,
+	0x9702e57a91deb63bULL, 0x61bdb8bfe5ce8b80ULL, 0x645b426f3d1d58acULL, 0x4804a82227a557bcULL,
+	0x8e57048ab44d2601ULL, 0x68d6501a4b3a6935ULL, 0xc39c9ec3f9e1c293ULL, 0x4172f257d4de63e2ULL,
+	0xd368b450330c6401ULL, 0x040d3017418f2391ULL, 0x2c34bb6090b7d90dULL, 0x16f649228fdfd51fULL,
+	0xbea6818e2b928ef5ULL, 0xe28ccf91cdc11e72ULL, 0x594aaa68e77a36cdULL, 0x313034806c7ffd0fULL,
+	0x8a9d27ac2249bd65ULL, 0x19a3b464018e9512ULL, 0xc26ccff352b37ec7ULL, 0x056f68341d797b21ULL,
+	0x5e79d6757efd2327ULL, 0xfabdbcb6553afe15ULL, 0xd3e7222c6eaf5a60ULL, 0x7046c76d4dae743bULL,
+	0x660be872b18d4a55ULL, 0x19992518574e1496ULL, 0xc103053a302bdcbbULL, 0x3ed8e9800b218e8eULL,
+	0x7b0b9239fa75e03eULL, 0xefe9fb684633c083ULL, 0x98a35fbe391a7793ULL, 0x6065510fe2d0fe34ULL,
+	0x55cb668548abad0cULL, 0xb4584548da87e527ULL, 0x2c43ecea0107c1ddULL, 0x526028809372de35ULL,
+	0x3415c56af9213b1fULL, 0x5bee1a4d017e98dbULL, 0x13f6b105b5cf709bULL, 0x5ff20e3482b29ab6ULL,
+	0x0aa29c75cc2e6c90ULL, 0xfc7d73ca3a70e206ULL, 0x899fc38fc4b5c515ULL, 0x250386b124ffc207ULL,
+	0x54ea28d5ae3d2b56ULL, 0x9913149dd6de60ceULL, 0x16694fc58f06d6c1ULL, 0x46b23975eb018fc7ULL,
+	0x470a6a0fb4b7b4e2ULL, 0x5d92475a8f7253deULL, 0xabeee5b52fbd3adbULL, 0x7fa20801a0806968ULL,
+	0x76f3faf19f7714d2ULL, 0xb3e840c12f4660c3ULL, 0x0fb4cd8df212744eULL, 0x4b065a251d3a2dd2ULL,
+	0x5cebde383d77cd4aULL, 0x6adf39df882c9cb1ULL, 0xa2dd242eb09af759ULL, 0x3147c0e50e5f6422ULL,
+	0x164ca5101d1350dbULL, 0xf8d13479c33fc962ULL, 0xe640ce4d13e5da08ULL, 0x4bdee0c45061f8baULL,
+	0xd7c46dc1a4edb1c9ULL, 0x5514d7b6437fd98aULL, 0x58942f6bb2a1c00bULL, 0x2dffb2ab1d70710eULL,
+	0xccdfcf2fc18b6d68ULL, 0xa8ebcba8b7806167ULL, 0x980697f95e2937e3ULL, 0x02fbba1cd0126e8cULL
+};
+
+static void curve25519_ever64_base(u8 *out, const u8 *priv)
+{
+	u64 swap = 1;
+	int i, j, k;
+	u64 tmp[16 + 32 + 4];
+	u64 *x1 = &tmp[0];
+	u64 *z1 = &tmp[4];
+	u64 *x2 = &tmp[8];
+	u64 *z2 = &tmp[12];
+	u64 *xz1 = &tmp[0];
+	u64 *xz2 = &tmp[8];
+	u64 *a = &tmp[0 + 16];
+	u64 *b = &tmp[4 + 16];
+	u64 *c = &tmp[8 + 16];
+	u64 *ab = &tmp[0 + 16];
+	u64 *abcd = &tmp[0 + 16];
+	u64 *ef = &tmp[16 + 16];
+	u64 *efgh = &tmp[16 + 16];
+	u64 *key = &tmp[0 + 16 + 32];
+
+	memcpy(key, priv, 32);
+	((u8 *)key)[0] &= 248;
+	((u8 *)key)[31] = (((u8 *)key)[31] & 127) | 64;
+
+	x1[0] = 1, x1[1] = x1[2] = x1[3] = 0;
+	z1[0] = 1, z1[1] = z1[2] = z1[3] = 0;
+	z2[0] = 1, z2[1] = z2[2] = z2[3] = 0;
+	memcpy(x2, p_minus_s, sizeof(p_minus_s));
+
+	j = 3;
+	for (i = 0; i < 4; ++i) {
+		while (j < (const int[]){ 64, 64, 64, 63 }[i]) {
+			u64 bit = (key[i] >> j) & 1;
+			k = (64 * i + j - 3);
+			swap = swap ^ bit;
+			cswap2(swap, xz1, xz2);
+			swap = bit;
+			fsub(b, x1, z1);
+			fadd(a, x1, z1);
+			fmul(c, &table_ladder[4 * k], b, ef);
+			fsub(b, a, c);
+			fadd(a, a, c);
+			fsqr2(ab, ab, efgh);
+			fmul2(xz1, xz2, ab, efgh);
+			++j;
+		}
+		j = 0;
+	}
+
+	point_double(xz1, abcd, efgh);
+	point_double(xz1, abcd, efgh);
+	point_double(xz1, abcd, efgh);
+	encode_point(out, xz1);
+
+	memzero_explicit(tmp, sizeof(tmp));
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2_adx);
+
+static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
+			    const u8 secret[CURVE25519_KEY_SIZE],
+			    const u8 basepoint[CURVE25519_KEY_SIZE])
+{
+	if (static_branch_likely(&curve25519_use_bmi2_adx))
+		curve25519_ever64(mypublic, secret, basepoint);
+	else
+		curve25519_generic(mypublic, secret, basepoint);
+}
+
+static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+				 const u8 secret[CURVE25519_KEY_SIZE])
+{
+	if (static_branch_likely(&curve25519_use_bmi2_adx))
+		curve25519_ever64_base(pub, secret);
+	else
+		curve25519_generic(pub, secret, curve25519_base_point);
+}
+
+#define curve25519_mod_init_arch curve25519_mod_init_arch
+static void curve25519_mod_init_arch(void)
+{
+	if (boot_cpu_has(X86_FEATURE_BMI2) && boot_cpu_has(X86_FEATURE_ADX))
+		static_branch_enable(&curve25519_use_bmi2_adx);
+}
-- cgit v1.2.3


From cb2d6b132a44a140aed3562ef932754d39ddccf3 Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Sat, 6 Sep 2025 14:35:22 -0700
Subject: lib/crypto: tests: Enable Curve25519 test when CRYPTO_SELFTESTS

Now that the Curve25519 library has been disentangled from CRYPTO,
adding CRYPTO_SELFTESTS as a default value of
CRYPTO_LIB_CURVE25519_KUNIT_TEST no longer causes a recursive kconfig
dependency. Do this, which makes this option consistent with the other
crypto KUnit test options in the same file.

Link: https://lore.kernel.org/r/20250906213523.84915-12-ebiggers@kernel.org
Signed-off-by: Eric Biggers
---
 lib/crypto/tests/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/crypto/tests/Kconfig b/lib/crypto/tests/Kconfig
index eaca60d3e0a3..578af717e13a 100644
--- a/lib/crypto/tests/Kconfig
+++ b/lib/crypto/tests/Kconfig
@@ -13,7 +13,7 @@ config CRYPTO_LIB_BLAKE2S_KUNIT_TEST
 config CRYPTO_LIB_CURVE25519_KUNIT_TEST
 	tristate "KUnit tests for Curve25519" if !KUNIT_ALL_TESTS
 	depends on KUNIT
-	default KUNIT_ALL_TESTS
+	default KUNIT_ALL_TESTS || CRYPTO_SELFTESTS
 	select CRYPTO_LIB_BENCHMARK_VISIBLE
 	select CRYPTO_LIB_CURVE25519
 	help
-- cgit v1.2.3
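Background note (not part of the patches above): once CRYPTO_LIB_CURVE25519_KUNIT_TEST is enabled, the KUnit suite exercises the library interface whose x86 backend appears in the first patch. The sketch below is illustrative only and is not taken from curve25519_kunit; it assumes the generic helpers declared in <crypto/curve25519.h> (curve25519_generate_secret(), curve25519_generate_public(), and curve25519()), and the test-case and suite names are made up for the example.

/*
 * Illustrative sketch only: a minimal KUnit-style check that the shared
 * secret computed through the Curve25519 library API agrees from both
 * sides of an exchange.  Whether the BMI2/ADX or generic backend runs is
 * decided internally by the static key set up in curve25519_mod_init_arch().
 */
#include <crypto/curve25519.h>
#include <kunit/test.h>
#include <linux/module.h>

static void curve25519_shared_secret_example(struct kunit *test)
{
	u8 a_secret[CURVE25519_KEY_SIZE], b_secret[CURVE25519_KEY_SIZE];
	u8 a_public[CURVE25519_KEY_SIZE], b_public[CURVE25519_KEY_SIZE];
	u8 ab[CURVE25519_KEY_SIZE], ba[CURVE25519_KEY_SIZE];

	/* Generate two clamped private keys and derive the public keys. */
	curve25519_generate_secret(a_secret);
	curve25519_generate_secret(b_secret);
	KUNIT_ASSERT_TRUE(test, curve25519_generate_public(a_public, a_secret));
	KUNIT_ASSERT_TRUE(test, curve25519_generate_public(b_public, b_secret));

	/* Each side combines its own secret with the peer's public key. */
	KUNIT_ASSERT_TRUE(test, curve25519(ab, a_secret, b_public));
	KUNIT_ASSERT_TRUE(test, curve25519(ba, b_secret, a_public));

	/* Both sides must arrive at the same 32-byte shared secret. */
	KUNIT_EXPECT_MEMEQ(test, ab, ba, CURVE25519_KEY_SIZE);
}

static struct kunit_case curve25519_example_cases[] = {
	KUNIT_CASE(curve25519_shared_secret_example),
	{}
};

static struct kunit_suite curve25519_example_suite = {
	.name = "curve25519_example",
	.test_cases = curve25519_example_cases,
};
kunit_test_suite(curve25519_example_suite);

MODULE_LICENSE("GPL");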