From 714656a84697f9615b9488b490c99edb3ecfcd3d Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Tue, 22 Apr 2025 08:27:08 -0700
Subject: crypto: arm - move library functions to arch/arm/lib/crypto/

Continue disentangling the crypto library functions from the generic
crypto infrastructure by moving the arm BLAKE2s, ChaCha, and Poly1305
library functions into a new directory arch/arm/lib/crypto/ that does
not depend on CRYPTO. This mirrors the distinction between crypto/ and
lib/crypto/.

Signed-off-by: Eric Biggers
Signed-off-by: Herbert Xu
---
 arch/arm/lib/crypto/chacha-glue.c | 134 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 arch/arm/lib/crypto/chacha-glue.c

(limited to 'arch/arm/lib/crypto/chacha-glue.c')

diff --git a/arch/arm/lib/crypto/chacha-glue.c b/arch/arm/lib/crypto/chacha-glue.c
new file mode 100644
index 000000000000..12afb40cf1ff
--- /dev/null
+++ b/arch/arm/lib/crypto/chacha-glue.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ChaCha and HChaCha functions (ARM optimized)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd.
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				      int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+				       int nrounds, unsigned int nbytes);
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+			     const u32 *state, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
+
+static inline bool neon_usable(void)
+{
+	return static_branch_likely(&use_neon) && crypto_simd_usable();
+}
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+			  unsigned int bytes, int nrounds)
+{
+	u8 buf[CHACHA_BLOCK_SIZE];
+
+	while (bytes > CHACHA_BLOCK_SIZE) {
+		unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);
+
+		chacha_4block_xor_neon(state, dst, src, nrounds, l);
+		bytes -= l;
+		src += l;
+		dst += l;
+		state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
+	}
+	if (bytes) {
+		const u8 *s = src;
+		u8 *d = dst;
+
+		if (bytes != CHACHA_BLOCK_SIZE)
+			s = d = memcpy(buf, src, bytes);
+		chacha_block_xor_neon(state, d, s, nrounds);
+		if (d != dst)
+			memcpy(dst, buf, bytes);
+		state[12]++;
+	}
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
+		hchacha_block_arm(state, stream, nrounds);
+	} else {
+		kernel_neon_begin();
+		hchacha_block_neon(state, stream, nrounds);
+		kernel_neon_end();
+	}
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+		       int nrounds)
+{
+	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
+	    bytes <= CHACHA_BLOCK_SIZE) {
+		chacha_doarm(dst, src, bytes, state, nrounds);
+		state[12] +=
DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
+		return;
+	}
+
+	do {
+		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
+
+		kernel_neon_begin();
+		chacha_doneon(state, dst, src, todo, nrounds);
+		kernel_neon_end();
+
+		bytes -= todo;
+		src += todo;
+		dst += todo;
+	} while (bytes);
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+	/* We always can use at least the ARM scalar implementation. */
+	return true;
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init chacha_arm_mod_init(void)
+{
+	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+		switch (read_cpuid_part()) {
+		case ARM_CPU_PART_CORTEX_A7:
+		case ARM_CPU_PART_CORTEX_A5:
+			/*
+			 * The Cortex-A7 and Cortex-A5 do not perform well with
+			 * the NEON implementation but do incredibly with the
+			 * scalar one and use less power.
+			 */
+			break;
+		default:
+			static_branch_enable(&use_neon);
+		}
+	}
+	return 0;
+}
+arch_initcall(chacha_arm_mod_init);
+
+static void __exit chacha_arm_mod_exit(void)
+{
+}
+module_exit(chacha_arm_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)");
+MODULE_AUTHOR("Ard Biesheuvel ");
+MODULE_LICENSE("GPL v2");
--
cgit v1.2.3


From ef93f1562803cd7bb8159e3abedaf7f47dce4e35 Mon Sep 17 00:00:00 2001
From: Herbert Xu
Date: Wed, 30 Apr 2025 16:17:02 +0800
Subject: Revert "crypto: run initcalls for generic implementations earlier"

This reverts commit c4741b23059794bd99beef0f700103b0d983b3fd.

Crypto API self-tests no longer run at registration time and now
occur either at late_initcall or upon the first use.

Therefore the premise of the above commit no longer exists. Revert
it and subsequent additions of subsys_initcall and arch_initcall.

Note that lib/crypto calls will stay at subsys_initcall (or rather
downgraded from arch_initcall) because they may need to occur
before Crypto API registration.

Signed-off-by: Herbert Xu
---
 arch/arm/lib/crypto/chacha-glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/lib/crypto/chacha-glue.c')

diff --git a/arch/arm/lib/crypto/chacha-glue.c b/arch/arm/lib/crypto/chacha-glue.c
index 12afb40cf1ff..1e28736834a0 100644
--- a/arch/arm/lib/crypto/chacha-glue.c
+++ b/arch/arm/lib/crypto/chacha-glue.c
@@ -122,7 +122,7 @@ static int __init chacha_arm_mod_init(void)
 	}
 	return 0;
 }
-arch_initcall(chacha_arm_mod_init);
+subsys_initcall(chacha_arm_mod_init);
 
 static void __exit chacha_arm_mod_exit(void)
 {
--
cgit v1.2.3


From 98066f2f8901ccf72f3c5d6c391c8fff1cabd49d Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Mon, 5 May 2025 11:18:21 -0700
Subject: crypto: lib/chacha - strongly type the ChaCha state

The ChaCha state matrix is 16 32-bit words. Currently it is represented
in the code as a raw u32 array, or even just a pointer to u32. This
weak typing is error-prone. Instead, introduce struct chacha_state:

    struct chacha_state {
            u32 x[16];
    };

Convert all ChaCha and HChaCha functions to use struct chacha_state.
No functional changes.

Signed-off-by: Eric Biggers
Acked-by: Kent Overstreet
Signed-off-by: Herbert Xu
---
 arch/arm/lib/crypto/chacha-glue.c | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

(limited to 'arch/arm/lib/crypto/chacha-glue.c')

diff --git a/arch/arm/lib/crypto/chacha-glue.c b/arch/arm/lib/crypto/chacha-glue.c
index 1e28736834a0..0c2b4c62d484 100644
--- a/arch/arm/lib/crypto/chacha-glue.c
+++ b/arch/arm/lib/crypto/chacha-glue.c
@@ -17,15 +17,18 @@
 #include <asm/neon.h>
 #include <asm/simd.h>
 
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
-				      int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+asmlinkage void chacha_block_xor_neon(const struct chacha_state *state,
+				      u8 *dst, const u8 *src, int nrounds);
+asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
+				       u8 *dst, const u8 *src,
 				       int nrounds, unsigned int nbytes);
-asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_arm(const struct chacha_state *state,
+				  u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const struct chacha_state *state,
+				   u32 *out, int nrounds);
 
 asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
-			     const u32 *state, int nrounds);
+			     const struct chacha_state *state, int nrounds);
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
 
@@ -34,7 +37,7 @@ static inline bool neon_usable(void)
 	return static_branch_likely(&use_neon) && crypto_simd_usable();
 }
 
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
 			  unsigned int bytes, int nrounds)
 {
 	u8 buf[CHACHA_BLOCK_SIZE];
@@ -46,7 +49,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
 		bytes -= l;
 		src += l;
 		dst += l;
-		state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
+		state->x[12] +=
DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
 	}
 	if (bytes) {
 		const u8 *s = src;
@@ -57,11 +60,12 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
 		chacha_block_xor_neon(state, d, s, nrounds);
 		if (d != dst)
 			memcpy(dst, buf, bytes);
-		state[12]++;
+		state->x[12]++;
 	}
 }
 
-void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+void hchacha_block_arch(const struct chacha_state *state, u32 *stream,
+			int nrounds)
 {
 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
 		hchacha_block_arm(state, stream, nrounds);
@@ -73,13 +77,13 @@ void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
 }
 EXPORT_SYMBOL(hchacha_block_arch);
 
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
-		       int nrounds)
+void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src,
+		       unsigned int bytes, int nrounds)
 {
 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
 	    bytes <= CHACHA_BLOCK_SIZE) {
 		chacha_doarm(dst, src, bytes, state, nrounds);
-		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
+		state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
 		return;
 	}
 
--
cgit v1.2.3


From bdc2a55687f123bd32aaefb81e11c7450a431eaf Mon Sep 17 00:00:00 2001
From: Eric Biggers
Date: Mon, 5 May 2025 11:18:24 -0700
Subject: crypto: lib/chacha - add array bounds to function prototypes

Add explicit array bounds to the function prototypes for the parameters
that didn't already get handled by the conversion to use chacha_state:

- chacha_block_*():
  Change 'u8 *out' or 'u8 *stream' to u8 out[CHACHA_BLOCK_SIZE].

- hchacha_block_*():
  Change 'u32 *out' or 'u32 *stream' to u32 out[HCHACHA_OUT_WORDS].

- chacha_init():
  Change 'const u32 *key' to 'const u32 key[CHACHA_KEY_WORDS]'.
  Change 'const u8 *iv' to 'const u8 iv[CHACHA_IV_SIZE]'.

No functional changes. This just makes it clear when fixed-size arrays
are expected.

Signed-off-by: Eric Biggers
Signed-off-by: Herbert Xu
---
 arch/arm/lib/crypto/chacha-glue.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/arm/lib/crypto/chacha-glue.c')

diff --git a/arch/arm/lib/crypto/chacha-glue.c b/arch/arm/lib/crypto/chacha-glue.c
index 0c2b4c62d484..88ec96415283 100644
--- a/arch/arm/lib/crypto/chacha-glue.c
+++ b/arch/arm/lib/crypto/chacha-glue.c
@@ -23,9 +23,9 @@ asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state,
 				       u8 *dst, const u8 *src,
 				       int nrounds, unsigned int nbytes);
 asmlinkage void hchacha_block_arm(const struct chacha_state *state,
-				  u32 *out, int nrounds);
+				  u32 out[HCHACHA_OUT_WORDS], int nrounds);
 asmlinkage void hchacha_block_neon(const struct chacha_state *state,
-				   u32 *out, int nrounds);
+				   u32 out[HCHACHA_OUT_WORDS], int nrounds);
 
 asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
 			     const struct chacha_state *state, int nrounds);
@@ -64,14 +64,14 @@ static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src,
 	}
 }
 
-void hchacha_block_arch(const struct chacha_state *state, u32 *stream,
-			int nrounds)
+void hchacha_block_arch(const struct chacha_state *state,
+			u32 out[HCHACHA_OUT_WORDS], int nrounds)
 {
 	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
-		hchacha_block_arm(state, stream, nrounds);
+		hchacha_block_arm(state, out, nrounds);
 	} else {
 		kernel_neon_begin();
-		hchacha_block_neon(state, stream, nrounds);
+		hchacha_block_neon(state, out, nrounds);
 		kernel_neon_end();
 	}
 }
--
cgit v1.2.3