diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-05-26 13:47:28 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-05-26 13:47:28 -0700 |
| commit | 14418ddcc2c2055743ac7ee53d5ac2cf8a8660a7 (patch) | |
| tree | 4edb44583a986dce31f537ff3d55032118b2db3e /arch/arm/lib/crypto/chacha-glue.c | |
| parent | 15d90a5e5524532b7456a24f4626cf28c1629c4c (diff) | |
| parent | 2297554f01df6d3d4e98a3915c183ce3e491740a (diff) | |
Merge tag 'v6.16-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"API:
- Fix memcpy_sglist to handle partially overlapping SG lists
- Use memcpy_sglist to replace null skcipher
- Rename CRYPTO_TESTS to CRYPTO_BENCHMARK
- Flip CRYPTO_MANAGER_DISABLE_TEST into CRYPTO_SELFTESTS
- Hide CRYPTO_MANAGER
- Add delayed freeing of driver crypto_alg structures
Compression:
- Allocate large buffers on first use instead of initialisation in scomp
- Drop destination linearisation buffer in scomp
- Move scomp stream allocation into acomp
- Add acomp scatter-gather walker
- Remove request chaining
- Add optional async request allocation
Hashing:
- Remove request chaining
- Add optional async request allocation
- Move partial block handling into API
- Add ahash support to hmac
- Fix shash documentation to disallow usage in hard IRQs
Algorithms:
- Remove unnecessary SIMD fallback code on x86 and arm/arm64
- Drop avx10_256 xts(aes)/ctr(aes) on x86
- Improve avx-512 optimisations for xts(aes)
- Move chacha arch implementations into lib/crypto
- Move poly1305 into lib/crypto and drop unused Crypto API algorithm
- Disable powerpc/poly1305 as it has no SIMD fallback
- Move sha256 arch implementations into lib/crypto
- Convert deflate to acomp
- Set block size correctly in cbcmac
Drivers:
- Do not use sg_dma_len before mapping in sun8i-ss
- Fix warm-reboot failure by making shutdown do more work in qat
- Add locking in zynqmp-sha
- Remove cavium/zip
- Add support for PCI device 0x17D8 to ccp
- Add qat_6xxx support in qat
- Add support for RK3576 in rockchip-rng
- Add support for i.MX8QM in caam
Others:
- Fix irq_fpu_usable/kernel_fpu_begin inconsistency during CPU bring-up
- Add new SEV/SNP platform shutdown API in ccp"
* tag 'v6.16-p1' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (382 commits)
x86/fpu: Fix irq_fpu_usable() to return false during CPU onlining
crypto: qat - add missing header inclusion
crypto: api - Redo lookup on EEXIST
Revert "crypto: testmgr - Add hash export format testing"
crypto: marvell/cesa - Do not chain submitted requests
crypto: powerpc/poly1305 - add depends on BROKEN for now
Revert "crypto: powerpc/poly1305 - Add SIMD fallback"
crypto: ccp - Add missing tee info reg for teev2
crypto: ccp - Add missing bootloader info reg for pspv5
crypto: sun8i-ce - move fallback ahash_request to the end of the struct
crypto: octeontx2 - Use dynamic allocated memory region for lmtst
crypto: octeontx2 - Initialize cptlfs device info once
crypto: xts - Only add ecb if it is not already there
crypto: lrw - Only add ecb if it is not already there
crypto: testmgr - Add hash export format testing
crypto: testmgr - Use ahash for generic tfm
crypto: hmac - Add ahash support
crypto: testmgr - Ignore EEXIST on shash allocation
crypto: algapi - Add driver template support to crypto_inst_setname
crypto: shash - Set reqsize in shash_alg
...
Diffstat (limited to 'arch/arm/lib/crypto/chacha-glue.c')
| -rw-r--r-- | arch/arm/lib/crypto/chacha-glue.c | 138 |
1 files changed, 138 insertions, 0 deletions
diff --git a/arch/arm/lib/crypto/chacha-glue.c b/arch/arm/lib/crypto/chacha-glue.c new file mode 100644 index 000000000000..88ec96415283 --- /dev/null +++ b/arch/arm/lib/crypto/chacha-glue.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ChaCha and HChaCha functions (ARM optimized) + * + * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> + * Copyright (C) 2015 Martin Willi + */ + +#include <crypto/chacha.h> +#include <crypto/internal/simd.h> +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include <asm/cputype.h> +#include <asm/hwcap.h> +#include <asm/neon.h> +#include <asm/simd.h> + +asmlinkage void chacha_block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, int nrounds); +asmlinkage void chacha_4block_xor_neon(const struct chacha_state *state, + u8 *dst, const u8 *src, + int nrounds, unsigned int nbytes); +asmlinkage void hchacha_block_arm(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); +asmlinkage void hchacha_block_neon(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds); + +asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, + const struct chacha_state *state, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); + +static inline bool neon_usable(void) +{ + return static_branch_likely(&use_neon) && crypto_simd_usable(); +} + +static void chacha_doneon(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + u8 buf[CHACHA_BLOCK_SIZE]; + + while (bytes > CHACHA_BLOCK_SIZE) { + unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); + + chacha_4block_xor_neon(state, dst, src, nrounds, l); + bytes -= l; + src += l; + dst += l; + state->x[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); + } + if (bytes) { + const u8 *s = src; + u8 *d = dst; + + if (bytes != CHACHA_BLOCK_SIZE) + s = d = memcpy(buf, src, bytes); + chacha_block_xor_neon(state, d, s, nrounds); + if (d != dst) + memcpy(dst, buf, bytes); + state->x[12]++; + } +} + +void hchacha_block_arch(const struct chacha_state *state, + u32 out[HCHACHA_OUT_WORDS], int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { + hchacha_block_arm(state, out, nrounds); + } else { + kernel_neon_begin(); + hchacha_block_neon(state, out, nrounds); + kernel_neon_end(); + } +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_crypt_arch(struct chacha_state *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || + bytes <= CHACHA_BLOCK_SIZE) { + chacha_doarm(dst, src, bytes, state, nrounds); + state->x[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); + return; + } + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + kernel_neon_begin(); + chacha_doneon(state, dst, src, todo, nrounds); + kernel_neon_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +bool chacha_is_arch_optimized(void) +{ + /* We always can use at least the ARM scalar implementation. */ + return true; +} +EXPORT_SYMBOL(chacha_is_arch_optimized); + +static int __init chacha_arm_mod_init(void) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A7: + case ARM_CPU_PART_CORTEX_A5: + /* + * The Cortex-A7 and Cortex-A5 do not perform well with + * the NEON implementation but do incredibly with the + * scalar one and use less power. + */ + break; + default: + static_branch_enable(&use_neon); + } + } + return 0; +} +subsys_initcall(chacha_arm_mod_init); + +static void __exit chacha_arm_mod_exit(void) +{ +} +module_exit(chacha_arm_mod_exit); + +MODULE_DESCRIPTION("ChaCha and HChaCha functions (ARM optimized)"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); |
