From 4fb623074ea537524d06598acbb5517f027f3b53 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Oct 2025 13:27:08 +0200 Subject: lib/crc: Switch ARM and arm64 to 'ksimd' scoped guard API Before modifying the prototypes of kernel_neon_begin() and kernel_neon_end() to accommodate kernel mode FP/SIMD state buffers allocated on the stack, move arm64 to the new 'ksimd' scoped guard API, which encapsulates the calls to those functions. For symmetry, do the same for 32-bit ARM too. Reviewed-by: Eric Biggers Reviewed-by: Jonathan Cameron Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel --- lib/crc/arm/crc-t10dif.h | 19 ++++++------------- lib/crc/arm/crc32.h | 11 ++++------- lib/crc/arm64/crc-t10dif.h | 19 ++++++------------- lib/crc/arm64/crc32.h | 16 ++++++---------- 4 files changed, 22 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/crc/arm/crc-t10dif.h b/lib/crc/arm/crc-t10dif.h index 63441de5e3f1..afc0ebf97f19 100644 --- a/lib/crc/arm/crc-t10dif.h +++ b/lib/crc/arm/crc-t10dif.h @@ -5,7 +5,6 @@ * Copyright (C) 2016 Linaro Ltd */ -#include #include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); @@ -19,22 +18,16 @@ asmlinkage void crc_t10dif_pmull8(u16 init_crc, const u8 *buf, size_t len, static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) { - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) { if (static_branch_likely(&have_pmull)) { - if (likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc_t10dif_pmull64(crc, data, length); - kernel_neon_end(); - return crc; - } + scoped_ksimd() + return crc_t10dif_pmull64(crc, data, length); } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && - static_branch_likely(&have_neon) && - likely(may_use_simd())) { + static_branch_likely(&have_neon)) { u8 buf[16] __aligned(16); - kernel_neon_begin(); - crc_t10dif_pmull8(crc, data, length, buf); - kernel_neon_end(); + scoped_ksimd() + crc_t10dif_pmull8(crc, data, length, buf); return crc_t10dif_generic(0, buf, sizeof(buf)); } diff --git a/lib/crc/arm/crc32.h b/lib/crc/arm/crc32.h index 7b76f52f6907..f33de6b22cd4 100644 --- a/lib/crc/arm/crc32.h +++ b/lib/crc/arm/crc32.h @@ -8,7 +8,6 @@ #include #include -#include #include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_crc32); @@ -42,9 +41,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) len -= n; } n = round_down(len, 16); - kernel_neon_begin(); - crc = crc32_pmull_le(p, n, crc); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_pmull_le(p, n, crc); p += n; len -= n; } @@ -71,9 +69,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) len -= n; } n = round_down(len, 16); - kernel_neon_begin(); - crc = crc32c_pmull_le(p, n, crc); - kernel_neon_end(); + scoped_ksimd() + crc = crc32c_pmull_le(p, n, crc); p += n; len -= n; } diff --git a/lib/crc/arm64/crc-t10dif.h b/lib/crc/arm64/crc-t10dif.h index f88db2971805..b8338139ed77 100644 --- a/lib/crc/arm64/crc-t10dif.h +++ b/lib/crc/arm64/crc-t10dif.h @@ -7,7 +7,6 @@ #include -#include #include static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_asimd); @@ -21,22 +20,16 @@ asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len); static inline u16 crc_t10dif_arch(u16 crc, const u8 *data, size_t length) { - if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE) { + if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && likely(may_use_simd())) { if (static_branch_likely(&have_pmull)) { - if (likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc_t10dif_pmull_p64(crc, data, length); - kernel_neon_end(); - return crc; - } + scoped_ksimd() + return crc_t10dif_pmull_p64(crc, data, length); } else if (length > CRC_T10DIF_PMULL_CHUNK_SIZE && - static_branch_likely(&have_asimd) && - likely(may_use_simd())) { + static_branch_likely(&have_asimd)) { u8 buf[16]; - kernel_neon_begin(); - crc_t10dif_pmull_p8(crc, data, length, buf); - kernel_neon_end(); + scoped_ksimd() + crc_t10dif_pmull_p8(crc, data, length, buf); return crc_t10dif_generic(0, buf, sizeof(buf)); } diff --git a/lib/crc/arm64/crc32.h b/lib/crc/arm64/crc32.h index 31e649cd40a2..1939a5dee477 100644 --- a/lib/crc/arm64/crc32.h +++ b/lib/crc/arm64/crc32.h @@ -2,7 +2,6 @@ #include #include -#include #include // The minimum input length to consider the 4-way interleaved code path @@ -23,9 +22,8 @@ static inline u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32_le_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_le_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; @@ -44,9 +42,8 @@ static inline u32 crc32c_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32c_le_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32c_le_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; @@ -65,9 +62,8 @@ static inline u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) if (len >= min_len && cpu_have_named_feature(PMULL) && likely(may_use_simd())) { - kernel_neon_begin(); - crc = crc32_be_arm64_4way(crc, p, len); - kernel_neon_end(); + scoped_ksimd() + crc = crc32_be_arm64_4way(crc, p, len); p += round_down(len, 64); len %= 64; -- cgit v1.2.3 From 3142ec4af2a308f08693d355a786871e84a64ed9 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 1 Oct 2025 12:23:40 +0200 Subject: raid6: Move to more abstract 'ksimd' guard API Move away from calling kernel_neon_begin() and kernel_neon_end() directly, and instead, use the newly introduced scoped_ksimd() API. This permits arm64 to modify the kernel mode NEON API without affecting code that is shared between ARM and arm64. Reviewed-by: Eric Biggers Reviewed-by: Jonathan Cameron Acked-by: Catalin Marinas Signed-off-by: Ard Biesheuvel --- lib/raid6/neon.c | 17 +++++++---------- lib/raid6/recov_neon.c | 15 ++++++--------- 2 files changed, 13 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c index 0a2e76035ea9..6d9474ce6da9 100644 --- a/lib/raid6/neon.c +++ b/lib/raid6/neon.c @@ -8,10 +8,9 @@ #include #ifdef __KERNEL__ -#include +#include #else -#define kernel_neon_begin() -#define kernel_neon_end() +#define scoped_ksimd() #define cpu_has_neon() (1) #endif @@ -32,10 +31,9 @@ { \ void raid6_neon ## _n ## _gen_syndrome_real(int, \ unsigned long, void**); \ - kernel_neon_begin(); \ - raid6_neon ## _n ## _gen_syndrome_real(disks, \ + scoped_ksimd() \ + raid6_neon ## _n ## _gen_syndrome_real(disks, \ (unsigned long)bytes, ptrs); \ - kernel_neon_end(); \ } \ static void raid6_neon ## _n ## _xor_syndrome(int disks, \ int start, int stop, \ @@ -43,10 +41,9 @@ { \ void raid6_neon ## _n ## _xor_syndrome_real(int, \ int, int, unsigned long, void**); \ - kernel_neon_begin(); \ - raid6_neon ## _n ## _xor_syndrome_real(disks, \ - start, stop, (unsigned long)bytes, ptrs); \ - kernel_neon_end(); \ + scoped_ksimd() \ + raid6_neon ## _n ## _xor_syndrome_real(disks, \ + start, stop, (unsigned long)bytes, ptrs);\ } \ struct raid6_calls const raid6_neonx ## _n = { \ raid6_neon ## _n ## _gen_syndrome, \ diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c index 70e1404c1512..9d99aeabd31a 100644 --- a/lib/raid6/recov_neon.c +++ b/lib/raid6/recov_neon.c @@ -7,11 +7,10 @@ #include #ifdef __KERNEL__ -#include +#include #include "neon.h" #else -#define kernel_neon_begin() -#define kernel_neon_end() +#define scoped_ksimd() #define cpu_has_neon() (1) #endif @@ -55,9 +54,8 @@ static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]]; - kernel_neon_begin(); - __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); - kernel_neon_end(); + scoped_ksimd() + __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); } static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, @@ -86,9 +84,8 @@ static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, /* Now, pick the proper data tables */ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; - kernel_neon_begin(); - __raid6_datap_recov_neon(bytes, p, q, dq, qmul); - kernel_neon_end(); + scoped_ksimd() + __raid6_datap_recov_neon(bytes, p, q, dq, qmul); } const struct raid6_recov_calls raid6_recov_neon = { -- cgit v1.2.3