summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorEric Biggers <ebiggers@kernel.org>2026-03-18 23:17:12 -0700
committerEric Biggers <ebiggers@kernel.org>2026-03-23 16:44:29 -0700
commit73f315c15d6ec1ef33202e7253af90dd44ff4a3b (patch)
tree52c7f7e2e54d594ee7d1d3d96119ec6932f7334b /lib
parent631a84e49e5b16ab83098350c2695d806ad54d23 (diff)
lib/crypto: powerpc/ghash: Migrate optimized code into library
Remove the "p8_ghash" crypto_shash algorithm. Move the corresponding assembly code into lib/crypto/, and wire it up to the GHASH library. This makes the GHASH library be optimized for POWER8. It also greatly reduces the amount of powerpc-specific glue code that is needed, and it fixes the issue where this optimized GHASH code was disabled by default. Note that previously the C code defined the POWER8 GHASH key format as "u128 htable[16]", despite the assembly code only using four entries. Fix the C code to use the correct key format. To fulfill the library API contract, also make the key preparation work in all contexts. Note that the POWER8 assembly code takes the accumulator in GHASH format, but it actually byte-reflects it to get it into POLYVAL format. The library already works with POLYVAL natively. For now, just wire up this existing code by converting it to/from GHASH format in C code. This should be cleaned up to eliminate the unnecessary conversion later. Acked-by: Ard Biesheuvel <ardb@kernel.org> Link: https://lore.kernel.org/r/20260319061723.1140720-12-ebiggers@kernel.org Signed-off-by: Eric Biggers <ebiggers@kernel.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/crypto/Kconfig1
-rw-r--r--lib/crypto/Makefile25
-rw-r--r--lib/crypto/powerpc/.gitignore1
-rw-r--r--lib/crypto/powerpc/gf128hash.h109
-rw-r--r--lib/crypto/powerpc/ghashp8-ppc.pl244
5 files changed, 375 insertions, 5 deletions
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 4f1a79883a56..f54add7d9070 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -121,6 +121,7 @@ config CRYPTO_LIB_GF128HASH_ARCH
depends on CRYPTO_LIB_GF128HASH && !UML
default y if ARM && KERNEL_MODE_NEON
default y if ARM64
+ default y if PPC64 && VSX
default y if X86_64
config CRYPTO_LIB_MD5
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 4ce0bac8fd93..8a9084188778 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -8,6 +8,10 @@ quiet_cmd_perlasm = PERLASM $@
quiet_cmd_perlasm_with_args = PERLASM $@
cmd_perlasm_with_args = $(PERL) $(<) void $(@)
+ppc64-perlasm-flavour-y := linux-ppc64
+ppc64-perlasm-flavour-$(CONFIG_PPC64_ELF_ABI_V2) := linux-ppc64-elfv2
+ppc64-perlasm-flavour-$(CONFIG_CPU_LITTLE_ENDIAN) := linux-ppc64le
+
obj-$(CONFIG_KUNIT) += tests/
obj-$(CONFIG_CRYPTO_HASH_INFO) += hash_info.o
@@ -36,11 +40,8 @@ libaes-y += powerpc/aes-spe-core.o \
powerpc/aes-tab-4k.o
else
libaes-y += powerpc/aesp8-ppc.o
-aes-perlasm-flavour-y := linux-ppc64
-aes-perlasm-flavour-$(CONFIG_PPC64_ELF_ABI_V2) := linux-ppc64-elfv2
-aes-perlasm-flavour-$(CONFIG_CPU_LITTLE_ENDIAN) := linux-ppc64le
quiet_cmd_perlasm_aes = PERLASM $@
- cmd_perlasm_aes = $(PERL) $< $(aes-perlasm-flavour-y) $@
+ cmd_perlasm_aes = $(PERL) $< $(ppc64-perlasm-flavour-y) $@
# Use if_changed instead of cmd, in case the flavour changed.
$(obj)/powerpc/aesp8-ppc.S: $(src)/powerpc/aesp8-ppc.pl FORCE
$(call if_changed,perlasm_aes)
@@ -161,9 +162,23 @@ CFLAGS_gf128hash.o += -I$(src)/$(SRCARCH)
libgf128hash-$(CONFIG_ARM) += arm/ghash-neon-core.o
libgf128hash-$(CONFIG_ARM64) += arm64/ghash-neon-core.o \
arm64/polyval-ce-core.o
-libgf128hash-$(CONFIG_X86) += x86/polyval-pclmul-avx.o
+
+ifeq ($(CONFIG_PPC),y)
+libgf128hash-y += powerpc/ghashp8-ppc.o
+quiet_cmd_perlasm_ghash = PERLASM $@
+ cmd_perlasm_ghash = $(PERL) $< $(ppc64-perlasm-flavour-y) $@
+$(obj)/powerpc/ghashp8-ppc.S: $(src)/powerpc/ghashp8-ppc.pl FORCE
+ $(call if_changed,perlasm_ghash)
+targets += powerpc/ghashp8-ppc.S
+OBJECT_FILES_NON_STANDARD_powerpc/ghashp8-ppc.o := y
endif
+libgf128hash-$(CONFIG_X86) += x86/polyval-pclmul-avx.o
+endif # CONFIG_CRYPTO_LIB_GF128HASH_ARCH
+
+# clean-files must be defined unconditionally
+clean-files += powerpc/ghashp8-ppc.S
+
################################################################################
obj-$(CONFIG_CRYPTO_LIB_MD5) += libmd5.o
diff --git a/lib/crypto/powerpc/.gitignore b/lib/crypto/powerpc/.gitignore
index 598ca7aff6b1..7aa71d83f739 100644
--- a/lib/crypto/powerpc/.gitignore
+++ b/lib/crypto/powerpc/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
aesp8-ppc.S
+ghashp8-ppc.S
diff --git a/lib/crypto/powerpc/gf128hash.h b/lib/crypto/powerpc/gf128hash.h
new file mode 100644
index 000000000000..629cd325d0c7
--- /dev/null
+++ b/lib/crypto/powerpc/gf128hash.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GHASH routines supporting VMX instructions on the Power 8
+ *
+ * Copyright (C) 2015, 2019 International Business Machines Inc.
+ * Copyright (C) 2014 - 2018 Linaro Ltd.
+ * Copyright 2026 Google LLC
+ */
+
+#include <asm/simd.h>
+#include <asm/switch_to.h>
+#include <linux/cpufeature.h>
+#include <linux/jump_label.h>
+#include <linux/preempt.h>
+#include <linux/uaccess.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_vec_crypto);
+
+void gcm_init_p8(u64 htable[4][2], const u8 h[16]);
+void gcm_gmult_p8(u8 Xi[16], const u64 htable[4][2]);
+void gcm_ghash_p8(u8 Xi[16], const u64 htable[4][2], const u8 *in, size_t len);
+
+#define ghash_preparekey_arch ghash_preparekey_arch
+static void ghash_preparekey_arch(struct ghash_key *key,
+ const u8 raw_key[GHASH_BLOCK_SIZE])
+{
+ ghash_key_to_polyval(raw_key, &key->h);
+
+ if (static_branch_likely(&have_vec_crypto) && likely(may_use_simd())) {
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ gcm_init_p8(key->htable, raw_key);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+ } else {
+ /* This reproduces gcm_init_p8() on both LE and BE systems. */
+ key->htable[0][0] = 0;
+ key->htable[0][1] = 0xc200000000000000;
+
+ key->htable[1][0] = 0;
+ key->htable[1][1] = le64_to_cpu(key->h.lo);
+
+ key->htable[2][0] = le64_to_cpu(key->h.lo);
+ key->htable[2][1] = le64_to_cpu(key->h.hi);
+
+ key->htable[3][0] = le64_to_cpu(key->h.hi);
+ key->htable[3][1] = 0;
+ }
+}
+
+#define ghash_mul_arch ghash_mul_arch
+static void ghash_mul_arch(struct polyval_elem *acc,
+ const struct ghash_key *key)
+{
+ if (static_branch_likely(&have_vec_crypto) && likely(may_use_simd())) {
+ u8 ghash_acc[GHASH_BLOCK_SIZE];
+
+ polyval_acc_to_ghash(acc, ghash_acc);
+
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ gcm_gmult_p8(ghash_acc, key->htable);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ghash_acc_to_polyval(ghash_acc, acc);
+ memzero_explicit(ghash_acc, sizeof(ghash_acc));
+ } else {
+ polyval_mul_generic(acc, &key->h);
+ }
+}
+
+#define ghash_blocks_arch ghash_blocks_arch
+static void ghash_blocks_arch(struct polyval_elem *acc,
+ const struct ghash_key *key,
+ const u8 *data, size_t nblocks)
+{
+ if (static_branch_likely(&have_vec_crypto) && likely(may_use_simd())) {
+ u8 ghash_acc[GHASH_BLOCK_SIZE];
+
+ polyval_acc_to_ghash(acc, ghash_acc);
+
+ preempt_disable();
+ pagefault_disable();
+ enable_kernel_vsx();
+ gcm_ghash_p8(ghash_acc, key->htable, data,
+ nblocks * GHASH_BLOCK_SIZE);
+ disable_kernel_vsx();
+ pagefault_enable();
+ preempt_enable();
+
+ ghash_acc_to_polyval(ghash_acc, acc);
+ memzero_explicit(ghash_acc, sizeof(ghash_acc));
+ } else {
+ ghash_blocks_generic(acc, &key->h, data, nblocks);
+ }
+}
+
+#define gf128hash_mod_init_arch gf128hash_mod_init_arch
+static void gf128hash_mod_init_arch(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
+ (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO))
+ static_branch_enable(&have_vec_crypto);
+}
diff --git a/lib/crypto/powerpc/ghashp8-ppc.pl b/lib/crypto/powerpc/ghashp8-ppc.pl
new file mode 100644
index 000000000000..7c38eedc02cc
--- /dev/null
+++ b/lib/crypto/powerpc/ghashp8-ppc.pl
@@ -0,0 +1,244 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
+
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see https://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# GHASH for PowerISA v2.07.
+#
+# July 2014
+#
+# Accurate performance measurements are problematic, because it's
+# always virtualized setup with possibly throttled processor.
+# Relative comparison is therefore more informative. This initial
+# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
+# faster than "4-bit" integer-only compiler-generated 64-bit code.
+# "Initial version" means that there is room for futher improvement.
+
+$flavour=shift;
+$output =shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T=8;
+ $LRSAVE=2*$SIZE_T;
+ $STU="stdu";
+ $POP="ld";
+ $PUSH="std";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T=4;
+ $LRSAVE=$SIZE_T;
+ $STU="stwu";
+ $POP="lwz";
+ $PUSH="stw";
+} else { die "nonsense $flavour"; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+( $xlate="${dir}../../../arch/powerpc/crypto/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!";
+
+my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block
+
+my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3));
+my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12));
+my $vrsave="r12";
+
+$code=<<___;
+.machine "any"
+
+.text
+
+.globl .gcm_init_p8
+ lis r0,0xfff0
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $H,0,r4 # load H
+ le?xor r7,r7,r7
+ le?addi r7,r7,0x8 # need a vperm start with 08
+ le?lvsr 5,0,r7
+ le?vspltisb 6,0x0f
+ le?vxor 5,5,6 # set a b-endian mask
+ le?vperm $H,$H,$H,5
+
+ vspltisb $xC2,-16 # 0xf0
+ vspltisb $t0,1 # one
+ vaddubm $xC2,$xC2,$xC2 # 0xe0
+ vxor $zero,$zero,$zero
+ vor $xC2,$xC2,$t0 # 0xe1
+ vsldoi $xC2,$xC2,$zero,15 # 0xe1...
+ vsldoi $t1,$zero,$t0,1 # ...1
+ vaddubm $xC2,$xC2,$xC2 # 0xc2...
+ vspltisb $t2,7
+ vor $xC2,$xC2,$t1 # 0xc2....01
+ vspltb $t1,$H,0 # most significant byte
+ vsl $H,$H,$t0 # H<<=1
+ vsrab $t1,$t1,$t2 # broadcast carry bit
+ vand $t1,$t1,$xC2
+ vxor $H,$H,$t1 # twisted H
+
+ vsldoi $H,$H,$H,8 # twist even more ...
+ vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
+ vsldoi $Hl,$zero,$H,8 # ... and split
+ vsldoi $Hh,$H,$zero,8
+
+ stvx_u $xC2,0,r3 # save pre-computed table
+ stvx_u $Hl,r8,r3
+ stvx_u $H, r9,r3
+ stvx_u $Hh,r10,r3
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_init_p8,.-.gcm_init_p8
+
+.globl .gcm_gmult_p8
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $IN,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $zero,$zero,$zero
+
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+
+ vpmsumd $t2,$Xl,$xC2 # 1st phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vpmsumd $Xl,$Xl,$xC2
+ vxor $t1,$t1,$Xh
+ vxor $Xl,$Xl,$t1
+
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,2,0
+ .long 0
+.size .gcm_gmult_p8,.-.gcm_gmult_p8
+
+.globl .gcm_ghash_p8
+ lis r0,0xfff8
+ li r8,0x10
+ mfspr $vrsave,256
+ li r9,0x20
+ mtspr 256,r0
+ li r10,0x30
+ lvx_u $Xl,0,$Xip # load Xi
+
+ lvx_u $Hl,r8,$Htbl # load pre-computed table
+ le?lvsl $lemask,r0,r0
+ lvx_u $H, r9,$Htbl
+ le?vspltisb $t0,0x07
+ lvx_u $Hh,r10,$Htbl
+ le?vxor $lemask,$lemask,$t0
+ lvx_u $xC2,0,$Htbl
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ vxor $zero,$zero,$zero
+
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+ subi $len,$len,16
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $IN,$IN,$Xl
+ b Loop
+
+.align 5
+Loop:
+ subic $len,$len,16
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ subfe. r0,r0,r0 # borrow?-1:0
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ and r0,r0,$len
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ add $inp,$inp,r0
+
+ vpmsumd $t2,$Xl,$xC2 # 1st phase
+
+ vsldoi $t0,$Xm,$zero,8
+ vsldoi $t1,$zero,$Xm,8
+ vxor $Xl,$Xl,$t0
+ vxor $Xh,$Xh,$t1
+
+ vsldoi $Xl,$Xl,$Xl,8
+ vxor $Xl,$Xl,$t2
+ lvx_u $IN,0,$inp
+ addi $inp,$inp,16
+
+ vsldoi $t1,$Xl,$Xl,8 # 2nd phase
+ vpmsumd $Xl,$Xl,$xC2
+ le?vperm $IN,$IN,$IN,$lemask
+ vxor $t1,$t1,$Xh
+ vxor $IN,$IN,$t1
+ vxor $IN,$IN,$Xl
+ beq Loop # did $len-=16 borrow?
+
+ vxor $Xl,$Xl,$t1
+ le?vperm $Xl,$Xl,$Xl,$lemask
+ stvx_u $Xl,0,$Xip # write out Xi
+
+ mtspr 256,$vrsave
+ blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,4,0
+ .long 0
+.size .gcm_ghash_p8,.-.gcm_ghash_p8
+
+.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ if ($flavour =~ /le$/o) { # little-endian
+ s/le\?//o or
+ s/be\?/#be#/o;
+ } else {
+ s/le\?/#le#/o or
+ s/be\?//o;
+ }
+ print $_,"\n";
+}
+
+close STDOUT; # enforce flush