summaryrefslogtreecommitdiff
path: root/arch/arm64/crypto/aes-modes.S
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-09-18 12:11:14 -0700
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-09-18 12:11:14 -0700
commit 8b53c76533aa4356602aea98f98a2f3b4051464c (patch)
tree ab10ba58e21501407f8108a6bb9003daa2176962 /arch/arm64/crypto/aes-modes.S
parent 6cfae0c26b21dce323fe8799b66cf4bc996e3565 (diff)
parent 9575d1a5c0780ea26ff8dd29c94a32be32ce3c85 (diff)
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
 "API:
   - Add the ability to abort a skcipher walk.

  Algorithms:
   - Fix XTS to actually do the stealing.
   - Add library helpers for AES and DES for single-block users.
   - Add library helpers for SHA256.
   - Add new DES key verification helper.
   - Add surrounding bits for ESSIV generator.
   - Add accelerations for aegis128.
   - Add test vectors for lzo-rle.

  Drivers:
   - Add i.MX8MQ support to caam.
   - Add gcm/ccm/cfb/ofb aes support in inside-secure.
   - Add ofb/cfb aes support in media-tek.
   - Add HiSilicon ZIP accelerator support.

  Others:
   - Fix potential race condition in padata.
   - Use unbound workqueues in padata"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (311 commits)
  crypto: caam - Cast to long first before pointer conversion
  crypto: ccree - enable CTS support in AES-XTS
  crypto: inside-secure - Probe transform record cache RAM sizes
  crypto: inside-secure - Base RD fetchcount on actual RD FIFO size
  crypto: inside-secure - Base CD fetchcount on actual CD FIFO size
  crypto: inside-secure - Enable extended algorithms on newer HW
  crypto: inside-secure: Corrected configuration of EIP96_TOKEN_CTRL
  crypto: inside-secure - Add EIP97/EIP197 and endianness detection
  padata: remove cpu_index from the parallel_queue
  padata: unbind parallel jobs from specific CPUs
  padata: use separate workqueues for parallel and serial work
  padata, pcrypt: take CPU hotplug lock internally in padata_alloc_possible
  crypto: pcrypt - remove padata cpumask notifier
  padata: make padata_do_parallel find alternate callback CPU
  workqueue: require CPU hotplug read exclusion for apply_workqueue_attrs
  workqueue: unconfine alloc/apply/free_workqueue_attrs()
  padata: allocate workqueue internally
  arm64: dts: imx8mq: Add CAAM node
  random: Use wait_event_freezable() in add_hwgenerator_randomness()
  crypto: ux500 - Fix COMPILE_TEST warnings
  ...
Diffstat (limited to 'arch/arm64/crypto/aes-modes.S')
-rw-r--r-- arch/arm64/crypto/aes-modes.S | 135
1 file changed, 114 insertions, 21 deletions
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 324039b72094..131618389f1f 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -118,8 +118,23 @@ AES_ENDPROC(aes_ecb_decrypt)
* int blocks, u8 iv[])
* aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
* int blocks, u8 iv[])
+ * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+ * int rounds, int blocks, u8 iv[],
+ * u32 const rk2[]);
+ * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+ * int rounds, int blocks, u8 iv[],
+ * u32 const rk2[]);
*/
+AES_ENTRY(aes_essiv_cbc_encrypt)
+ ld1 {v4.16b}, [x5] /* get iv */
+
+ mov w8, #14 /* AES-256: 14 rounds */
+ enc_prepare w8, x6, x7
+ encrypt_block v4, w8, x6, x7, w9
+ enc_switch_key w3, x2, x6
+ b .Lcbcencloop4x
+
AES_ENTRY(aes_cbc_encrypt)
ld1 {v4.16b}, [x5] /* get iv */
enc_prepare w3, x2, x6
@@ -153,13 +168,25 @@ AES_ENTRY(aes_cbc_encrypt)
st1 {v4.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_encrypt)
+AES_ENDPROC(aes_essiv_cbc_encrypt)
+AES_ENTRY(aes_essiv_cbc_decrypt)
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp
+
+ ld1 {cbciv.16b}, [x5] /* get iv */
+
+ mov w8, #14 /* AES-256: 14 rounds */
+ enc_prepare w8, x6, x7
+ encrypt_block cbciv, w8, x6, x7, w9
+ b .Lessivcbcdecstart
AES_ENTRY(aes_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
ld1 {cbciv.16b}, [x5] /* get iv */
+.Lessivcbcdecstart:
dec_prepare w3, x2, x6
.LcbcdecloopNx:
@@ -212,6 +239,7 @@ ST5( st1 {v4.16b}, [x0], #16 )
ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_cbc_decrypt)
+AES_ENDPROC(aes_essiv_cbc_decrypt)
/*
@@ -265,12 +293,11 @@ AES_ENTRY(aes_cbc_cts_decrypt)
ld1 {v5.16b}, [x5] /* get iv */
dec_prepare w3, x2, x6
- tbl v2.16b, {v1.16b}, v4.16b
decrypt_block v0, w3, x2, x6, w7
- eor v2.16b, v2.16b, v0.16b
+ tbl v2.16b, {v0.16b}, v3.16b
+ eor v2.16b, v2.16b, v1.16b
tbx v0.16b, {v1.16b}, v4.16b
- tbl v2.16b, {v2.16b}, v3.16b
decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v5.16b /* xor with iv */
@@ -386,10 +413,10 @@ AES_ENDPROC(aes_ctr_encrypt)
/*
+ * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+ * int bytes, u8 const rk2[], u8 iv[], int first)
* aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
- * int blocks, u8 const rk2[], u8 iv[], int first)
- * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
- * int blocks, u8 const rk2[], u8 iv[], int first)
+ * int bytes, u8 const rk2[], u8 iv[], int first)
*/
.macro next_tweak, out, in, tmp
@@ -415,6 +442,7 @@ AES_ENTRY(aes_xts_encrypt)
cbz w7, .Lxtsencnotfirst
enc_prepare w3, x5, x8
+ xts_cts_skip_tw w7, .LxtsencNx
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
enc_switch_key w3, x2, x8
b .LxtsencNx
@@ -424,7 +452,7 @@ AES_ENTRY(aes_xts_encrypt)
.LxtsencloopNx:
next_tweak v4, v4, v8
.LxtsencNx:
- subs w4, w4, #4
+ subs w4, w4, #64
bmi .Lxtsenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
next_tweak v5, v4, v8
@@ -441,39 +469,74 @@ AES_ENTRY(aes_xts_encrypt)
eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
- cbz w4, .Lxtsencout
+ cbz w4, .Lxtsencret
xts_reload_mask v8
b .LxtsencloopNx
.Lxtsenc1x:
- adds w4, w4, #4
+ adds w4, w4, #64
beq .Lxtsencout
+ subs w4, w4, #16
+ bmi .LxtsencctsNx
.Lxtsencloop:
- ld1 {v1.16b}, [x1], #16
- eor v0.16b, v1.16b, v4.16b
+ ld1 {v0.16b}, [x1], #16
+.Lxtsencctsout:
+ eor v0.16b, v0.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
- beq .Lxtsencout
+ cbz w4, .Lxtsencout
+ subs w4, w4, #16
next_tweak v4, v4, v8
+ bmi .Lxtsenccts
+ st1 {v0.16b}, [x0], #16
b .Lxtsencloop
.Lxtsencout:
+ st1 {v0.16b}, [x0]
+.Lxtsencret:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
ret
-AES_ENDPROC(aes_xts_encrypt)
+.LxtsencctsNx:
+ mov v0.16b, v3.16b
+ sub x0, x0, #16
+.Lxtsenccts:
+ adr_l x8, .Lcts_permute_table
+
+ add x1, x1, w4, sxtw /* rewind input pointer */
+ add w4, w4, #16 /* # bytes in final block */
+ add x9, x8, #32
+ add x8, x8, x4
+ sub x9, x9, x4
+ add x4, x0, x4 /* output address of final block */
+
+ ld1 {v1.16b}, [x1] /* load final block */
+ ld1 {v2.16b}, [x8]
+ ld1 {v3.16b}, [x9]
+
+ tbl v2.16b, {v0.16b}, v2.16b
+ tbx v0.16b, {v1.16b}, v3.16b
+ st1 {v2.16b}, [x4] /* overlapping stores */
+ mov w4, wzr
+ b .Lxtsencctsout
+AES_ENDPROC(aes_xts_encrypt)
AES_ENTRY(aes_xts_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp
+ /* subtract 16 bytes if we are doing CTS */
+ sub w8, w4, #0x10
+ tst w4, #0xf
+ csel w4, w4, w8, eq
+
ld1 {v4.16b}, [x6]
xts_load_mask v8
+ xts_cts_skip_tw w7, .Lxtsdecskiptw
cbz w7, .Lxtsdecnotfirst
enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
+.Lxtsdecskiptw:
dec_prepare w3, x2, x8
b .LxtsdecNx
@@ -482,7 +545,7 @@ AES_ENTRY(aes_xts_decrypt)
.LxtsdecloopNx:
next_tweak v4, v4, v8
.LxtsdecNx:
- subs w4, w4, #4
+ subs w4, w4, #64
bmi .Lxtsdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
next_tweak v5, v4, v8
@@ -503,22 +566,52 @@ AES_ENTRY(aes_xts_decrypt)
xts_reload_mask v8
b .LxtsdecloopNx
.Lxtsdec1x:
- adds w4, w4, #4
+ adds w4, w4, #64
beq .Lxtsdecout
+ subs w4, w4, #16
.Lxtsdecloop:
- ld1 {v1.16b}, [x1], #16
- eor v0.16b, v1.16b, v4.16b
+ ld1 {v0.16b}, [x1], #16
+ bmi .Lxtsdeccts
+.Lxtsdecctsout:
+ eor v0.16b, v0.16b, v4.16b
decrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
- beq .Lxtsdecout
+ cbz w4, .Lxtsdecout
+ subs w4, w4, #16
next_tweak v4, v4, v8
b .Lxtsdecloop
.Lxtsdecout:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
ret
+
+.Lxtsdeccts:
+ adr_l x8, .Lcts_permute_table
+
+ add x1, x1, w4, sxtw /* rewind input pointer */
+ add w4, w4, #16 /* # bytes in final block */
+ add x9, x8, #32
+ add x8, x8, x4
+ sub x9, x9, x4
+ add x4, x0, x4 /* output address of final block */
+
+ next_tweak v5, v4, v8
+
+ ld1 {v1.16b}, [x1] /* load final block */
+ ld1 {v2.16b}, [x8]
+ ld1 {v3.16b}, [x9]
+
+ eor v0.16b, v0.16b, v5.16b
+ decrypt_block v0, w3, x2, x8, w7
+ eor v0.16b, v0.16b, v5.16b
+
+ tbl v2.16b, {v0.16b}, v2.16b
+ tbx v0.16b, {v1.16b}, v3.16b
+
+ st1 {v2.16b}, [x4] /* overlapping stores */
+ mov w4, wzr
+ b .Lxtsdecctsout
AES_ENDPROC(aes_xts_decrypt)
/*