diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 12:11:14 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 12:11:14 -0700 |
| commit | 8b53c76533aa4356602aea98f98a2f3b4051464c (patch) | |
| tree | ab10ba58e21501407f8108a6bb9003daa2176962 /arch/arm64/crypto/aes-modes.S | |
| parent | 6cfae0c26b21dce323fe8799b66cf4bc996e3565 (diff) | |
| parent | 9575d1a5c0780ea26ff8dd29c94a32be32ce3c85 (diff) | |
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
"API:
- Add the ability to abort a skcipher walk.
Algorithms:
- Fix XTS to actually do the stealing.
- Add library helpers for AES and DES for single-block users.
- Add library helpers for SHA256.
- Add new DES key verification helper.
- Add surrounding bits for ESSIV generator.
- Add accelerations for aegis128.
- Add test vectors for lzo-rle.
Drivers:
- Add i.MX8MQ support to caam.
- Add gcm/ccm/cfb/ofb aes support in inside-secure.
- Add ofb/cfb aes support in media-tek.
- Add HiSilicon ZIP accelerator support.
Others:
- Fix potential race condition in padata.
- Use unbound workqueues in padata"
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (311 commits)
crypto: caam - Cast to long first before pointer conversion
crypto: ccree - enable CTS support in AES-XTS
crypto: inside-secure - Probe transform record cache RAM sizes
crypto: inside-secure - Base RD fetchcount on actual RD FIFO size
crypto: inside-secure - Base CD fetchcount on actual CD FIFO size
crypto: inside-secure - Enable extended algorithms on newer HW
crypto: inside-secure: Corrected configuration of EIP96_TOKEN_CTRL
crypto: inside-secure - Add EIP97/EIP197 and endianness detection
padata: remove cpu_index from the parallel_queue
padata: unbind parallel jobs from specific CPUs
padata: use separate workqueues for parallel and serial work
padata, pcrypt: take CPU hotplug lock internally in padata_alloc_possible
crypto: pcrypt - remove padata cpumask notifier
padata: make padata_do_parallel find alternate callback CPU
workqueue: require CPU hotplug read exclusion for apply_workqueue_attrs
workqueue: unconfine alloc/apply/free_workqueue_attrs()
padata: allocate workqueue internally
arm64: dts: imx8mq: Add CAAM node
random: Use wait_event_freezable() in add_hwgenerator_randomness()
crypto: ux500 - Fix COMPILE_TEST warnings
...
Diffstat (limited to 'arch/arm64/crypto/aes-modes.S')
| -rw-r--r-- | arch/arm64/crypto/aes-modes.S | 135 |
1 files changed, 114 insertions, 21 deletions
```diff
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 324039b72094..131618389f1f 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -118,8 +118,23 @@ AES_ENDPROC(aes_ecb_decrypt)
  *		   int blocks, u8 iv[])
  * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
  *		   int blocks, u8 iv[])
+ * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+ *			 int rounds, int blocks, u8 iv[],
+ *			 u32 const rk2[]);
+ * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+ *			 int rounds, int blocks, u8 iv[],
+ *			 u32 const rk2[]);
  */
 
+AES_ENTRY(aes_essiv_cbc_encrypt)
+	ld1		{v4.16b}, [x5]			/* get iv */
+
+	mov		w8, #14				/* AES-256: 14 rounds */
+	enc_prepare	w8, x6, x7
+	encrypt_block	v4, w8, x6, x7, w9
+	enc_switch_key	w3, x2, x6
+	b		.Lcbcencloop4x
+
 AES_ENTRY(aes_cbc_encrypt)
 	ld1		{v4.16b}, [x5]			/* get iv */
 	enc_prepare	w3, x2, x6
@@ -153,13 +168,25 @@ AES_ENTRY(aes_cbc_encrypt)
 	st1		{v4.16b}, [x5]			/* return iv */
 	ret
 AES_ENDPROC(aes_cbc_encrypt)
+AES_ENDPROC(aes_essiv_cbc_encrypt)
 
+AES_ENTRY(aes_essiv_cbc_decrypt)
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
+
+	ld1		{cbciv.16b}, [x5]		/* get iv */
+
+	mov		w8, #14				/* AES-256: 14 rounds */
+	enc_prepare	w8, x6, x7
+	encrypt_block	cbciv, w8, x6, x7, w9
+	b		.Lessivcbcdecstart
 
 AES_ENTRY(aes_cbc_decrypt)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
 	ld1		{cbciv.16b}, [x5]		/* get iv */
+.Lessivcbcdecstart:
 	dec_prepare	w3, x2, x6
 
 .LcbcdecloopNx:
@@ -212,6 +239,7 @@ ST5( st1 {v4.16b}, [x0], #16 )
 	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_cbc_decrypt)
+AES_ENDPROC(aes_essiv_cbc_decrypt)
 
 
 	/*
@@ -265,12 +293,11 @@ AES_ENTRY(aes_cbc_cts_decrypt)
 	ld1		{v5.16b}, [x5]			/* get iv */
 	dec_prepare	w3, x2, x6
 
-	tbl		v2.16b, {v1.16b}, v4.16b
 	decrypt_block	v0, w3, x2, x6, w7
-	eor		v2.16b, v2.16b, v0.16b
+	tbl		v2.16b, {v0.16b}, v3.16b
+	eor		v2.16b, v2.16b, v1.16b
 
 	tbx		v0.16b, {v1.16b}, v4.16b
-	tbl		v2.16b, {v2.16b}, v3.16b
 	decrypt_block	v0, w3, x2, x6, w7
 	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
 
@@ -386,10 +413,10 @@ AES_ENDPROC(aes_ctr_encrypt)
 
 
 	/*
+	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
 	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
-	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
-	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
-	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
+	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
 	 */
 
 	.macro		next_tweak, out, in, tmp
@@ -415,6 +442,7 @@ AES_ENTRY(aes_xts_encrypt)
 	cbz		w7, .Lxtsencnotfirst
 
 	enc_prepare	w3, x5, x8
+	xts_cts_skip_tw	w7, .LxtsencNx
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
 	enc_switch_key	w3, x2, x8
 	b		.LxtsencNx
@@ -424,7 +452,7 @@ AES_ENTRY(aes_xts_encrypt)
 .LxtsencloopNx:
 	next_tweak	v4, v4, v8
 .LxtsencNx:
-	subs		w4, w4, #4
+	subs		w4, w4, #64
 	bmi		.Lxtsenc1x
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	next_tweak	v5, v4, v8
@@ -441,39 +469,74 @@ AES_ENTRY(aes_xts_encrypt)
 	eor		v2.16b, v2.16b, v6.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v7.16b
-	cbz		w4, .Lxtsencout
+	cbz		w4, .Lxtsencret
 	xts_reload_mask	v8
 	b		.LxtsencloopNx
 .Lxtsenc1x:
-	adds		w4, w4, #4
+	adds		w4, w4, #64
 	beq		.Lxtsencout
+	subs		w4, w4, #16
+	bmi		.LxtsencctsNx
 .Lxtsencloop:
-	ld1		{v1.16b}, [x1], #16
-	eor		v0.16b, v1.16b, v4.16b
+	ld1		{v0.16b}, [x1], #16
+.Lxtsencctsout:
+	eor		v0.16b, v0.16b, v4.16b
 	encrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
-	beq		.Lxtsencout
+	cbz		w4, .Lxtsencout
+	subs		w4, w4, #16
 	next_tweak	v4, v4, v8
+	bmi		.Lxtsenccts
+	st1		{v0.16b}, [x0], #16
 	b		.Lxtsencloop
 .Lxtsencout:
+	st1		{v0.16b}, [x0]
+.Lxtsencret:
 	st1		{v4.16b}, [x6]
 	ldp		x29, x30, [sp], #16
 	ret
-AES_ENDPROC(aes_xts_encrypt)
 
+.LxtsencctsNx:
+	mov		v0.16b, v3.16b
+	sub		x0, x0, #16
+.Lxtsenccts:
+	adr_l		x8, .Lcts_permute_table
+
+	add		x1, x1, w4, sxtw	/* rewind input pointer */
+	add		w4, w4, #16		/* # bytes in final block */
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	add		x4, x0, x4		/* output address of final block */
+
+	ld1		{v1.16b}, [x1]		/* load final block */
+	ld1		{v2.16b}, [x8]
+	ld1		{v3.16b}, [x9]
+
+	tbl		v2.16b, {v0.16b}, v2.16b
+	tbx		v0.16b, {v1.16b}, v3.16b
+	st1		{v2.16b}, [x4]		/* overlapping stores */
+	mov		w4, wzr
+	b		.Lxtsencctsout
+AES_ENDPROC(aes_xts_encrypt)
 
 AES_ENTRY(aes_xts_decrypt)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
+	/* subtract 16 bytes if we are doing CTS */
+	sub		w8, w4, #0x10
+	tst		w4, #0xf
+	csel		w4, w4, w8, eq
+
 	ld1		{v4.16b}, [x6]
 	xts_load_mask	v8
+	xts_cts_skip_tw	w7, .Lxtsdecskiptw
 	cbz		w7, .Lxtsdecnotfirst
 
 	enc_prepare	w3, x5, x8
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
+.Lxtsdecskiptw:
 	dec_prepare	w3, x2, x8
 	b		.LxtsdecNx
 
@@ -482,7 +545,7 @@ AES_ENTRY(aes_xts_decrypt)
 .LxtsdecloopNx:
 	next_tweak	v4, v4, v8
 .LxtsdecNx:
-	subs		w4, w4, #4
+	subs		w4, w4, #64
 	bmi		.Lxtsdec1x
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	next_tweak	v5, v4, v8
@@ -503,22 +566,52 @@ AES_ENTRY(aes_xts_decrypt)
 	xts_reload_mask	v8
 	b		.LxtsdecloopNx
 .Lxtsdec1x:
-	adds		w4, w4, #4
+	adds		w4, w4, #64
 	beq		.Lxtsdecout
+	subs		w4, w4, #16
 .Lxtsdecloop:
-	ld1		{v1.16b}, [x1], #16
-	eor		v0.16b, v1.16b, v4.16b
+	ld1		{v0.16b}, [x1], #16
+	bmi		.Lxtsdeccts
+.Lxtsdecctsout:
+	eor		v0.16b, v0.16b, v4.16b
 	decrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
-	beq		.Lxtsdecout
+	cbz		w4, .Lxtsdecout
+	subs		w4, w4, #16
 	next_tweak	v4, v4, v8
 	b		.Lxtsdecloop
 .Lxtsdecout:
 	st1		{v4.16b}, [x6]
 	ldp		x29, x30, [sp], #16
 	ret
+
+.Lxtsdeccts:
+	adr_l		x8, .Lcts_permute_table
+
+	add		x1, x1, w4, sxtw	/* rewind input pointer */
+	add		w4, w4, #16		/* # bytes in final block */
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	add		x4, x0, x4		/* output address of final block */
+
+	next_tweak	v5, v4, v8
+
+	ld1		{v1.16b}, [x1]		/* load final block */
+	ld1		{v2.16b}, [x8]
+	ld1		{v3.16b}, [x9]
+
+	eor		v0.16b, v0.16b, v5.16b
+	decrypt_block	v0, w3, x2, x8, w7
+	eor		v0.16b, v0.16b, v5.16b
+
+	tbl		v2.16b, {v0.16b}, v2.16b
+	tbx		v0.16b, {v1.16b}, v3.16b
+
+	st1		{v2.16b}, [x4]		/* overlapping stores */
+	mov		w4, wzr
+	b		.Lxtsdecctsout
 AES_ENDPROC(aes_xts_decrypt)
 
 	/*
```
