diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-21 14:46:51 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-21 14:46:51 -0700 |
commit | 2a8ba8f032160552a3beffab8aae9019ff477504 (patch) | |
tree | b50f70a3c8f7c2e179e1587d33ea3542d68525f9 /arch/x86/crypto/aesni-intel_asm.S | |
parent | ec2a7587e0a91d5c1afe23a0a73edfce06c5e4e0 (diff) | |
parent | e954bc91bdd4bb08b8325478c5004b24a23a3522 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (46 commits)
random: simplify fips mode
crypto: authenc - Fix cryptlen calculation
crypto: talitos - add support for sha224
crypto: talitos - add hash algorithms
crypto: talitos - second prepare step for adding ahash algorithms
crypto: talitos - prepare for adding ahash algorithms
crypto: n2 - Add Niagara2 crypto driver
crypto: skcipher - Add ablkcipher_walk interfaces
crypto: testmgr - Add testing for async hashing and update/final
crypto: tcrypt - Add speed tests for async hashing
crypto: scatterwalk - Fix scatterwalk_done() test
crypto: hifn_795x - Rename ablkcipher_walk to hifn_cipher_walk
padata: Use get_online_cpus/put_online_cpus in padata_free
padata: Add some code comments
padata: Flush the padata queues actively
padata: Use a timer to handle remaining objects in the reorder queues
crypto: shash - Remove usage of CRYPTO_MINALIGN
crypto: mv_cesa - Use resource_size
crypto: omap - OMAP macros corrected
padata: Use get_online_cpus/put_online_cpus
...
Fix up conflicts in arch/arm/mach-omap2/devices.c
Diffstat (limited to 'arch/x86/crypto/aesni-intel_asm.S')
-rw-r--r-- | arch/x86/crypto/aesni-intel_asm.S | 115 |
1 files changed, 115 insertions, 0 deletions
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 20bb0e1ac681..ff16756a51c1 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,6 +32,9 @@
 #define IN	IN1
 #define KEY	%xmm2
 #define IV	%xmm3
+#define BSWAP_MASK %xmm10
+#define CTR	%xmm11
+#define INC	%xmm12
 
 #define KEYP	%rdi
 #define OUTP	%rsi
@@ -42,6 +45,7 @@
 #define T1	%r10
 #define TKEYP	T1
 #define T2	%r11
+#define TCTR_LOW T2
 
 _key_expansion_128:
 _key_expansion_256a:
@@ -724,3 +728,114 @@ ENTRY(aesni_cbc_dec)
 	movups IV, (IVP)
 .Lcbc_dec_just_ret:
 	ret
+
+.align 16				/* movaps below needs a 16-byte aligned operand */
+.Lbswap_mask:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/*
+ * _aesni_inc_init:	internal ABI
+ *	setup registers used by _aesni_inc
+ * input:
+ *	IV
+ * output:
+ *	CTR:	== IV, in little endian
+ *	TCTR_LOW: == lower qword of CTR
+ *	INC:	== 1, in little endian
+ *	BSWAP_MASK == endian swapping mask
+ */
+_aesni_inc_init:
+	movaps .Lbswap_mask(%rip), BSWAP_MASK
+	movaps IV, CTR
+	PSHUFB_XMM BSWAP_MASK CTR
+	mov $1, TCTR_LOW
+	MOVQ_R64_XMM TCTR_LOW INC
+	MOVQ_R64_XMM CTR TCTR_LOW
+	ret
+
+/*
+ * _aesni_inc:		internal ABI
+ *	Increase IV by 1, IV is in big endian
+ * input:
+ *	IV
+ *	CTR:	== IV, in little endian
+ *	TCTR_LOW: == lower qword of CTR
+ *	INC:	== 1, in little endian
+ *	BSWAP_MASK == endian swapping mask
+ * output:
+ *	IV:	Increase by 1
+ * changed:
+ *	CTR:	== output IV, in little endian
+ *	TCTR_LOW: == lower qword of CTR
+ */
+_aesni_inc:
+	paddq INC, CTR			/* low qword of CTR += 1 */
+	add $1, TCTR_LOW		/* scalar shadow copy; sets CF on wrap */
+	jnc .Linc_low			/* no carry out of the low qword */
+	pslldq $8, INC			/* move the 1 into the high qword */
+	paddq INC, CTR			/* propagate the carry */
+	psrldq $8, INC			/* restore INC == 1 */
+.Linc_low:
+	movaps CTR, IV
+	PSHUFB_XMM BSWAP_MASK IV
+	ret
+
+/*
+ * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
+ *		      size_t len, u8 *iv)
+ */
+ENTRY(aesni_ctr_enc)
+	cmp $16, LEN
+	jb .Lctr_enc_just_ret		/* less than one 16-byte block: no-op */
+	mov 480(KEYP), KLEN
+	movups (IVP), IV
+	call _aesni_inc_init
+	cmp $64, LEN
+	jb .Lctr_enc_loop1		/* fewer than four blocks: one at a time */
+.align 4
+.Lctr_enc_loop4:
+	movaps IV, STATE1		/* STATE1..4 = four consecutive counters */
+	call _aesni_inc
+	movups (INP), IN1
+	movaps IV, STATE2
+	call _aesni_inc
+	movups 0x10(INP), IN2
+	movaps IV, STATE3
+	call _aesni_inc
+	movups 0x20(INP), IN3
+	movaps IV, STATE4
+	call _aesni_inc
+	movups 0x30(INP), IN4
+	call _aesni_enc4
+	pxor IN1, STATE1		/* output = _aesni_enc4 result ^ input */
+	movups STATE1, (OUTP)
+	pxor IN2, STATE2
+	movups STATE2, 0x10(OUTP)
+	pxor IN3, STATE3
+	movups STATE3, 0x20(OUTP)
+	pxor IN4, STATE4
+	movups STATE4, 0x30(OUTP)
+	sub $64, LEN
+	add $64, INP
+	add $64, OUTP
+	cmp $64, LEN
+	jae .Lctr_enc_loop4		/* unsigned: LEN is a size_t */
+	cmp $16, LEN
+	jb .Lctr_enc_ret		/* under 16 bytes left: tail not handled here */
+.align 4
+.Lctr_enc_loop1:
+	movaps IV, STATE
+	call _aesni_inc
+	movups (INP), IN
+	call _aesni_enc1
+	pxor IN, STATE
+	movups STATE, (OUTP)
+	sub $16, LEN
+	add $16, INP
+	add $16, OUTP
+	cmp $16, LEN
+	jae .Lctr_enc_loop1		/* unsigned: LEN is a size_t */
+.Lctr_enc_ret:
+	movups IV, (IVP)		/* hand the advanced counter back via *iv */
+.Lctr_enc_just_ret:
+	ret