summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-14 09:47:01 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-14 09:47:01 -0800
commitfee5429e028c414d80d036198db30454cfd91b7a (patch)
tree485f37a974e4ab85339021c794d1782e2d761c5b /arch
parent83e047c104aa95a8a683d6bd421df1551c17dbd2 (diff)
parent96692a7305c49845e3cbf5a60cfcb207c5dc4030 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: "Here is the crypto update for 3.20: - Added 192/256-bit key support to aesni GCM. - Added MIPS OCTEON MD5 support. - Fixed hwrng starvation and race conditions. - Added note that memzero_explicit is not a subsitute for memset. - Added user-space interface for crypto_rng. - Misc fixes" * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (71 commits) crypto: tcrypt - do not allocate iv on stack for aead speed tests crypto: testmgr - limit IV copy length in aead tests crypto: tcrypt - fix buflen reminder calculation crypto: testmgr - mark rfc4106(gcm(aes)) as fips_allowed crypto: caam - fix resource clean-up on error path for caam_jr_init crypto: caam - pair irq map and dispose in the same function crypto: ccp - terminate ccp_support array with empty element crypto: caam - remove unused local variable crypto: caam - remove dead code crypto: caam - don't emit ICV check failures to dmesg hwrng: virtio - drop extra empty line crypto: replace scatterwalk_sg_next with sg_next crypto: atmel - Free memory in error path crypto: doc - remove colons in comments crypto: seqiv - Ensure that IV size is at least 8 bytes crypto: cts - Weed out non-CBC algorithms MAINTAINERS: add linux-crypto to hw random crypto: cts - Remove bogus use of seqiv crypto: qat - don't need qat_auth_state struct crypto: algif_rng - fix sparse non static symbol warning ...
Diffstat (limited to 'arch')
-rw-r--r--arch/mips/cavium-octeon/Makefile1
-rw-r--r--arch/mips/cavium-octeon/crypto/Makefile7
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-crypto.c66
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-crypto.h75
-rw-r--r--arch/mips/cavium-octeon/crypto/octeon-md5.c216
-rw-r--r--arch/mips/cavium-octeon/executive/octeon-model.c6
-rw-r--r--arch/mips/include/asm/octeon/octeon-feature.h17
-rw-r--r--arch/mips/include/asm/octeon/octeon.h5
-rw-r--r--arch/sparc/crypto/aes_glue.c2
-rw-r--r--arch/sparc/crypto/camellia_glue.c2
-rw-r--r--arch/sparc/crypto/des_glue.c1
-rw-r--r--arch/sparc/crypto/md5_glue.c2
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S343
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c34
-rw-r--r--arch/x86/crypto/des3_ede_glue.c2
15 files changed, 595 insertions, 184 deletions
diff --git a/arch/mips/cavium-octeon/Makefile b/arch/mips/cavium-octeon/Makefile
index 42f5f1a4b40a..69a8a8dabc2b 100644
--- a/arch/mips/cavium-octeon/Makefile
+++ b/arch/mips/cavium-octeon/Makefile
@@ -16,6 +16,7 @@ obj-y := cpu.o setup.o octeon-platform.o octeon-irq.o csrc-octeon.o
obj-y += dma-octeon.o
obj-y += octeon-memcpy.o
obj-y += executive/
+obj-y += crypto/
obj-$(CONFIG_MTD) += flash_setup.o
obj-$(CONFIG_SMP) += smp.o
diff --git a/arch/mips/cavium-octeon/crypto/Makefile b/arch/mips/cavium-octeon/crypto/Makefile
new file mode 100644
index 000000000000..a74f76d85a2f
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/Makefile
@@ -0,0 +1,7 @@
+#
+# OCTEON-specific crypto modules.
+#
+
+obj-y += octeon-crypto.o
+
+obj-$(CONFIG_CRYPTO_MD5_OCTEON) += octeon-md5.o
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.c b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
new file mode 100644
index 000000000000..7c82ff463b65
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.c
@@ -0,0 +1,66 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2012 Cavium Networks
+ */
+
+#include <asm/cop2.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+
+#include "octeon-crypto.h"
+
+/**
+ * Enable access to Octeon's COP2 crypto hardware for kernel use. Wrap any
+ * crypto operations in calls to octeon_crypto_enable/disable in order to make
+ * sure the state of COP2 isn't corrupted if userspace is also performing
+ * hardware crypto operations. Allocate the state parameter on the stack.
+ * Preemption must be disabled to prevent context switches.
+ *
+ * @state: Pointer to state structure to store current COP2 state in.
+ *
+ * Returns: Flags to be passed to octeon_crypto_disable()
+ */
+unsigned long octeon_crypto_enable(struct octeon_cop2_state *state)
+{
+ int status;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ status = read_c0_status();
+ write_c0_status(status | ST0_CU2);
+ if (KSTK_STATUS(current) & ST0_CU2) {
+ octeon_cop2_save(&(current->thread.cp2));
+ KSTK_STATUS(current) &= ~ST0_CU2;
+ status &= ~ST0_CU2;
+ } else if (status & ST0_CU2) {
+ octeon_cop2_save(state);
+ }
+ local_irq_restore(flags);
+ return status & ST0_CU2;
+}
+EXPORT_SYMBOL_GPL(octeon_crypto_enable);
+
+/**
+ * Disable access to Octeon's COP2 crypto hardware in the kernel. This must be
+ * called after an octeon_crypto_enable() before any context switch or return to
+ * userspace.
+ *
+ * @state: Pointer to COP2 state to restore
+ * @flags: Return value from octeon_crypto_enable()
+ */
+void octeon_crypto_disable(struct octeon_cop2_state *state,
+ unsigned long crypto_flags)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ if (crypto_flags & ST0_CU2)
+ octeon_cop2_restore(state);
+ else
+ write_c0_status(read_c0_status() & ~ST0_CU2);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(octeon_crypto_disable);
diff --git a/arch/mips/cavium-octeon/crypto/octeon-crypto.h b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
new file mode 100644
index 000000000000..e2a4aece9c24
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-crypto.h
@@ -0,0 +1,75 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012-2013 Cavium Inc., All Rights Reserved.
+ *
+ * MD5 instruction definitions added by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
+ */
+#ifndef __LINUX_OCTEON_CRYPTO_H
+#define __LINUX_OCTEON_CRYPTO_H
+
+#include <linux/sched.h>
+#include <asm/mipsregs.h>
+
+#define OCTEON_CR_OPCODE_PRIORITY 300
+
+extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state);
+extern void octeon_crypto_disable(struct octeon_cop2_state *state,
+ unsigned long flags);
+
+/*
+ * Macros needed to implement MD5:
+ */
+
+/*
+ * The index can be 0-1.
+ */
+#define write_octeon_64bit_hash_dword(value, index) \
+do { \
+ __asm__ __volatile__ ( \
+ "dmtc2 %[rt],0x0048+" STR(index) \
+ : \
+ : [rt] "d" (value)); \
+} while (0)
+
+/*
+ * The index can be 0-1.
+ */
+#define read_octeon_64bit_hash_dword(index) \
+({ \
+ u64 __value; \
+ \
+ __asm__ __volatile__ ( \
+ "dmfc2 %[rt],0x0048+" STR(index) \
+ : [rt] "=d" (__value) \
+ : ); \
+ \
+ __value; \
+})
+
+/*
+ * The index can be 0-6.
+ */
+#define write_octeon_64bit_block_dword(value, index) \
+do { \
+ __asm__ __volatile__ ( \
+ "dmtc2 %[rt],0x0040+" STR(index) \
+ : \
+ : [rt] "d" (value)); \
+} while (0)
+
+/*
+ * The value is the final block dword (64-bit).
+ */
+#define octeon_md5_start(value) \
+do { \
+ __asm__ __volatile__ ( \
+ "dmtc2 %[rt],0x4047" \
+ : \
+ : [rt] "d" (value)); \
+} while (0)
+
+#endif /* __LINUX_OCTEON_CRYPTO_H */
diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c
new file mode 100644
index 000000000000..b909881ba6c1
--- /dev/null
+++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c
@@ -0,0 +1,216 @@
+/*
+ * Cryptographic API.
+ *
+ * MD5 Message Digest Algorithm (RFC1321).
+ *
+ * Adapted for OCTEON by Aaro Koskinen <aaro.koskinen@iki.fi>.
+ *
+ * Based on crypto/md5.c, which is:
+ *
+ * Derived from cryptoapi implementation, originally based on the
+ * public domain implementation written by Colin Plumb in 1993.
+ *
+ * Copyright (c) Cryptoapi developers.
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <crypto/md5.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <asm/byteorder.h>
+#include <linux/cryptohash.h>
+#include <asm/octeon/octeon.h>
+#include <crypto/internal/hash.h>
+
+#include "octeon-crypto.h"
+
+/*
+ * We pass everything as 64-bit. OCTEON can handle misaligned data.
+ */
+
+static void octeon_md5_store_hash(struct md5_state *ctx)
+{
+ u64 *hash = (u64 *)ctx->hash;
+
+ write_octeon_64bit_hash_dword(hash[0], 0);
+ write_octeon_64bit_hash_dword(hash[1], 1);
+}
+
+static void octeon_md5_read_hash(struct md5_state *ctx)
+{
+ u64 *hash = (u64 *)ctx->hash;
+
+ hash[0] = read_octeon_64bit_hash_dword(0);
+ hash[1] = read_octeon_64bit_hash_dword(1);
+}
+
+static void octeon_md5_transform(const void *_block)
+{
+ const u64 *block = _block;
+
+ write_octeon_64bit_block_dword(block[0], 0);
+ write_octeon_64bit_block_dword(block[1], 1);
+ write_octeon_64bit_block_dword(block[2], 2);
+ write_octeon_64bit_block_dword(block[3], 3);
+ write_octeon_64bit_block_dword(block[4], 4);
+ write_octeon_64bit_block_dword(block[5], 5);
+ write_octeon_64bit_block_dword(block[6], 6);
+ octeon_md5_start(block[7]);
+}
+
+static int octeon_md5_init(struct shash_desc *desc)
+{
+ struct md5_state *mctx = shash_desc_ctx(desc);
+
+ mctx->hash[0] = cpu_to_le32(0x67452301);
+ mctx->hash[1] = cpu_to_le32(0xefcdab89);
+ mctx->hash[2] = cpu_to_le32(0x98badcfe);
+ mctx->hash[3] = cpu_to_le32(0x10325476);
+ mctx->byte_count = 0;
+
+ return 0;
+}
+
+static int octeon_md5_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ struct md5_state *mctx = shash_desc_ctx(desc);
+ const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
+ struct octeon_cop2_state state;
+ unsigned long flags;
+
+ mctx->byte_count += len;
+
+ if (avail > len) {
+ memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
+ data, len);
+ return 0;
+ }
+
+ memcpy((char *)mctx->block + (sizeof(mctx->block) - avail), data,
+ avail);
+
+ local_bh_disable();
+ preempt_disable();
+ flags = octeon_crypto_enable(&state);
+ octeon_md5_store_hash(mctx);
+
+ octeon_md5_transform(mctx->block);
+ data += avail;
+ len -= avail;
+
+ while (len >= sizeof(mctx->block)) {
+ octeon_md5_transform(data);
+ data += sizeof(mctx->block);
+ len -= sizeof(mctx->block);
+ }
+
+ octeon_md5_read_hash(mctx);
+ octeon_crypto_disable(&state, flags);
+ preempt_enable();
+ local_bh_enable();
+
+ memcpy(mctx->block, data, len);
+
+ return 0;
+}
+
+static int octeon_md5_final(struct shash_desc *desc, u8 *out)
+{
+ struct md5_state *mctx = shash_desc_ctx(desc);
+ const unsigned int offset = mctx->byte_count & 0x3f;
+ char *p = (char *)mctx->block + offset;
+ int padding = 56 - (offset + 1);
+ struct octeon_cop2_state state;
+ unsigned long flags;
+
+ *p++ = 0x80;
+
+ local_bh_disable();
+ preempt_disable();
+ flags = octeon_crypto_enable(&state);
+ octeon_md5_store_hash(mctx);
+
+ if (padding < 0) {
+ memset(p, 0x00, padding + sizeof(u64));
+ octeon_md5_transform(mctx->block);
+ p = (char *)mctx->block;
+ padding = 56;
+ }
+
+ memset(p, 0, padding);
+ mctx->block[14] = cpu_to_le32(mctx->byte_count << 3);
+ mctx->block[15] = cpu_to_le32(mctx->byte_count >> 29);
+ octeon_md5_transform(mctx->block);
+
+ octeon_md5_read_hash(mctx);
+ octeon_crypto_disable(&state, flags);
+ preempt_enable();
+ local_bh_enable();
+
+ memcpy(out, mctx->hash, sizeof(mctx->hash));
+ memset(mctx, 0, sizeof(*mctx));
+
+ return 0;
+}
+
+static int octeon_md5_export(struct shash_desc *desc, void *out)
+{
+ struct md5_state *ctx = shash_desc_ctx(desc);
+
+ memcpy(out, ctx, sizeof(*ctx));
+ return 0;
+}
+
+static int octeon_md5_import(struct shash_desc *desc, const void *in)
+{
+ struct md5_state *ctx = shash_desc_ctx(desc);
+
+ memcpy(ctx, in, sizeof(*ctx));
+ return 0;
+}
+
+static struct shash_alg alg = {
+ .digestsize = MD5_DIGEST_SIZE,
+ .init = octeon_md5_init,
+ .update = octeon_md5_update,
+ .final = octeon_md5_final,
+ .export = octeon_md5_export,
+ .import = octeon_md5_import,
+ .descsize = sizeof(struct md5_state),
+ .statesize = sizeof(struct md5_state),
+ .base = {
+ .cra_name = "md5",
+ .cra_driver_name= "octeon-md5",
+ .cra_priority = OCTEON_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
+ .cra_module = THIS_MODULE,
+ }
+};
+
+static int __init md5_mod_init(void)
+{
+ if (!octeon_has_crypto())
+ return -ENOTSUPP;
+ return crypto_register_shash(&alg);
+}
+
+static void __exit md5_mod_fini(void)
+{
+ crypto_unregister_shash(&alg);
+}
+
+module_init(md5_mod_init);
+module_exit(md5_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("MD5 Message Digest Algorithm (OCTEON)");
+MODULE_AUTHOR("Aaro Koskinen <aaro.koskinen@iki.fi>");
diff --git a/arch/mips/cavium-octeon/executive/octeon-model.c b/arch/mips/cavium-octeon/executive/octeon-model.c
index e15b049b3bd7..b2104bd9ab3b 100644
--- a/arch/mips/cavium-octeon/executive/octeon-model.c
+++ b/arch/mips/cavium-octeon/executive/octeon-model.c
@@ -27,6 +27,9 @@
#include <asm/octeon/octeon.h>
+enum octeon_feature_bits __octeon_feature_bits __read_mostly;
+EXPORT_SYMBOL_GPL(__octeon_feature_bits);
+
/**
* Read a byte of fuse data
* @byte_addr: address to read
@@ -103,6 +106,9 @@ static const char *__init octeon_model_get_string_buffer(uint32_t chip_id,
else
suffix = "NSP";
+ if (!fus_dat2.s.nocrypto)
+ __octeon_feature_bits |= OCTEON_HAS_CRYPTO;
+
/*
* Assume pass number is encoded using <5:3><2:0>. Exceptions
* will be fixed later.
diff --git a/arch/mips/include/asm/octeon/octeon-feature.h b/arch/mips/include/asm/octeon/octeon-feature.h
index c4fe81f47f53..8ebd3f579b84 100644
--- a/arch/mips/include/asm/octeon/octeon-feature.h
+++ b/arch/mips/include/asm/octeon/octeon-feature.h
@@ -46,8 +46,6 @@ enum octeon_feature {
OCTEON_FEATURE_SAAD,
/* Does this Octeon support the ZIP offload engine? */
OCTEON_FEATURE_ZIP,
- /* Does this Octeon support crypto acceleration using COP2? */
- OCTEON_FEATURE_CRYPTO,
OCTEON_FEATURE_DORM_CRYPTO,
/* Does this Octeon support PCI express? */
OCTEON_FEATURE_PCIE,
@@ -86,6 +84,21 @@ enum octeon_feature {
OCTEON_MAX_FEATURE
};
+enum octeon_feature_bits {
+ OCTEON_HAS_CRYPTO = 0x0001, /* Crypto acceleration using COP2 */
+};
+extern enum octeon_feature_bits __octeon_feature_bits;
+
+/**
+ * octeon_has_crypto() - Check if this OCTEON has crypto acceleration support.
+ *
+ * Returns: Non-zero if the feature exists. Zero if the feature does not exist.
+ */
+static inline int octeon_has_crypto(void)
+{
+ return __octeon_feature_bits & OCTEON_HAS_CRYPTO;
+}
+
/**
* Determine if the current Octeon supports a specific feature. These
* checks have been optimized to be fairly quick, but they should still
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index d781f9e66884..6dfefd2d5cdf 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -44,11 +44,6 @@ extern int octeon_get_boot_num_arguments(void);
extern const char *octeon_get_boot_argument(int arg);
extern void octeon_hal_setup_reserved32(void);
extern void octeon_user_io_init(void);
-struct octeon_cop2_state;
-extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state);
-extern void octeon_crypto_disable(struct octeon_cop2_state *state,
- unsigned long flags);
-extern asmlinkage void octeon_cop2_restore(struct octeon_cop2_state *task);
extern void octeon_init_cvmcount(void);
extern void octeon_setup_delays(void);
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 705408766ab0..2e48eb8813ff 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -497,7 +497,7 @@ module_init(aes_sparc64_mod_init);
module_exit(aes_sparc64_mod_fini);
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("AES Secure Hash Algorithm, sparc64 aes opcode accelerated");
+MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, sparc64 aes opcode accelerated");
MODULE_ALIAS_CRYPTO("aes");
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index 641f55cb61c3..6bf2479a12fb 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -322,6 +322,6 @@ module_exit(camellia_sparc64_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Camellia Cipher Algorithm, sparc64 camellia opcode accelerated");
-MODULE_ALIAS_CRYPTO("aes");
+MODULE_ALIAS_CRYPTO("camellia");
#include "crop_devid.c"
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index d11500972994..dd6a34fa6e19 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -533,5 +533,6 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("DES & Triple DES EDE Cipher Algorithms, sparc64 des opcode accelerated");
MODULE_ALIAS_CRYPTO("des");
+MODULE_ALIAS_CRYPTO("des3_ede");
#include "crop_devid.c"
diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c
index 64c7ff5f72a9..b688731d7ede 100644
--- a/arch/sparc/crypto/md5_glue.c
+++ b/arch/sparc/crypto/md5_glue.c
@@ -183,7 +183,7 @@ module_init(md5_sparc64_mod_init);
module_exit(md5_sparc64_mod_fini);
MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, sparc64 md5 opcode accelerated");
+MODULE_DESCRIPTION("MD5 Message Digest Algorithm, sparc64 md5 opcode accelerated");
MODULE_ALIAS_CRYPTO("md5");
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 477e9d75149b..6bd2c6c95373 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -32,12 +32,23 @@
#include <linux/linkage.h>
#include <asm/inst.h>
+/*
+ * The following macros are used to move an (un)aligned 16 byte value to/from
+ * an XMM register. This can done for either FP or integer values, for FP use
+ * movaps (move aligned packed single) or integer use movdqa (move double quad
+ * aligned). It doesn't make a performance difference which instruction is used
+ * since Nehalem (original Core i7) was released. However, the movaps is a byte
+ * shorter, so that is the one we'll use for now. (same for unaligned).
+ */
+#define MOVADQ movaps
+#define MOVUDQ movups
+
#ifdef __x86_64__
+
.data
.align 16
.Lgf128mul_x_ble_mask:
.octa 0x00000000000000010000000000000087
-
POLY: .octa 0xC2000000000000000000000000000001
TWOONE: .octa 0x00000001000000000000000000000001
@@ -89,6 +100,7 @@ enc: .octa 0x2
#define arg8 STACK_OFFSET+16(%r14)
#define arg9 STACK_OFFSET+24(%r14)
#define arg10 STACK_OFFSET+32(%r14)
+#define keysize 2*15*16(%arg1)
#endif
@@ -213,10 +225,12 @@ enc: .octa 0x2
.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+ MOVADQ SHUF_MASK(%rip), %xmm14
mov arg7, %r10 # %r10 = AAD
mov arg8, %r12 # %r12 = aadLen
mov %r12, %r11
pxor %xmm\i, %xmm\i
+
_get_AAD_loop\num_initial_blocks\operation:
movd (%r10), \TMP1
pslldq $12, \TMP1
@@ -225,16 +239,18 @@ _get_AAD_loop\num_initial_blocks\operation:
add $4, %r10
sub $4, %r12
jne _get_AAD_loop\num_initial_blocks\operation
+
cmp $16, %r11
je _get_AAD_loop2_done\num_initial_blocks\operation
+
mov $16, %r12
_get_AAD_loop2\num_initial_blocks\operation:
psrldq $4, %xmm\i
sub $4, %r12
cmp %r11, %r12
jne _get_AAD_loop2\num_initial_blocks\operation
+
_get_AAD_loop2_done\num_initial_blocks\operation:
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
xor %r11, %r11 # initialise the data pointer offset as zero
@@ -243,59 +259,34 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
mov %arg5, %rax # %rax = *Y0
movdqu (%rax), \XMM0 # XMM0 = Y0
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM0
.if (\i == 5) || (\i == 6) || (\i == 7)
+ MOVADQ ONE(%RIP),\TMP1
+ MOVADQ (%arg1),\TMP2
.irpc index, \i_seq
- paddd ONE(%rip), \XMM0 # INCR Y0
+ paddd \TMP1, \XMM0 # INCR Y0
movdqa \XMM0, %xmm\index
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
-
-.endr
-.irpc index, \i_seq
- pxor 16*0(%arg1), %xmm\index
-.endr
-.irpc index, \i_seq
- movaps 0x10(%rdi), \TMP1
- AESENC \TMP1, %xmm\index # Round 1
-.endr
-.irpc index, \i_seq
- movaps 0x20(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x30(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x40(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x50(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x60(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
+ pxor \TMP2, %xmm\index
.endr
-.irpc index, \i_seq
- movaps 0x70(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x80(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x90(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
+ lea 0x10(%arg1),%r10
+ mov keysize,%eax
+ shr $2,%eax # 128->4, 192->6, 256->8
+ add $5,%eax # 128->9, 192->11, 256->13
+
+aes_loop_initial_dec\num_initial_blocks:
+ MOVADQ (%r10),\TMP1
+.irpc index, \i_seq
+ AESENC \TMP1, %xmm\index
.endr
+ add $16,%r10
+ sub $1,%eax
+ jnz aes_loop_initial_dec\num_initial_blocks
+
+ MOVADQ (%r10), \TMP1
.irpc index, \i_seq
- movaps 0xa0(%arg1), \TMP1
- AESENCLAST \TMP1, %xmm\index # Round 10
+ AESENCLAST \TMP1, %xmm\index # Last Round
.endr
.irpc index, \i_seq
movdqu (%arg3 , %r11, 1), \TMP1
@@ -305,10 +296,8 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
add $16, %r11
movdqa \TMP1, %xmm\index
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\index
-
- # prepare plaintext/ciphertext for GHASH computation
+ # prepare plaintext/ciphertext for GHASH computation
.endr
.endif
GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
@@ -338,30 +327,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
* Precomputations for HashKey parallel with encryption of first 4 blocks.
* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
*/
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM1
- movdqa SHUF_MASK(%rip), %xmm14
+ MOVADQ ONE(%rip), \TMP1
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, \XMM1
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM2
- movdqa SHUF_MASK(%rip), %xmm14
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, \XMM2
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM3
- movdqa SHUF_MASK(%rip), %xmm14
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, \XMM3
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM4
- movdqa SHUF_MASK(%rip), %xmm14
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, \XMM4
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
- pxor 16*0(%arg1), \XMM1
- pxor 16*0(%arg1), \XMM2
- pxor 16*0(%arg1), \XMM3
- pxor 16*0(%arg1), \XMM4
+ MOVADQ 0(%arg1),\TMP1
+ pxor \TMP1, \XMM1
+ pxor \TMP1, \XMM2
+ pxor \TMP1, \XMM3
+ pxor \TMP1, \XMM4
movdqa \TMP3, \TMP5
pshufd $78, \TMP3, \TMP1
pxor \TMP3, \TMP1
@@ -399,7 +386,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
movdqa \TMP1, HashKey_4_k(%rsp)
- movaps 0xa0(%arg1), \TMP2
+ lea 0xa0(%arg1),%r10
+ mov keysize,%eax
+ shr $2,%eax # 128->4, 192->6, 256->8
+ sub $4,%eax # 128->0, 192->2, 256->4
+ jz aes_loop_pre_dec_done\num_initial_blocks
+
+aes_loop_pre_dec\num_initial_blocks:
+ MOVADQ (%r10),\TMP2
+.irpc index, 1234
+ AESENC \TMP2, %xmm\index
+.endr
+ add $16,%r10
+ sub $1,%eax
+ jnz aes_loop_pre_dec\num_initial_blocks
+
+aes_loop_pre_dec_done\num_initial_blocks:
+ MOVADQ (%r10), \TMP2
AESENCLAST \TMP2, \XMM1
AESENCLAST \TMP2, \XMM2
AESENCLAST \TMP2, \XMM3
@@ -421,15 +424,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
movdqa \TMP1, \XMM4
add $64, %r11
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
pxor \XMMDst, \XMM1
# combine GHASHed value with the corresponding ciphertext
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
_initial_blocks_done\num_initial_blocks\operation:
@@ -451,6 +450,7 @@ _initial_blocks_done\num_initial_blocks\operation:
.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
+ MOVADQ SHUF_MASK(%rip), %xmm14
mov arg7, %r10 # %r10 = AAD
mov arg8, %r12 # %r12 = aadLen
mov %r12, %r11
@@ -472,7 +472,6 @@ _get_AAD_loop2\num_initial_blocks\operation:
cmp %r11, %r12
jne _get_AAD_loop2\num_initial_blocks\operation
_get_AAD_loop2_done\num_initial_blocks\operation:
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
xor %r11, %r11 # initialise the data pointer offset as zero
@@ -481,59 +480,35 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
mov %arg5, %rax # %rax = *Y0
movdqu (%rax), \XMM0 # XMM0 = Y0
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM0
.if (\i == 5) || (\i == 6) || (\i == 7)
-.irpc index, \i_seq
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, %xmm\index
- movdqa SHUF_MASK(%rip), %xmm14
- PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
-.endr
-.irpc index, \i_seq
- pxor 16*0(%arg1), %xmm\index
-.endr
-.irpc index, \i_seq
- movaps 0x10(%rdi), \TMP1
- AESENC \TMP1, %xmm\index # Round 1
-.endr
-.irpc index, \i_seq
- movaps 0x20(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
+ MOVADQ ONE(%RIP),\TMP1
+ MOVADQ 0(%arg1),\TMP2
.irpc index, \i_seq
- movaps 0x30(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, %xmm\index
+ PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
+ pxor \TMP2, %xmm\index
.endr
-.irpc index, \i_seq
- movaps 0x40(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x50(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x60(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x70(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x80(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
-.endr
-.irpc index, \i_seq
- movaps 0x90(%arg1), \TMP1
- AESENC \TMP1, %xmm\index # Round 2
+ lea 0x10(%arg1),%r10
+ mov keysize,%eax
+ shr $2,%eax # 128->4, 192->6, 256->8
+ add $5,%eax # 128->9, 192->11, 256->13
+
+aes_loop_initial_enc\num_initial_blocks:
+ MOVADQ (%r10),\TMP1
+.irpc index, \i_seq
+ AESENC \TMP1, %xmm\index
.endr
+ add $16,%r10
+ sub $1,%eax
+ jnz aes_loop_initial_enc\num_initial_blocks
+
+ MOVADQ (%r10), \TMP1
.irpc index, \i_seq
- movaps 0xa0(%arg1), \TMP1
- AESENCLAST \TMP1, %xmm\index # Round 10
+ AESENCLAST \TMP1, %xmm\index # Last Round
.endr
.irpc index, \i_seq
movdqu (%arg3 , %r11, 1), \TMP1
@@ -541,8 +516,6 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
movdqu %xmm\index, (%arg2 , %r11, 1)
# write back plaintext/ciphertext for num_initial_blocks
add $16, %r11
-
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, %xmm\index
# prepare plaintext/ciphertext for GHASH computation
@@ -575,30 +548,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
* Precomputations for HashKey parallel with encryption of first 4 blocks.
* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
*/
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM1
- movdqa SHUF_MASK(%rip), %xmm14
+ MOVADQ ONE(%RIP),\TMP1
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, \XMM1
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM2
- movdqa SHUF_MASK(%rip), %xmm14
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, \XMM2
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM3
- movdqa SHUF_MASK(%rip), %xmm14
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, \XMM3
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
- paddd ONE(%rip), \XMM0 # INCR Y0
- movdqa \XMM0, \XMM4
- movdqa SHUF_MASK(%rip), %xmm14
+ paddd \TMP1, \XMM0 # INCR Y0
+ MOVADQ \XMM0, \XMM4
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
- pxor 16*0(%arg1), \XMM1
- pxor 16*0(%arg1), \XMM2
- pxor 16*0(%arg1), \XMM3
- pxor 16*0(%arg1), \XMM4
+ MOVADQ 0(%arg1),\TMP1
+ pxor \TMP1, \XMM1
+ pxor \TMP1, \XMM2
+ pxor \TMP1, \XMM3
+ pxor \TMP1, \XMM4
movdqa \TMP3, \TMP5
pshufd $78, \TMP3, \TMP1
pxor \TMP3, \TMP1
@@ -636,7 +607,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
pshufd $78, \TMP5, \TMP1
pxor \TMP5, \TMP1
movdqa \TMP1, HashKey_4_k(%rsp)
- movaps 0xa0(%arg1), \TMP2
+ lea 0xa0(%arg1),%r10
+ mov keysize,%eax
+ shr $2,%eax # 128->4, 192->6, 256->8
+ sub $4,%eax # 128->0, 192->2, 256->4
+ jz aes_loop_pre_enc_done\num_initial_blocks
+
+aes_loop_pre_enc\num_initial_blocks:
+ MOVADQ (%r10),\TMP2
+.irpc index, 1234
+ AESENC \TMP2, %xmm\index
+.endr
+ add $16,%r10
+ sub $1,%eax
+ jnz aes_loop_pre_enc\num_initial_blocks
+
+aes_loop_pre_enc_done\num_initial_blocks:
+ MOVADQ (%r10), \TMP2
AESENCLAST \TMP2, \XMM1
AESENCLAST \TMP2, \XMM2
AESENCLAST \TMP2, \XMM3
@@ -655,15 +642,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
add $64, %r11
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
pxor \XMMDst, \XMM1
# combine GHASHed value with the corresponding ciphertext
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
- movdqa SHUF_MASK(%rip), %xmm14
PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
_initial_blocks_done\num_initial_blocks\operation:
@@ -794,7 +777,23 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
- movaps 0xa0(%arg1), \TMP3
+ lea 0xa0(%arg1),%r10
+ mov keysize,%eax
+ shr $2,%eax # 128->4, 192->6, 256->8
+ sub $4,%eax # 128->0, 192->2, 256->4
+ jz aes_loop_par_enc_done
+
+aes_loop_par_enc:
+ MOVADQ (%r10),\TMP3
+.irpc index, 1234
+ AESENC \TMP3, %xmm\index
+.endr
+ add $16,%r10
+ sub $1,%eax
+ jnz aes_loop_par_enc
+
+aes_loop_par_enc_done:
+ MOVADQ (%r10), \TMP3
AESENCLAST \TMP3, \XMM1 # Round 10
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
@@ -986,8 +985,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
AESENC \TMP3, \XMM3
AESENC \TMP3, \XMM4
PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
- movaps 0xa0(%arg1), \TMP3
- AESENCLAST \TMP3, \XMM1 # Round 10
+ lea 0xa0(%arg1),%r10
+ mov keysize,%eax
+ shr $2,%eax # 128->4, 192->6, 256->8
+ sub $4,%eax # 128->0, 192->2, 256->4
+ jz aes_loop_par_dec_done
+
+aes_loop_par_dec:
+ MOVADQ (%r10),\TMP3
+.irpc index, 1234
+ AESENC \TMP3, %xmm\index
+.endr
+ add $16,%r10
+ sub $1,%eax
+ jnz aes_loop_par_dec
+
+aes_loop_par_dec_done:
+ MOVADQ (%r10), \TMP3
+ AESENCLAST \TMP3, \XMM1 # last round
AESENCLAST \TMP3, \XMM2
AESENCLAST \TMP3, \XMM3
AESENCLAST \TMP3, \XMM4
@@ -1155,33 +1170,29 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
pxor \TMP6, \XMMDst # reduced result is in XMMDst
.endm
-/* Encryption of a single block done*/
-.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
- pxor (%arg1), \XMM0
- movaps 16(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 32(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 48(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 64(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 80(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 96(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 112(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 128(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 144(%arg1), \TMP1
- AESENC \TMP1, \XMM0
- movaps 160(%arg1), \TMP1
- AESENCLAST \TMP1, \XMM0
-.endm
+/* Encryption of a single block
+* uses eax & r10
+*/
+.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
+ pxor (%arg1), \XMM0
+ mov keysize,%eax
+ shr $2,%eax # 128->4, 192->6, 256->8
+ add $5,%eax # 128->9, 192->11, 256->13
+ lea 16(%arg1), %r10 # get first expanded key address
+
+_esb_loop_\@:
+ MOVADQ (%r10),\TMP1
+ AESENC \TMP1,\XMM0
+ add $16,%r10
+ sub $1,%eax
+ jnz _esb_loop_\@
+
+ MOVADQ (%r10),\TMP1
+ AESENCLAST \TMP1,\XMM0
+.endm
/*****************************************************************************
* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
* u8 *out, // Plaintext output. Encrypt in-place is allowed.
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index ae855f4f64b7..947c6bf52c33 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -43,6 +43,7 @@
#include <asm/crypto/glue_helper.h>
#endif
+
/* This data is stored at the end of the crypto_tfm struct.
* It's a type of per "session" data storage location.
* This needs to be 16 byte aligned.
@@ -182,7 +183,8 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len)
{
- if (plaintext_len < AVX_GEN2_OPTSIZE) {
+ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
+ if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){
aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
aad_len, auth_tag, auth_tag_len);
} else {
@@ -197,7 +199,8 @@ static void aesni_gcm_dec_avx(void *ctx, u8 *out,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len)
{
- if (ciphertext_len < AVX_GEN2_OPTSIZE) {
+ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
+ if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
aad_len, auth_tag, auth_tag_len);
} else {
@@ -231,7 +234,8 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len)
{
- if (plaintext_len < AVX_GEN2_OPTSIZE) {
+ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
+ if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
aad_len, auth_tag, auth_tag_len);
} else if (plaintext_len < AVX_GEN4_OPTSIZE) {
@@ -250,7 +254,8 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
u8 *auth_tag, unsigned long auth_tag_len)
{
- if (ciphertext_len < AVX_GEN2_OPTSIZE) {
+ struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
+ if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
aad, aad_len, auth_tag, auth_tag_len);
} else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
@@ -511,7 +516,7 @@ static int ctr_crypt(struct blkcipher_desc *desc,
kernel_fpu_begin();
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
aesni_ctr_enc_tfm(ctx, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes & AES_BLOCK_MASK, walk.iv);
+ nbytes & AES_BLOCK_MASK, walk.iv);
nbytes &= AES_BLOCK_SIZE - 1;
err = blkcipher_walk_done(desc, &walk, nbytes);
}
@@ -902,7 +907,8 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
}
/*Account for 4 byte nonce at the end.*/
key_len -= 4;
- if (key_len != AES_KEYSIZE_128) {
+ if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
+ key_len != AES_KEYSIZE_256) {
crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@@ -1013,6 +1019,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
__be32 counter = cpu_to_be32(1);
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+ u32 key_len = ctx->aes_key_expanded.key_length;
void *aes_ctx = &(ctx->aes_key_expanded);
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
u8 iv_tab[16+AESNI_ALIGN];
@@ -1027,6 +1034,13 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
/* to 8 or 12 bytes */
if (unlikely(req->assoclen != 8 && req->assoclen != 12))
return -EINVAL;
+ if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
+ return -EINVAL;
+ if (unlikely(key_len != AES_KEYSIZE_128 &&
+ key_len != AES_KEYSIZE_192 &&
+ key_len != AES_KEYSIZE_256))
+ return -EINVAL;
+
/* IV below built */
for (i = 0; i < 4; i++)
*(iv+i) = ctx->nonce[i];
@@ -1091,6 +1105,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
int retval = 0;
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+ u32 key_len = ctx->aes_key_expanded.key_length;
void *aes_ctx = &(ctx->aes_key_expanded);
unsigned long auth_tag_len = crypto_aead_authsize(tfm);
u8 iv_and_authTag[32+AESNI_ALIGN];
@@ -1104,6 +1119,13 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
if (unlikely((req->cryptlen < auth_tag_len) ||
(req->assoclen != 8 && req->assoclen != 12)))
return -EINVAL;
+ if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
+ return -EINVAL;
+ if (unlikely(key_len != AES_KEYSIZE_128 &&
+ key_len != AES_KEYSIZE_192 &&
+ key_len != AES_KEYSIZE_256))
+ return -EINVAL;
+
/* Assuming we are supporting rfc4106 64-bit extended */
/* sequence numbers We need to have the AAD length */
/* equal to 8 or 12 bytes */
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 38a14f818ef1..d6fc59aaaadf 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -504,6 +504,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Triple DES EDE Cipher Algorithm, asm optimized");
MODULE_ALIAS_CRYPTO("des3_ede");
MODULE_ALIAS_CRYPTO("des3_ede-asm");
-MODULE_ALIAS_CRYPTO("des");
-MODULE_ALIAS_CRYPTO("des-asm");
MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");