summaryrefslogtreecommitdiff
path: root/arch/arm/lib
diff options
context:
space:
mode:
authorChandler Zhang <chazhang@nvidia.com>2013-03-15 20:02:59 +0800
committerDan Willemsen <dwillemsen@nvidia.com>2013-09-14 13:05:32 -0700
commitdfe6bcb09f60516fa57b4eb8900b114c6aa7508a (patch)
treeddfb046fca4742615fd9dc94a6d082b6ecd6eee4 /arch/arm/lib
parentce62ca5a7c32518039015b86e873bde49b286746 (diff)
arch: arm: lib: optimize memcpy for cortex-A15
The LDRD/STRD instructions are faster than LDM/STM on Cortex-A15. Also optimized the cache preload size for Cortex-A15. Added USE_LDRDSTRD_OVER_LDMSTM to turn on the LDRD/STRD optimization. Added ARM_PLD_SIZE (default 32), which should be set to 64 for Cortex-A15. Bug 1185248 Change-Id: I4fa8c25bcd9b7823a11018817a4d17e3357ae681 Signed-off-by: Chandler Zhang <chazhang@nvidia.com> Reviewed-on: http://git-master/r/211599 GVS: Gerrit_Virtual_Submit Reviewed-by: Krishna Reddy <vdumpa@nvidia.com>
Diffstat (limited to 'arch/arm/lib')
-rw-r--r--arch/arm/lib/copy_from_user.S11
-rw-r--r--arch/arm/lib/copy_template.S45
-rw-r--r--arch/arm/lib/copy_to_user.S11
-rw-r--r--arch/arm/lib/memcpy.S11
4 files changed, 76 insertions, 2 deletions
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 66a477a3e3cc..317f5e3fe43b 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -68,6 +68,17 @@
stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
.endm
+#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM /* only built when the LDRD/STRD option is defined */
+ .macro cpy8w dst src reg1 reg2 abort /* copy 8 words (32 bytes) from 'src' to 'dst' using 'reg1'/'reg2' as scratch */
+ .irp offset, #0, #8, #16, #24 /* four passes, one 8-byte slot of 'dst' per pass */
+ ldr1w \src, \reg1, \abort /* first word of the pair, loaded through ldr1w with the 'abort' fixup */
+ ldr1w \src, \reg2, \abort /* second word; ldr1w is expected to advance 'src' itself (no explicit add for 'src' here) */
+ strd \reg1, \reg2, [\dst, \offset] /* store both words at 'dst' + offset; 'dst' is not written back here */
+ .endr
+ add \dst, \dst, #32 /* step 'dst' past the 32 bytes just stored */
+ .endm
+#endif
+
.macro str1b ptr reg cond=al abort
str\cond\()b \reg, [\ptr], #1
.endm
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..54c129673d5a 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -34,6 +34,11 @@
* in provided registers and increments 'ptr' past those words.
* The'abort' argument is used for fixup tables.
*
+ * cpy8w dst src reg1 reg2 abort
+ *
+ * This copies eight words from 'src' to 'dst' through 'reg1'/'reg2' and
+ * advances both pointers. The 'abort' argument is used for fixup tables.
+ *
* ldr1b ptr reg cond abort
*
* Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
@@ -66,7 +71,6 @@
* than one 32bit instruction in Thumb-2)
*/
-
enter r4, lr
subs r2, r2, #4
@@ -90,6 +94,38 @@
CALGN( add pc, r4, ip )
PLD( pld [r1, #0] )
+#ifdef CONFIG_ARM_PLD_64BYTE /* 64-byte prefetch stride variant; #ifdef matches the other CONFIG_ option tests in this patch */
+2: PLD( cmp r2, #32 ) /* NOTE(review): this size guard exists only inside PLD(); confirm PLD() is never empty when this option is set */
+ PLD( blt .32cpy ) /* r2 < 32: skip the 64-byte loop and do the single 32-byte copy */
+.64cpy: PLD( subs r2, r2, #224 ) /* bias r2 by -224; a negative result skips the extra prefetches */
+ PLD( pld [r1, #60] ) /* prefetch ahead of the source pointer r1 */
+ PLD( blt 4f )
+ PLD( pld [r1, #124] ) /* further prefetches, 64 bytes apart */
+ PLD( pld [r1, #188] )
+3: PLD( pld [r1, #252] ) /* loop head: one new prefetch per 64-byte pass */
+#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM
+4: cpy8w r0, r1, r4, r5, abort = 20f /* LDRD/STRD variant: two 8-word copies = 64 bytes per pass */
+ cpy8w r0, r1, r4, r5, abort = 20f
+ subs r2, r2, #64 /* account for 64 bytes; the flags feed the bge below */
+#else
+4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f /* LDM/STM variant: two 8-word load/store pairs = 64 bytes per pass */
+ str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f
+ ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f
+ subs r2, r2, #64 /* account for 64 bytes; the flags feed the bge below */
+ str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f
+#endif
+ bge 3b /* r2 still >= 0: another pass, including the prefetch at 3 */
+ PLD( cmn r2, #192 ) /* tests r2 + 192 */
+ PLD( bge 4b ) /* r2 + 192 >= 0: another 64-byte pass without a new prefetch */
+ PLD( cmn r2, #224 ) /* tests r2 + 224 */
+ PLD( blt 5f ) /* r2 + 224 < 0: skip the 32-byte copy and go to the tail at 5 */
+#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM
+.32cpy: cpy8w r0, r1, r4, r5, abort = 20f /* single 8-word (32-byte) copy; r2 is not adjusted here */
+#else
+.32cpy: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f /* single 8-word (32-byte) copy; r2 is not adjusted here */
+ str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f
+#endif
+#else
2: PLD( subs r2, r2, #96 )
PLD( pld [r1, #28] )
PLD( blt 4f )
@@ -97,13 +133,18 @@
PLD( pld [r1, #92] )
3: PLD( pld [r1, #124] )
+#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM
+4: cpy8w r0, r1, r4, r5, abort = 20f
+ subs r2, r2, #32
+#else
4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
subs r2, r2, #32
str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+#endif
bge 3b
PLD( cmn r2, #96 )
PLD( bge 4b )
-
+#endif
5: ands ip, r2, #28
rsb ip, ip, #32
#if LDR1W_SHIFT > 0
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index d066df686e17..185460fc4a1f 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -71,6 +71,17 @@
str1w \ptr, \reg8, \abort
.endm
+#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM /* only built when the LDRD/STRD option is defined */
+ .macro cpy8w dst src reg1 reg2 abort /* copy 8 words (32 bytes) from 'src' to 'dst' using 'reg1'/'reg2' as scratch */
+ .irp offset, #0, #8, #16, #24 /* four passes, one 8-byte slot of 'src' per pass */
+ ldrd \reg1, \reg2, [\src, \offset] /* load two words from 'src' + offset; 'src' is not written back here and no 'abort' fixup is attached */
+ str1w \dst, \reg1, \abort /* first word, stored through str1w with the 'abort' fixup */
+ str1w \dst, \reg2, \abort /* second word; str1w is expected to advance 'dst' itself (no explicit add for 'dst' here) */
+ .endr
+ add \src, \src, #32 /* step 'src' past the 32 bytes just loaded */
+ .endm
+#endif
+
.macro str1b ptr reg cond=al abort
strusr \reg, \ptr, 1, \cond, abort=\abort
.endm
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index a9b9e2287a09..7782a37c1529 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -40,6 +40,17 @@
stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
.endm
+#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM /* only built when the LDRD/STRD option is defined */
+ .macro cpy8w dst src reg1 reg2 abort /* copy 8 words (32 bytes) from 'src' to 'dst'; 'abort' is accepted but not referenced in this variant */
+ .irp offset, #0, #8, #16, #24 /* four passes, one 8-byte slot per pass */
+ ldrd \reg1, \reg2, [\src, \offset] /* load two words from 'src' + offset */
+ strd \reg1, \reg2, [\dst, \offset] /* store them at the same offset from 'dst' */
+ .endr
+ add \src, \src, #32 /* neither access writes back its base, so step both pointers once */
+ add \dst, \dst, #32
+ .endm
+#endif
+
.macro str1b ptr reg cond=al abort
str\cond\()b \reg, [\ptr], #1
.endm