diff options
author | Chandler Zhang <chazhang@nvidia.com> | 2013-03-15 20:02:59 +0800 |
---|---|---|
committer | Dan Willemsen <dwillemsen@nvidia.com> | 2013-09-14 13:05:32 -0700 |
commit | dfe6bcb09f60516fa57b4eb8900b114c6aa7508a (patch) | |
tree | ddfb046fca4742615fd9dc94a6d082b6ecd6eee4 /arch/arm/lib | |
parent | ce62ca5a7c32518039015b86e873bde49b286746 (diff) |
arch: arm: lib: optimize memcpy for cortex-A15
The LDRD/STRD instructions are faster than LDM/STM on Cortex-A15.
Also tuned the cache preload (PLD) stride for Cortex-A15 (64-byte steps instead of 32-byte steps).
Added USE_LDRDSTRD_OVER_LDMSTM (tested in the code as CONFIG_USE_LDRDSTRD_OVER_LDMSTM) to turn on the LDRD/STRD optimization.
Added ARM_PLD_SIZE, default 32. It should be set to 64 for Cortex-A15.
Bug 1185248
Change-Id: I4fa8c25bcd9b7823a11018817a4d17e3357ae681
Signed-off-by: Chandler Zhang <chazhang@nvidia.com>
Reviewed-on: http://git-master/r/211599
GVS: Gerrit_Virtual_Submit
Reviewed-by: Krishna Reddy <vdumpa@nvidia.com>
Diffstat (limited to 'arch/arm/lib')
-rw-r--r-- | arch/arm/lib/copy_from_user.S | 11 | ||||
-rw-r--r-- | arch/arm/lib/copy_template.S | 45 | ||||
-rw-r--r-- | arch/arm/lib/copy_to_user.S | 11 | ||||
-rw-r--r-- | arch/arm/lib/memcpy.S | 11 |
4 files changed, 76 insertions, 2 deletions
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 66a477a3e3cc..317f5e3fe43b 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -68,6 +68,17 @@ stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} .endm +#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM + .macro cpy8w dst src reg1 reg2 abort + .irp offset, #0, #8, #16, #24 + ldr1w \src, \reg1, \abort + ldr1w \src, \reg2, \abort + strd \reg1, \reg2, [\dst, \offset] + .endr + add \dst, \dst, #32 + .endm +#endif + .macro str1b ptr reg cond=al abort str\cond\()b \reg, [\ptr], #1 .endm diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 805e3f8fb007..54c129673d5a 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -34,6 +34,11 @@ * in provided registers and increments 'ptr' past those words. * The'abort' argument is used for fixup tables. * + * cpy8w dst src reg1 reg2 abort + * + * This loads eight words starting from 'src' and stores them in 'dst' + * The 'abort' argument is used for fixup tables. + * * ldr1b ptr reg cond abort * * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte. 
@@ -66,7 +71,6 @@ * than one 32bit instruction in Thumb-2) */ - enter r4, lr subs r2, r2, #4 @@ -90,6 +94,38 @@ CALGN( add pc, r4, ip ) PLD( pld [r1, #0] ) +#if CONFIG_ARM_PLD_64BYTE +2: PLD( cmp r2, #32 ) + PLD( blt .32cpy ) +.64cpy: PLD( subs r2, r2, #224 ) + PLD( pld [r1, #60] ) + PLD( blt 4f ) + PLD( pld [r1, #124] ) + PLD( pld [r1, #188] ) +3: PLD( pld [r1, #252] ) +#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM +4: cpy8w r0, r1, r4, r5, abort = 20f + cpy8w r0, r1, r4, r5, abort = 20f + subs r2, r2, #64 +#else +4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f + str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f + ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f + subs r2, r2, #64 + str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f +#endif + bge 3b + PLD( cmn r2, #192 ) + PLD( bge 4b ) + PLD( cmn r2, #224 ) + PLD( blt 5f ) +#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM +.32cpy: cpy8w r0, r1, r4, r5, abort = 20f +#else +.32cpy: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f + str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort = 20f +#endif +#else 2: PLD( subs r2, r2, #96 ) PLD( pld [r1, #28] ) PLD( blt 4f ) @@ -97,13 +133,18 @@ PLD( pld [r1, #92] ) 3: PLD( pld [r1, #124] ) +#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM +4: cpy8w r0, r1, r4, r5, abort = 20f + subs r2, r2, #32 +#else 4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f subs r2, r2, #32 str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f +#endif bge 3b PLD( cmn r2, #96 ) PLD( bge 4b ) - +#endif 5: ands ip, r2, #28 rsb ip, ip, #32 #if LDR1W_SHIFT > 0 diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index d066df686e17..185460fc4a1f 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -71,6 +71,17 @@ str1w \ptr, \reg8, \abort .endm +#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM + .macro cpy8w dst src reg1 reg2 abort + .irp offset, #0, #8, #16, #24 + ldrd \reg1, \reg2, [\src, \offset] + str1w \dst, \reg1, \abort + str1w \dst, \reg2, \abort + .endr + add \src, 
\src, #32 + .endm +#endif + .macro str1b ptr reg cond=al abort strusr \reg, \ptr, 1, \cond, abort=\abort .endm diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index a9b9e2287a09..7782a37c1529 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -40,6 +40,17 @@ stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8} .endm +#ifdef CONFIG_USE_LDRDSTRD_OVER_LDMSTM + .macro cpy8w dst src reg1 reg2 abort + .irp offset, #0, #8, #16, #24 + ldrd \reg1, \reg2, [\src, \offset] + strd \reg1, \reg2, [\dst, \offset] + .endr + add \src, \src, #32 + add \dst, \dst, #32 + .endm +#endif + .macro str1b ptr reg cond=al abort str\cond\()b \reg, [\ptr], #1 .endm |