author	Gary King <gking@nvidia.com>	2009-12-07 16:18:05 -0800
committer	Gary King <gking@nvidia.com>	2009-12-07 16:18:05 -0800
commit	bb11d46b72a4a2b53f890faa970a380cf0084bf2 (patch)
tree	2d9d2955e069fed69febbe567a835fcc48ec6ec6 /arch/arm/lib
parent	e1b5e49debba7174e7b9c48195de8abfd54911dd (diff)
parent	74ece4056571443eef30d4dff62180944b5a39d8 (diff)
Merge commit 'arm/2.6.28-arm' into android-tegra-2.6.29
Conflicts:
MAINTAINERS
arch/arm/Kconfig
arch/arm/Makefile
arch/arm/boot/compressed/head.S
arch/arm/common/Makefile
arch/arm/configs/realview-smp_defconfig
arch/arm/configs/realview_defconfig
arch/arm/configs/versatile_defconfig
arch/arm/include/asm/elf.h
arch/arm/include/asm/uaccess.h
arch/arm/kernel/module.c
arch/arm/kernel/signal.c
arch/arm/mach-realview/Kconfig
arch/arm/mach-realview/Makefile
arch/arm/mach-realview/core.c
arch/arm/mach-realview/core.h
arch/arm/mach-realview/include/mach/board-pba8.h
arch/arm/mach-realview/include/mach/debug-macro.S
arch/arm/mach-realview/include/mach/hardware.h
arch/arm/mach-realview/include/mach/irqs.h
arch/arm/mach-realview/include/mach/memory.h
arch/arm/mach-realview/include/mach/uncompress.h
arch/arm/mach-realview/localtimer.c
arch/arm/mach-realview/platsmp.c
arch/arm/mach-realview/realview_eb.c
arch/arm/mach-realview/realview_pb1176.c
arch/arm/mach-realview/realview_pb11mp.c
arch/arm/mach-realview/realview_pba8.c
arch/arm/mm/Kconfig
arch/arm/mm/copypage-v6.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/proc-v7.S
arch/arm/oprofile/op_model_mpcore.c
arch/arm/tools/mach-types
arch/arm/vfp/vfpmodule.c
drivers/mtd/maps/integrator-flash.c
drivers/net/smsc911x.c
drivers/net/smsc911x.h
Diffstat (limited to 'arch/arm/lib')
38 files changed, 354 insertions, 113 deletions
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 1154d924080b..b18944b85e4b 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -41,9 +41,12 @@ ENTRY(__aeabi_llsl)
	subs	r3, r2, #32
	rsb	ip, r2, #32
+	itett	mi
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
-	orrmi	ah, ah, al, lsr ip
+ ARM(	orrmi	ah, ah, al, lsr ip	)
+ THUMB(	lsrmi	r3, al, ip	)
+ THUMB(	orrmi	ah, ah, r3	)
	mov	al, al, lsl r2
	mov	pc, lr

diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 9f8b35572f8c..0d5ace74dd9d 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -41,9 +41,12 @@ ENTRY(__aeabi_lasr)
	subs	r3, r2, #32
	rsb	ip, r2, #32
+	itett	mi
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
-	orrmi	al, al, ah, lsl ip
+ ARM(	orrmi	al, al, ah, lsl ip	)
+ THUMB(	lslmi	r3, ah, ip	)
+ THUMB(	orrmi	al, al, r3	)
	mov	ah, ah, asr r2
	mov	pc, lr

diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index b0951d0e8b2c..41c4d3561b8f 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -28,7 +28,7 @@ ENTRY(__backtrace)
ENTRY(c_backtrace)
-#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
+#if defined(CONFIG_THUMB2_KERNEL) || !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
	mov	pc, lr
ENDPROC(__backtrace)
ENDPROC(c_backtrace)
@@ -38,7 +38,10 @@ ENDPROC(c_backtrace)
	beq	no_frame		@ we have no stack frames
	tst	r1, #0x10		@ 26 or 32-bit mode?
-	moveq	mask, #0xfc000003	@ mask for 26-bit
+	itte	eq
+ ARM(	moveq	mask, #0xfc000003	)
+ THUMB(	moveq	mask, #0xfc000000	)
+ THUMB(	orreq	mask, #0x03	)
	movne	mask, #0		@ mask for 32-bit
1:	stmfd	sp!, {pc}		@ calculate offset of PC stored
@@ -73,6 +76,7 @@ for_each_frame:	tst	frame, mask	@ Check for address exceptions
1003:	ldr	r2, [sv_pc, #-4]	@ if stmfd sp!, {args} exists,
	ldr	r3, .Ldsi+4		@ adjust saved 'pc' back one
	teq	r3, r2, lsr #10		@ instruction
+	ite	ne
	subne	r0, sv_pc, #4		@ allow for mov
	subeq	r0, sv_pc, #8		@ allow for mov + stmia
@@ -84,6 +88,7 @@ for_each_frame:	tst	frame, mask	@ Check for address exceptions
	ldr	r1, [sv_pc, #-4]	@ if stmfd sp!, {args} exists,
	ldr	r3, .Ldsi+4
	teq	r3, r1, lsr #10
+	ittt	eq
	ldreq	r0, [frame, #-8]	@ get sp
	subeq	r0, r0, #4		@ point at the last arg
	bleq	.Ldumpstm		@ dump saved registers
@@ -91,6 +96,7 @@ for_each_frame:	tst	frame, mask	@ Check for address exceptions
1004:	ldr	r1, [sv_pc, #0]		@ if stmfd sp!, {..., fp, ip, lr, pc}
	ldr	r3, .Ldsi		@ instruction exists,
	teq	r3, r1, lsr #10
+	itt	eq
	subeq	r0, frame, #16
	bleq	.Ldumpstm		@ dump saved registers
@@ -126,10 +132,13 @@ ENDPROC(c_backtrace)
	mov	reg, #10
	mov	r7, #0
1:	mov	r3, #1
-	tst	instr, r3, lsl reg
+ ARM(	tst	instr, r3, lsl reg	)
+ THUMB(	lsl	r3, reg	)
+ THUMB(	tst	instr, r3	)
	beq	2f
	add	r7, r7, #1
	teq	r7, #6
+	itte	eq
	moveq	r7, #1
	moveq	r1, #'\n'
	movne	r1, #' '
@@ -140,6 +149,7 @@ ENDPROC(c_backtrace)
2:	subs	reg, reg, #1
	bpl	1b
	teq	r7, #0
+	itt	ne
	adrne	r0, .Lcr
	blne	printk
	ldmfd	sp!, {instr, reg, stack, r7, pc}

diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 2e787d40d599..28c7e4e1ef76 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -5,6 +5,13 @@
	and	r3, r0, #7		@ Get bit offset
	add	r1, r1, r0, lsr #3	@ Get byte offset
	mov	r3, r2, lsl r3
+#ifdef CONFIG_ARM_ERRATA_351422
+	mrc	p15, 0, r0, c0, c0, 5
+	and	r0, r0, #0xf
+	mov	r0, r0, lsl #8
+3:	subs	r0, r0, #1
+	bpl	3b
+#endif
1:	ldrexb	r2, [r1]
	\instr	r2, r2, r3
	strexb	r0, r2, [r1]
@@ -13,18 +20,29 @@
	mov	pc, lr
	.endm
-	.macro	testop, instr, store
+	.macro	testop, instr, store, cond=al
	and	r3, r0, #7		@ Get bit offset
	mov	r2, #1
	add	r1, r1, r0, lsr #3	@ Get byte offset
	mov	r3, r2, lsl r3		@ create mask
+#ifdef CONFIG_ARM_ERRATA_351422
+	mrc	p15, 0, r0, c0, c0, 5
+	and	r0, r0, #0xf
+	mov	r0, r0, lsl #8
+3:	subs	r0, r0, #1
+	bpl	3b
+#endif
1:	ldrexb	r2, [r1]
	ands	r0, r2, r3		@ save old value of bit
-	\instr	r2, r2, r3		@ toggle bit
+	.ifnc	\cond,al
+	it	\cond
+	.endif
+	\instr	r2, r2, r3		@ toggle bit
	strexb	ip, r2, [r1]
	cmp	ip, #0
	bne	1b
	cmp	r0, #0
+	it	ne
	movne	r0, #1
2:	mov	pc, lr
	.endm
@@ -49,7 +67,7 @@
 * Note: we can trivially conditionalise the store instruction
 * to avoid dirtying the data cache.
 */
-	.macro	testop, instr, store
+	.macro	testop, instr, store, cond=al
	add	r1, r1, r0, lsr #3
	and	r3, r0, #7
	mov	r0, #1

diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S
index 4d6bc71231f3..2147bec77ce4 100644
--- a/arch/arm/lib/clear_user.S
+++ b/arch/arm/lib/clear_user.S
@@ -26,21 +26,20 @@ ENTRY(__clear_user)
	ands	ip, r0, #3
	beq	1f
	cmp	ip, #2
-USER(	strbt	r2, [r0], #1)
-USER(	strlebt	r2, [r0], #1)
-USER(	strltbt	r2, [r0], #1)
+	strusr	r2, r0, 1
+	strusr	r2, r0, 1, le
+	strusr	r2, r0, 1, lt
	rsb	ip, ip, #4
	sub	r1, r1, ip		@  7  6  5  4  3  2  1
1:	subs	r1, r1, #8		@ -1 -2 -3 -4 -5 -6 -7
-USER(	strplt	r2, [r0], #4)
-USER(	strplt	r2, [r0], #4)
+	strusr	r2, r0, 4, pl, rept=2
	bpl	1b
	adds	r1, r1, #4		@  3  2  1  0 -1 -2 -3
-USER(	strplt	r2, [r0], #4)
+	strusr	r2, r0, 4, pl
2:	tst	r1, #2			@ 1x 1x 0x 0x 1x 1x 0x
-USER(	strnebt	r2, [r0], #1)
-USER(	strnebt	r2, [r0], #1)
+	strusr	r2, r0, 1, ne, rept=2
	tst	r1, #1			@ x1 x0 x1 x0 x1 x0 x1
+	it	ne			@ explicit IT needed for the label
USER(	strnebt	r2, [r0])
	mov	r0, #0
	ldmfd	sp!, {r1, pc}

diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index 56799a165cc4..9a7a16426428 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -33,11 +33,15 @@
 *	Number of bytes NOT copied.
 */
+#ifndef CONFIG_THUMB2_KERNEL
+#define LDR1W_SHIFT 0
+#else
+#define LDR1W_SHIFT 1
+#endif
+#define STR1W_SHIFT 0
+
	.macro ldr1w ptr reg abort
-100:	ldrt \reg, [\ptr], #4
-	.section __ex_table, "a"
-	.long 100b, \abort
-	.previous
+	ldrusr	\reg, \ptr, 4, abort=\abort
	.endm
	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -53,14 +57,11 @@
	.endm
	.macro ldr1b ptr reg cond=al abort
-100:	ldr\cond\()bt \reg, [\ptr], #1
-	.section __ex_table, "a"
-	.long 100b, \abort
-	.previous
+	ldrusr	\reg, \ptr, 1, \cond, abort=\abort
	.endm
	.macro str1w ptr reg abort
-	str \reg, [\ptr], #4
+	W(str) \reg, [\ptr], #4
	.endm
	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
@@ -68,6 +69,9 @@
	.endm
	.macro str1b ptr reg cond=al abort
+	.ifnc	\cond,al
+	it	\cond
+	.endif
	str\cond\()b \reg, [\ptr], #1
	.endm

diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
index 6ae04db1ca4f..1c57a034cadf 100644
--- a/arch/arm/lib/copy_page.S
+++ b/arch/arm/lib/copy_page.S
@@ -39,8 +39,10 @@ ENTRY(copy_page)
	ldmia	r1!, {r3, r4, ip, lr}	@ 4
	subs	r2, r2, #1		@ 1
	stmia	r0!, {r3, r4, ip, lr}	@ 4
+	itt	gt
	ldmgtia	r1!, {r3, r4, ip, lr}	@ 4
	bgt	1b			@ 1
+ PLD(	itt	eq	)
 PLD(	ldmeqia	r1!, {r3, r4, ip, lr}	)
 PLD(	beq	2b	)
	ldmfd	sp!, {r4, pc}		@ 3

diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 139cce646055..8e8fc03f55bd 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -57,6 +57,13 @@
 *
 *	Restore registers with the values previously saved with the
 *	'preserv' macro. Called upon code termination.
+ *
+ * LDR1W_SHIFT
+ * STR1W_SHIFT
+ *
+ *	Correction to be applied to the "ip" register when branching into
+ *	the ldr1w or str1w instructions (some of these macros may expand to
+ *	more than one 32bit instruction in Thumb-2)
 */
@@ -99,9 +106,16 @@
5:	ands	ip, r2, #28
	rsb	ip, ip, #32
+#if LDR1W_SHIFT > 0
+	lsl	ip, ip, #LDR1W_SHIFT
+#endif
+	it	ne
	addne	pc, pc, ip		@ C is always clear here
	b	7f
-6:	nop
+6:
+	.rept	(1 << LDR1W_SHIFT)
+	W(nop)
+	.endr
	ldr1w	r1, r3, abort=20f
	ldr1w	r1, r4, abort=20f
	ldr1w	r1, r5, abort=20f
@@ -110,9 +124,16 @@
	ldr1w	r1, r8, abort=20f
	ldr1w	r1, lr, abort=20f
+#if LDR1W_SHIFT < STR1W_SHIFT
+	lsl	ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
+#elif LDR1W_SHIFT > STR1W_SHIFT
+	lsr	ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
+#endif
	add	pc, pc, ip
	nop
-	nop
+	.rept	(1 << STR1W_SHIFT)
+	W(nop)
+	.endr
	str1w	r0, r3, abort=20f
	str1w	r0, r4, abort=20f
	str1w	r0, r5, abort=20f

diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 22f968bbdffd..dc0fe7391527 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -33,8 +33,15 @@
 *	Number of bytes NOT copied.
 */
+#define LDR1W_SHIFT 0
+#ifndef CONFIG_THUMB2_KERNEL
+#define STR1W_SHIFT 0
+#else
+#define STR1W_SHIFT 1
+#endif
+
	.macro ldr1w ptr reg abort
-	ldr \reg, [\ptr], #4
+	W(ldr) \reg, [\ptr], #4
	.endm
	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -46,14 +53,14 @@
	.endm
	.macro ldr1b ptr reg cond=al abort
+	.ifnc	\cond,al
+	it	\cond
+	.endif
	ldr\cond\()b \reg, [\ptr], #1
	.endm
	.macro str1w ptr reg abort
-100:	strt \reg, [\ptr], #4
-	.section __ex_table, "a"
-	.long 100b, \abort
-	.previous
+	strusr	\reg, \ptr, 4, abort=\abort
	.endm
	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
@@ -68,10 +75,7 @@
	.endm
	.macro str1b ptr reg cond=al abort
-100:	str\cond\()bt \reg, [\ptr], #1
-	.section __ex_table, "a"
-	.long 100b, \abort
-	.previous
+	strusr	\reg, \ptr, 1, \cond, abort=\abort
	.endm
	.macro enter reg1 reg2

diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index 31d3cb34740d..e9a504e4302e 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -39,6 +39,7 @@ td3 .req lr
		/* we must have at least one byte. */
	tst	buf, #1			@ odd address?
+	itttt	ne
	movne	sum, sum, ror #8
	ldrneb	td0, [buf], #1
	subne	len, len, #1
@@ -68,25 +69,30 @@ td3 .req lr
	bne	.Lless8_wordlp
.Lless8_byte:	tst	len, #1		@ odd number of bytes
+	itt	ne
	ldrneb	td0, [buf], #1		@ include last byte
	adcnes	sum, sum, td0, put_byte_0	@ update checksum
.Ldone:	adc	r0, sum, #0		@ collect up the last carry
	ldr	td0, [sp], #4
	tst	td0, #1			@ check buffer alignment
+	it	ne
	movne	r0, r0, ror #8		@ rotate checksum by 8 bits
	ldr	pc, [sp], #4		@ return
.Lnot_aligned:	tst	buf, #1		@ odd address
+	ittt	ne
	ldrneb	td0, [buf], #1		@ make even
	subne	len, len, #1
	adcnes	sum, sum, td0, put_byte_1	@ update checksum
	tst	buf, #2			@ 32-bit aligned?
#if __LINUX_ARM_ARCH__ >= 4
+	itt	ne
	ldrneh	td0, [buf], #2		@ make 32-bit aligned
	subne	len, len, #2
#else
+	itttt	ne
	ldrneb	td0, [buf], #1
	ldrneb	ip, [buf], #1
	subne	len, len, #2
@@ -96,6 +102,7 @@ td3 .req lr
	orrne	td0, ip, td0, lsl #8
#endif
#endif
+	it	ne
	adcnes	sum, sum, td0		@ update checksum
	mov	pc, lr
@@ -105,10 +112,12 @@ ENTRY(csum_partial)
	blo	.Lless8			@ 8 bytes to copy.
	tst	buf, #1
+	it	ne
	movne	sum, sum, ror #8
	adds	sum, sum, #0		@ C = 0
	tst	buf, #3			@ Test destination alignment
+	it	ne
	blne	.Lnot_aligned		@ align destination, return here
1:	bics	ip, len, #31

diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..8e1c141b6524 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -40,6 +40,7 @@ sum	.req	r3
	adcs	sum, sum, ip, put_byte_1	@ update checksum
	strb	ip, [dst], #1
	tst	dst, #2
+	it	eq
	moveq	pc, lr			@ dst is now 32bit aligned
.Ldst_16bit:	load2b	r8, ip
@@ -94,6 +95,7 @@ FN_ENTRY
	adds	sum, sum, #0		@ C = 0
	tst	dst, #3			@ Test destination alignment
+	it	ne
	blne	.Ldst_unaligned		@ align destination, return here
 /*
@@ -147,6 +149,7 @@ FN_ENTRY
	strb	r5, [dst], #1
	mov	r5, r4, get_byte_2
.Lexit:	tst	len, #1
+	ittt	ne
	strneb	r5, [dst], #1
	andne	r5, r5, #255
	adcnes	sum, sum, r5, put_byte_0
@@ -160,6 +163,7 @@ FN_ENTRY
.Ldone:	adc	r0, sum, #0
	ldr	sum, [sp, #0]		@ dst
	tst	sum, #1
+	it	ne
	movne	r0, r0, ror #8
	load_regs

diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index 14677fb4b0c4..152ed83480f7 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -26,50 +26,28 @@
	.endm
	.macro	load1b,	reg1
-9999:	ldrbt	\reg1, [r0], $1
-	.section __ex_table, "a"
-	.align	3
-	.long	9999b, 6001f
-	.previous
+	ldrusr	\reg1, r0, 1
	.endm
	.macro	load2b, reg1, reg2
-9999:	ldrbt	\reg1, [r0], $1
-9998:	ldrbt	\reg2, [r0], $1
-	.section __ex_table, "a"
-	.long	9999b, 6001f
-	.long	9998b, 6001f
-	.previous
+	ldrusr	\reg1, r0, 1
+	ldrusr	\reg2, r0, 1
	.endm
	.macro	load1l, reg1
-9999:	ldrt	\reg1, [r0], $4
-	.section __ex_table, "a"
-	.align	3
-	.long	9999b, 6001f
-	.previous
+	ldrusr	\reg1, r0, 4
	.endm
	.macro	load2l, reg1, reg2
-9999:	ldrt	\reg1, [r0], $4
-9998:	ldrt	\reg2, [r0], $4
-	.section __ex_table, "a"
-	.long	9999b, 6001f
-	.long	9998b, 6001f
-	.previous
+	ldrusr	\reg1, r0, 4
+	ldrusr	\reg2, r0, 4
	.endm
	.macro	load4l, reg1, reg2, reg3, reg4
-9999:	ldrt	\reg1, [r0], $4
-9998:	ldrt	\reg2, [r0], $4
-9997:	ldrt	\reg3, [r0], $4
-9996:	ldrt	\reg4, [r0], $4
-	.section __ex_table, "a"
-	.long	9999b, 6001f
-	.long	9998b, 6001f
-	.long	9997b, 6001f
-	.long	9996b, 6001f
-	.previous
+	ldrusr	\reg1, r0, 4
+	ldrusr	\reg2, r0, 4
+	ldrusr	\reg3, r0, 4
+	ldrusr	\reg4, r0, 4
	.endm
 /*
@@ -92,14 +70,15 @@
 */
	.section .fixup,"ax"
	.align	4
-6001:	mov	r4, #-EFAULT
+9001:	mov	r4, #-EFAULT
	ldr	r5, [fp, #4]		@ *err_ptr
	str	r4, [r5]
	ldmia	sp, {r1, r2}		@ retrieve dst, len
	add	r2, r2, r1
	mov	r0, #0			@ zero the buffer
-6002:	teq	r2, r1
+9002:	teq	r2, r1
+	it	ne
	strneb	r0, [r1], #1
-	bne	6002b
+	bne	9002b
	load_regs
	.previous

diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S
index 8d6a8762ab88..fcd87ffe2b1d 100644
--- a/arch/arm/lib/delay.S
+++ b/arch/arm/lib/delay.S
@@ -31,6 +31,7 @@ ENTRY(__const_udelay)			@ 0 <= r0 <= 0x7fffff06
	mov	r2, r2, lsr #10		@ max = 0x00007fff
	mul	r0, r2, r0		@ max = 2^32-1
	movs	r0, r0, lsr #6
+	it	eq
	moveq	pc, lr
 /*
@@ -58,6 +59,7 @@ ENTRY(__delay)
	movls	pc, lr
	subs	r0, r0, #1
#endif
+	it	hi
	bhi	__delay
	mov	pc, lr
ENDPROC(__udelay)

diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index 1425e789ba86..d02268ac7baf 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -84,8 +84,10 @@ ENTRY(__do_div64)
	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
2:	cmp	xh, yl
+	itt	cs
	orrcs	yh, yh, ip
	subcss	xh, xh, yl
+	it	ne
	movnes	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b
@@ -93,7 +95,9 @@ ENTRY(__do_div64)
	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0
+	it	eq
	cmpeq	xl, r4
+	itt	lo
	movlo	xh, xl
	movlo	pc, lr
@@ -104,7 +108,9 @@ ENTRY(__do_div64)
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
+	it	cc
	cmpcc	xh, r4
+	itt	cs
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
@@ -116,6 +122,7 @@ ENTRY(__do_div64)
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
+	it	eq
	moveq	pc, lr
	@ We still have remainer bits in the low part.  Bring them up.
@@ -177,13 +184,16 @@ ENTRY(__do_div64)
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
-	orr	yl, yl, xh, lsl ip
+ ARM(	orr	yl, yl, xh, lsl ip	)
+ THUMB(	lsl	xh, xh, ip	)
+ THUMB(	orr	yl, yl, xh	)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	mov	pc, lr
	@ eq -> division by 1: obvious enough...
-9:	moveq	yl, xl
+9:	itttt	eq
+	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	moveq	pc, lr

diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S
index 8c4defc4f3c4..1e4cbd4e7be9 100644
--- a/arch/arm/lib/findbit.S
+++ b/arch/arm/lib/findbit.S
@@ -25,7 +25,10 @@ ENTRY(_find_first_zero_bit_le)
	teq	r1, #0
	beq	3f
	mov	r2, #0
-1:	ldrb	r3, [r0, r2, lsr #3]
+1:
+ ARM(	ldrb	r3, [r0, r2, lsr #3]	)
+ THUMB(	lsr	r3, r2, #3	)
+ THUMB(	ldrb	r3, [r0, r3]	)
	eors	r3, r3, #0xff		@ invert bits
	bne	.L_found		@ any now set - found zero bit
	add	r2, r2, #8		@ next bit pointer
@@ -44,7 +47,9 @@ ENTRY(_find_next_zero_bit_le)
	beq	3b
	ands	ip, r2, #7
	beq	1b			@ If new byte, goto old routine
-	ldrb	r3, [r0, r2, lsr #3]
+ ARM(	ldrb	r3, [r0, r2, lsr #3]	)
+ THUMB(	lsr	r3, r2, #3	)
+ THUMB(	ldrb	r3, [r0, r3]	)
	eor	r3, r3, #0xff		@ now looking for a 1 bit
	movs	r3, r3, lsr ip		@ shift off unused bits
	bne	.L_found
@@ -61,7 +66,10 @@ ENTRY(_find_first_bit_le)
	teq	r1, #0
	beq	3f
	mov	r2, #0
-1:	ldrb	r3, [r0, r2, lsr #3]
+1:
+ ARM(	ldrb	r3, [r0, r2, lsr #3]	)
+ THUMB(	lsr	r3, r2, #3	)
+ THUMB(	ldrb	r3, [r0, r3]	)
	movs	r3, r3
	bne	.L_found		@ any now set - found zero bit
	add	r2, r2, #8		@ next bit pointer
@@ -80,7 +88,9 @@ ENTRY(_find_next_bit_le)
	beq	3b
	ands	ip, r2, #7
	beq	1b			@ If new byte, goto old routine
-	ldrb	r3, [r0, r2, lsr #3]
+ ARM(	ldrb	r3, [r0, r2, lsr #3]	)
+ THUMB(	lsr	r3, r2, #3	)
+ THUMB(	ldrb	r3, [r0, r3]	)
	movs	r3, r3, lsr ip		@ shift off unused bits
	bne	.L_found
	orr	r2, r2, #7		@ if zero, then no bits here
@@ -95,7 +105,9 @@ ENTRY(_find_first_zero_bit_be)
	beq	3f
	mov	r2, #0
1:	eor	r3, r2, #0x18		@ big endian byte ordering
-	ldrb	r3, [r0, r3, lsr #3]
+ ARM(	ldrb	r3, [r0, r3, lsr #3]	)
+ THUMB(	lsr	r3, #3	)
+ THUMB(	ldrb	r3, [r0, r3]	)
	eors	r3, r3, #0xff		@ invert bits
	bne	.L_found		@ any now set - found zero bit
	add	r2, r2, #8		@ next bit pointer
@@ -111,7 +123,9 @@ ENTRY(_find_next_zero_bit_be)
	ands	ip, r2, #7
	beq	1b			@ If new byte, goto old routine
	eor	r3, r2, #0x18		@ big endian byte ordering
-	ldrb	r3, [r0, r3, lsr #3]
+ ARM(	ldrb	r3, [r0, r3, lsr #3]	)
+ THUMB(	lsr	r3, #3	)
+ THUMB(	ldrb	r3, [r0, r3]	)
	eor	r3, r3, #0xff		@ now looking for a 1 bit
	movs	r3, r3, lsr ip		@ shift off unused bits
	bne	.L_found
@@ -125,7 +139,9 @@ ENTRY(_find_first_bit_be)
	beq	3f
	mov	r2, #0
1:	eor	r3, r2, #0x18		@ big endian byte ordering
-	ldrb	r3, [r0, r3, lsr #3]
+ ARM(	ldrb	r3, [r0, r3, lsr #3]	)
+ THUMB(	lsr	r3, #3	)
+ THUMB(	ldrb	r3, [r0, r3]	)
	movs	r3, r3
	bne	.L_found		@ any now set - found zero bit
	add	r2, r2, #8		@ next bit pointer
@@ -141,7 +157,9 @@ ENTRY(_find_next_bit_be)
	ands	ip, r2, #7
	beq	1b			@ If new byte, goto old routine
	eor	r3, r2, #0x18		@ big endian byte ordering
-	ldrb	r3, [r0, r3, lsr #3]
+ ARM(	ldrb	r3, [r0, r3, lsr #3]	)
+ THUMB(	lsr	r3, #3	)
+ THUMB(	ldrb	r3, [r0, r3]	)
	movs	r3, r3, lsr ip		@ shift off unused bits
	bne	.L_found
	orr	r2, r2, #7		@ if zero, then no bits here

diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index 6763088b7607..3e1b98056c50 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -36,7 +36,10 @@ ENTRY(__get_user_1)
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
-2:	ldrbt	r2, [r0], #1
+2:
+ ARM(	ldrbt	r2, [r0], #1	)
+ THUMB(	ldrbt	r2, [r0]	)
+ THUMB(	add	r0, #1	)
3:	ldrbt	r3, [r0]
#ifndef __ARMEB__
	orr	r2, r2, r3, lsl #8

diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index 9f4238987fe9..45229e46ed5b 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -12,12 +12,15 @@
.Linsb_align:	rsb	ip, ip, #4
	cmp	ip, r2
+	it	gt
	movgt	ip, r2
	cmp	ip, #2
	ldrb	r3, [r0]
	strb	r3, [r1], #1
+	itt	ge
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
+	itt	gt
	ldrgtb	r3, [r0]
	strgtb	r3, [r1], #1
	subs	r2, r2, ip
@@ -25,6 +28,7 @@ ENTRY(__raw_readsb)
	teq	r2, #0		@ do we have to check for the zero len?
+	it	eq
	moveq	pc, lr
	ands	ip, r1, #3
	bne	.Linsb_align
@@ -72,6 +76,7 @@ ENTRY(__raw_readsb)
	bpl	.Linsb_16_lp
	tst	r2, #15
+	it	eq
	ldmeqfd	sp!, {r4 - r6, pc}
.Linsb_no_16:	tst	r2, #8
@@ -109,13 +114,16 @@ ENTRY(__raw_readsb)
	str	r3, [r1], #4
.Linsb_no_4:	ands	r2, r2, #3
+	it	eq
	ldmeqfd	sp!, {r4 - r6, pc}
	cmp	r2, #2
	ldrb	r3, [r0]
	strb	r3, [r1], #1
+	itt	ge
	ldrgeb	r3, [r0]
	strgeb	r3, [r1], #1
+	itt	gt
	ldrgtb	r3, [r0]
	strgtb	r3, [r1]

diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..1f02e66d079c 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -12,6 +12,7 @@ ENTRY(__raw_readsl)
	teq	r2, #0		@ do we have to check for the zero len?
+	it	eq
	moveq	pc, lr
	ands	ip, r1, #3
	bne	3f
@@ -28,9 +29,11 @@ ENTRY(__raw_readsl)
	bpl	1b
	ldmfd	sp!, {r4, lr}
2:	movs	r2, r2, lsl #31
+	ittt	cs
	ldrcs	r3, [r0, #0]
	ldrcs	ip, [r0, #0]
	stmcsia	r1!, {r3, ip}
+	itt	ne
	ldrne	r3, [r0, #0]
	strne	r3, [r1, #0]
	mov	pc, lr
@@ -48,6 +51,7 @@ ENTRY(__raw_readsl)
4:	subs	r2, r2, #1
	mov	ip, r3, pull #24
+	itttt	ne
	ldrne	r3, [r0]
	orrne	ip, ip, r3, push #8
	strne	ip, [r1], #4
@@ -56,6 +60,7 @@ ENTRY(__raw_readsl)
5:	subs	r2, r2, #1
	mov	ip, r3, pull #16
+	itttt	ne
	ldrne	r3, [r0]
	orrne	ip, ip, r3, push #16
	strne	ip, [r1], #4
@@ -64,6 +69,7 @@ ENTRY(__raw_readsl)
6:	subs	r2, r2, #1
	mov	ip, r3, pull #8
+	itttt	ne
	ldrne	r3, [r0]
	orrne	ip, ip, r3, push #24
	strne	ip, [r1], #4

diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index 1f393d42593d..9db32f0541da 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -26,6 +26,7 @@ ENTRY(__raw_readsw)
	teq	r2, #0
+	it	eq
	moveq	pc, lr
	tst	r1, #3
	bne	.Linsw_align
@@ -76,7 +77,8 @@ ENTRY(__raw_readsw)
	pack	r3, r3, ip
	str	r3, [r1], #4
-.Lno_insw_2:	ldrneh	r3, [r0]
+.Lno_insw_2:	itt	ne
+	ldrneh	r3, [r0]
	strneh	r3, [r1]
	ldmfd	sp!, {r4, r5, pc}
@@ -94,6 +96,7 @@ ENTRY(__raw_readsw)
#endif
.Linsw_noalign:	stmfd	sp!, {r4, lr}
+	it	cc
	ldrccb	ip, [r1, #-1]!
	bcc	1f
@@ -121,6 +124,7 @@ ENTRY(__raw_readsw)
3:	tst	r2, #1
	strb	ip, [r1], #1
+	itttt	ne
	ldrneh	ip, [r0]
 _BE_ONLY_(	movne	ip, ip, ror #8	)
	strneb	ip, [r1], #1

diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 68b92f4acaeb..5fad6b0c7f05 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -32,12 +32,15 @@
.Loutsb_align:	rsb	ip, ip, #4
	cmp	ip, r2
+	it	gt
	movgt	ip, r2
	cmp	ip, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0]
+	itt	ge
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
+	itt	gt
	ldrgtb	r3, [r1], #1
	strgtb	r3, [r0]
	subs	r2, r2, ip
@@ -45,6 +48,7 @@ ENTRY(__raw_writesb)
	teq	r2, #0		@ do we have to check for the zero len?
+	it	eq
	moveq	pc, lr
	ands	ip, r1, #3
	bne	.Loutsb_align
@@ -64,6 +68,7 @@ ENTRY(__raw_writesb)
	bpl	.Loutsb_16_lp
	tst	r2, #15
+	it	eq
	ldmeqfd	sp!, {r4, r5, pc}
.Loutsb_no_16:	tst	r2, #8
@@ -80,13 +85,16 @@ ENTRY(__raw_writesb)
	outword	r3
.Loutsb_no_4:	ands	r2, r2, #3
+	it	eq
	ldmeqfd	sp!, {r4, r5, pc}
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0]
+	itt	ge
	ldrgeb	r3, [r1], #1
	strgeb	r3, [r0]
+	itt	gt
	ldrgtb	r3, [r1]
	strgtb	r3, [r0]

diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..ced1d9169090 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -12,6 +12,7 @@ ENTRY(__raw_writesl)
	teq	r2, #0		@ do we have to check for the zero len?
+	it	eq
	moveq	pc, lr
	ands	ip, r1, #3
	bne	3f
@@ -28,10 +29,14 @@ ENTRY(__raw_writesl)
	bpl	1b
	ldmfd	sp!, {r4, lr}
2:	movs	r2, r2, lsl #31
+	itt	cs
	ldmcsia	r1!, {r3, ip}
	strcs	r3, [r0, #0]
+	it	ne
	ldrne	r3, [r1, #0]
+	it	cs
	strcs	ip, [r0, #0]
+	it	ne
	strne	r3, [r0, #0]
	mov	pc, lr

diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index d6585612c86b..bb8530310ff3 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -31,6 +31,7 @@ ENTRY(__raw_writesw)
	teq	r2, #0
+	it	eq
	moveq	pc, lr
	ands	r3, r1, #3
	bne	.Loutsw_align
@@ -61,7 +62,8 @@ ENTRY(__raw_writesw)
	ldr	r3, [r1], #4
	outword	r3
-.Lno_outsw_2:	ldrneh	r3, [r1]
+.Lno_outsw_2:	itt	ne
+	ldrneh	r3, [r1]
	strneh	r3, [r0]
	ldmfd	sp!, {r4, r5, pc}
@@ -75,7 +77,11 @@ ENTRY(__raw_writesw)
#endif
.Loutsw_noalign:
-	ldr	r3, [r1, -r3]!
+ ARM(	ldr	r3, [r1, -r3]!	)
+ THUMB(	rsb	r3, r3, #0	)
+ THUMB(	ldr	r3, [r1, r3]	)
+ THUMB(	sub	r1, r3	)
+	it	cs
	subcs	r2, r2, #1
	bcs	2f
	subs	r2, r2, #2
@@ -91,7 +97,8 @@ ENTRY(__raw_writesw)
	bpl	1b
	tst	r2, #1
-3:	movne	ip, r3, lsr #8
+3:	itt	ne
+	movne	ip, r3, lsr #8
	strneh	ip, [r0]
	mov	pc, lr
ENDPROC(__raw_writesw)

diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index 67964bcfc854..1eb73e769c6c 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -56,6 +56,7 @@ Boston, MA 02111-1307, USA.  */
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
+	itte	eq
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1
@@ -65,6 +66,7 @@ Boston, MA 02111-1307, USA.  */
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
+	ittt	lo
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
@@ -73,6 +75,7 @@ Boston, MA 02111-1307, USA.  */
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
+	ittt	lo
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
@@ -84,19 +87,25 @@ Boston, MA 02111-1307, USA.  */
	@ Division loop
1:	cmp	\dividend, \divisor
+	itt	hs
	subhs	\dividend, \dividend, \divisor
	orrhs	\result, \result, \curbit
	cmp	\dividend, \divisor, lsr #1
+	itt	hs
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result, \result, \curbit, lsr #1
	cmp	\dividend, \divisor, lsr #2
+	itt	hs
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result, \result, \curbit, lsr #2
	cmp	\dividend, \divisor, lsr #3
+	itt	hs
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result, \result, \curbit, lsr #3
	cmp	\dividend, #0			@ Early termination?
+	it	ne
	movnes	\curbit, \curbit, lsr #4	@ No, any more bits to do?
+	it	ne
	movne	\divisor, \divisor, lsr #4
	bne	1b
@@ -113,19 +122,24 @@ Boston, MA 02111-1307, USA.  */
#else
	cmp	\divisor, #(1 << 16)
+	itt	hs
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
+	it	lo
	movlo	\order, #0
	cmp	\divisor, #(1 << 8)
+	itt	hs
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8
	cmp	\divisor, #(1 << 4)
+	itt	hs
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4
	cmp	\divisor, #(1 << 2)
+	ite	hi
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1
@@ -152,6 +166,7 @@ Boston, MA 02111-1307, USA.  */
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
+	ittt	lo
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
@@ -160,6 +175,7 @@ Boston, MA 02111-1307, USA.  */
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
+	ittt	lo
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
@@ -173,19 +189,25 @@ Boston, MA 02111-1307, USA.  */
	blt	2f
1:	cmp	\dividend, \divisor
+	it	hs
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
+	it	hs
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
+	it	hs
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
+	it	hs
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
+	it	ge
	subges	\order, \order, #4
	bge	1b
	tst	\order, #3
+	it	ne
	teqne	\dividend, #0
	beq	5f
@@ -194,12 +216,15 @@ Boston, MA 02111-1307, USA.  */
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
+	it	hs
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
+	it	hs
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
+	it	hs
	subhs	\dividend, \dividend, \divisor
5:
	.endm
@@ -209,6 +234,7 @@ ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
	subs	r2, r1, #1
+	it	eq
	moveq	pc, lr
	bcc	Ldiv0
	cmp	r0, r1
@@ -221,7 +247,8 @@ ENTRY(__aeabi_uidiv)
	mov	r0, r2
	mov	pc, lr
-11:	moveq	r0, #1
+11:	ite	eq
+	moveq	r0, #1
	movne	r0, #0
	mov	pc, lr
@@ -237,10 +264,14 @@ ENTRY(__umodsi3)
	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
+	ite	ne
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
+	it	hi
	tsthi	r1, r2				@ see if divisor is power of 2
+	it	eq
	andeq	r0, r0, r2
+	it	ls
	movls	pc, lr
	ARM_MOD_BODY r0, r1, r2, r3
@@ -255,10 +286,12 @@ ENTRY(__aeabi_idiv)
	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
+	it	mi
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
+	it	mi
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
@@ -268,14 +301,18 @@ ENTRY(__aeabi_idiv)
	ARM_DIV_BODY r3, r1, r0, r2
	cmp	ip, #0
+	it	mi
	rsbmi	r0, r0, #0
	mov	pc, lr
10:	teq	ip, r0				@ same sign ?
+	it	mi
	rsbmi	r0, r0, #0
	mov	pc, lr
-11:	movlo	r0, #0
+11:	it	lo
+	movlo	r0, #0
+	itt	eq
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	mov	pc, lr
@@ -284,6 +321,7 @@ ENTRY(__aeabi_idiv)
	cmp	ip, #0
	mov	r0, r3, lsr r2
+	it	mi
	rsbmi	r0, r0, #0
	mov	pc, lr
@@ -294,19 +332,25 @@ ENTRY(__modsi3)
	cmp	r1, #0
	beq	Ldiv0
+	it	mi
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
+	it	mi
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
+	ite	ne
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
+	it	hi
	tsthi	r1, r2				@ see if divisor is power of 2
+	it	eq
	andeq	r0, r0, r2
	bls	10f
	ARM_MOD_BODY r0, r1, r2, r3
10:	cmp	ip, #0
+	it	mi
	rsbmi	r0, r0, #0
	mov	pc, lr

diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index 99ea338bf87c..57db3a265e5b 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -41,9 +41,12 @@ ENTRY(__aeabi_llsr)
	subs	r3, r2, #32
	rsb	ip, r2, #32
+	itett	mi
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
-	orrmi	al, al, ah, lsl ip
+ ARM(	orrmi	al, al, ah, lsl ip	)
+ THUMB(	lslmi	r3, ah, ip	)
+ THUMB(	orrmi	al, al, r3	)
	mov	ah, ah, lsr r2
	mov	pc, lr

diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index 1da86991d700..0d1d596ad8cd 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -21,6 +21,7 @@ ENTRY(memchr)
	teq	r3, r1
	bne	1b
	sub	r0, r0, #1
-2:	movne	r0, #0
+2:	it	ne
+	movne	r0, #0
	mov	pc, lr
ENDPROC(memchr)

diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index e0d002641d3f..c7a810dee294 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -13,8 +13,11 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
+#define LDR1W_SHIFT 0
+#define STR1W_SHIFT 0
+
	.macro ldr1w ptr reg abort
-	ldr \reg, [\ptr], #4
+	W(ldr) \reg, [\ptr], #4
	.endm
	.macro ldr4w ptr reg1 reg2 reg3 reg4 abort
@@ -26,11 +29,16 @@
	.endm
	.macro ldr1b ptr reg cond=al abort
+	.ifnc	\cond,al
+	it	\cond
	ldr\cond\()b \reg, [\ptr], #1
+	.else
+	ldrb \reg, [\ptr], #1
+	.endif
	.endm
	.macro str1w ptr reg abort
-	str \reg, [\ptr], #4
+	W(str) \reg, [\ptr], #4
	.endm
	.macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
@@ -38,7 +46,12 @@
	.endm
	.macro str1b ptr reg cond=al abort
+	.ifnc	\cond,al
+	it	\cond
	str\cond\()b \reg, [\ptr], #1
+	.else
+	strb \reg, [\ptr], #1
+	.endif
	.endm
	.macro enter reg1 reg2

diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 12549187088c..191a5dc41596 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -29,7 +29,9 @@ ENTRY(memmove)
	subs	ip, r0, r1
+	it	hi
	cmphi	r2, ip
+	it	ls
	bls	memcpy
	stmfd	sp!, {r0, r4, lr}
@@ -72,46 +74,55 @@ ENTRY(memmove)
5:	ands	ip, r2, #28
	rsb	ip, ip, #32
+	it	ne
	addne	pc, pc, ip		@ C is always clear here
	b	7f
6:	nop
-	ldr	r3, [r1, #-4]!
-	ldr	r4, [r1, #-4]!
-	ldr	r5, [r1, #-4]!
-	ldr	r6, [r1, #-4]!
-	ldr	r7, [r1, #-4]!
-	ldr	r8, [r1, #-4]!
-	ldr	lr, [r1, #-4]!
+	W(ldr)	r3, [r1, #-4]!
+	W(ldr)	r4, [r1, #-4]!
+	W(ldr)	r5, [r1, #-4]!
+	W(ldr)	r6, [r1, #-4]!
+	W(ldr)	r7, [r1, #-4]!
+	W(ldr)	r8, [r1, #-4]!
+	W(ldr)	lr, [r1, #-4]!
	add	pc, pc, ip
	nop
	nop
-	str	r3, [r0, #-4]!
-	str	r4, [r0, #-4]!
-	str	r5, [r0, #-4]!
-	str	r6, [r0, #-4]!
-	str	r7, [r0, #-4]!
-	str	r8, [r0, #-4]!
-	str	lr, [r0, #-4]!
+	W(str)	r3, [r0, #-4]!
+	W(str)	r4, [r0, #-4]!
+	W(str)	r5, [r0, #-4]!
+	W(str)	r6, [r0, #-4]!
+	W(str)	r7, [r0, #-4]!
+	W(str)	r8, [r0, #-4]!
+	W(str)	lr, [r0, #-4]!
 CALGN(	bcs	2b	)
7:	ldmfd	sp!, {r5 - r8}
8:	movs	r2, r2, lsl #31
+	it	ne
	ldrneb	r3, [r1, #-1]!
+	itt	cs
	ldrcsb	r4, [r1, #-1]!
	ldrcsb	ip, [r1, #-1]
+	it	ne
	strneb	r3, [r0, #-1]!
+	itt	cs
	strcsb	r4, [r0, #-1]!
	strcsb	ip, [r0, #-1]
	ldmfd	sp!, {r0, r4, pc}
9:	cmp	ip, #2
+	it	gt
	ldrgtb	r3, [r1, #-1]!
+	it	ge
	ldrgeb	r4, [r1, #-1]!
	ldrb	lr, [r1, #-1]!
+	it	gt
	strgtb	r3, [r0, #-1]!
+	it	ge
	strgeb	r4, [r0, #-1]!
	subs	r2, r2, ip
	strb	lr, [r0, #-1]!

diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 650d5923ab83..5e1253d4df5c 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -19,7 +19,9 @@
1:	subs	r2, r2, #4		@ 1 do we have enough
	blt	5f			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
+	it	lt
	strltb	r1, [r0], #1		@ 1
+	it	le
	strleb	r1, [r0], #1		@ 1
	strb	r1, [r0], #1		@ 1
	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
@@ -51,19 +53,23 @@ ENTRY(memset)
	mov	lr, r1
2:	subs	r2, r2, #64
+	itttt	ge
	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time.
	stmgeia	r0!, {r1, r3, ip, lr}
	stmgeia	r0!, {r1, r3, ip, lr}
	stmgeia	r0!, {r1, r3, ip, lr}
	bgt	2b
+	it	eq
	ldmeqfd	sp!, {pc}		@ Now <64 bytes to go.
 /*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r2, #32
+	itt	ne
	stmneia	r0!, {r1, r3, ip, lr}
	stmneia	r0!, {r1, r3, ip, lr}
	tst	r2, #16
+	it	ne
	stmneia	r0!, {r1, r3, ip, lr}
	ldr	lr, [sp], #4
@@ -111,17 +117,21 @@ ENTRY(memset)
#endif
4:	tst	r2, #8
+	it	ne
	stmneia	r0!, {r1, r3}
	tst	r2, #4
+	it	ne
	strne	r1, [r0], #4
 /*
 * When we get here, we've got less than 4 bytes to zero. We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
+	itt	ne
	strneb	r1, [r0], #1
	strneb	r1, [r0], #1
	tst	r2, #1
+	it	ne
	strneb	r1, [r0], #1
	mov	pc, lr
ENDPROC(memset)

diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 3fbdef5f802a..a0e319a4c03f 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -21,7 +21,9 @@
1:	subs	r1, r1, #4		@ 1 do we have enough
	blt	5f			@ 1 bytes to align with?
	cmp	r3, #2			@ 1
+	it	lt
	strltb	r2, [r0], #1		@ 1
+	it	le
	strleb	r2, [r0], #1		@ 1
	strb	r2, [r0], #1		@ 1
	add	r1, r1, r3		@ 1 (r1 = r1 - (4 - r3))
@@ -51,19 +53,23 @@ ENTRY(__memzero)
	mov	lr, r2			@ 1
3:	subs	r1, r1, #64		@ 1 write 32 bytes out per loop
+	itttt	ge
	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
	bgt	3b			@ 1
+	it	eq
	ldmeqfd	sp!, {pc}		@ 1/2 quick exit
 /*
 * No need to correct the count; we're only testing bits from now on
 */
	tst	r1, #32			@ 1
+	itt	ne
	stmneia	r0!, {r2, r3, ip, lr}	@ 4
	stmneia	r0!, {r2, r3, ip, lr}	@ 4
	tst	r1, #16			@ 1 16 bytes or more?
+	it	ne
	stmneia	r0!, {r2, r3, ip, lr}	@ 4
	ldr	lr, [sp], #4		@ 1
@@ -109,17 +115,21 @@ ENTRY(__memzero)
#endif
4:	tst	r1, #8			@ 1 8 bytes or more?
+	it	ne
	stmneia	r0!, {r2, r3}		@ 2
	tst	r1, #4			@ 1 4 bytes or more?
+	it	ne
	strne	r2, [r0], #4		@ 1
 /*
 * When we get here, we've got less than 4 bytes to zero. We
 * may have an unaligned pointer as well.
 */
5:	tst	r1, #2			@ 1 2 bytes or more?
+	itt	ne
	strneb	r2, [r0], #1		@ 1
	strneb	r2, [r0], #1		@ 1
	tst	r1, #1			@ 1 a byte left over
+	it	ne
	strneb	r2, [r0], #1		@ 1
	mov	pc, lr			@ 1
ENDPROC(__memzero)

diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S
index 864f3c1c4f18..4d9dc1a526ee 100644
--- a/arch/arm/lib/putuser.S
+++ b/arch/arm/lib/putuser.S
@@ -38,10 +38,16 @@ ENDPROC(__put_user_1)
ENTRY(__put_user_2)
	mov	ip, r2, lsr #8
#ifndef __ARMEB__
-2:	strbt	r2, [r0], #1
+2:
+ ARM(	strbt	r2, [r0], #1	)
+ THUMB(	strbt	r2, [r0]	)
+ THUMB(	add	r0, #1	)
3:	strbt	ip, [r0]
#else
-2:	strbt	ip, [r0], #1
+2:
+ ARM(	strbt	ip, [r0], #1	)
+ THUMB(	strbt	ip, [r0]	)
+ THUMB(	add	r0, #1	)
3:	strbt	r2, [r0]
#endif
	mov	r0, #0
@@ -55,7 +61,10 @@ ENTRY(__put_user_4)
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
-5:	strt	r2, [r0], #4
+5:
+ ARM(	strt	r2, [r0], #4	)
+ THUMB(	strt	r2, [r0]	)
+ THUMB(	add	r0, #4	)
6:	strt	r3, [r0]
	mov	r0, #0
	mov	pc, lr

diff --git a/arch/arm/lib/sha1.S b/arch/arm/lib/sha1.S
index a16fb208c841..09b548cac1a4 100644
--- a/arch/arm/lib/sha1.S
+++ b/arch/arm/lib/sha1.S
@@ -187,6 +187,7 @@ ENTRY(sha_transform)
ENDPROC(sha_transform)
+	.align	2
.L_sha_K:
	.word	0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
@@ -195,6 +196,7 @@ ENDPROC(sha_transform)
 * void sha_init(__u32 *buf)
 */
+	.align	2
.L_sha_initial_digest:
	.word	0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0

diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index d8f2a1c1aea4..fd4014e54e37 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -18,9 +18,11 @@ ENTRY(strchr)
	and	r1, r1, #0xff
1:	ldrb	r2, [r0], #1
	teq	r2, r1
+	it	ne
	teqne	r2, #0
	bne	1b
	teq	r2, r1
+	ite	ne
	movne	r0, #0
	subeq	r0, r0, #1
	mov	pc, lr

diff --git a/arch/arm/lib/strncpy_from_user.S b/arch/arm/lib/strncpy_from_user.S
index 330373c26dd9..1c9814f346c6 100644
--- a/arch/arm/lib/strncpy_from_user.S
+++ b/arch/arm/lib/strncpy_from_user.S
@@ -23,7 +23,7 @@ ENTRY(__strncpy_from_user)
	mov	ip, r1
1:	subs	r2, r2, #1
-USER(	ldrplbt	r3, [r1], #1)
+	ldrusr	r3, r1, 1, pl
	bmi	2f
	strb	r3, [r0], #1
	teq	r3, #0

diff --git a/arch/arm/lib/strnlen_user.S b/arch/arm/lib/strnlen_user.S
index 90bb9d020836..7855b2906659 100644
--- a/arch/arm/lib/strnlen_user.S
+++ b/arch/arm/lib/strnlen_user.S
@@ -23,7 +23,7 @@ ENTRY(__strnlen_user)
	mov	r2, r0
1:
-USER(	ldrbt	r3, [r0], #1)
+	ldrusr	r3, r0, 1
	teq	r3, #0
	beq	2f
	subs	r1, r1, #1

diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index 302f20cd2423..d7a9440de6b8 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -18,6 +18,7 @@ ENTRY(strrchr)
	mov	r3, #0
1:	ldrb	r2, [r0], #1
	teq	r2, r1
+	it	eq
	subeq	r3, r0, #1
	teq	r2, #0
	bne	1b

diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 543d7094d18e..df66c76e8b29 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -15,6 +15,6 @@ ENTRY(_test_and_clear_bit_be)
	eor	r0, r0, #0x18		@ big endian byte ordering
ENTRY(_test_and_clear_bit_le)
-	testop	bicne, strneb
+	testop	bicne, strneb, ne
ENDPROC(_test_and_clear_bit_be)
ENDPROC(_test_and_clear_bit_le)

diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index 0b3f390401ce..3938bdf446a6 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -15,6 +15,6 @@ ENTRY(_test_and_set_bit_be)
	eor	r0, r0, #0x18		@ big endian byte ordering
ENTRY(_test_and_set_bit_le)
-	testop	orreq, streqb
+	testop	orreq, streqb, eq
ENDPROC(_test_and_set_bit_be)
ENDPROC(_test_and_set_bit_le)

diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index f0df6a91db04..503288955242 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -27,9 +27,13 @@ ENTRY(__ucmpdi2)
	cmp	xh, yh
+	it	eq
	cmpeq	xl, yl
+	it	lo
	movlo	r0, #0
+	it	eq
	moveq	r0, #1
+	it	hi
	movhi	r0, #2
	mov	pc, lr
@@ -40,9 +44,13 @@ ENDPROC(__ucmpdi2)
ENTRY(__aeabi_ulcmp)
	cmp	xh, yh
+	it	eq
	cmpeq	xl, yl
+	it	lo
	movlo	r0, #-1
+	it	eq
	moveq	r0, #0
+	it	hi
	movhi	r0, #1
	mov	pc, lr
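
Background on the idiom that dominates the hunks above (an editor's sketch, not part of the commit): ARM state encodes a condition in every instruction, so `movne`, `rsbmi`, and the like just work. In Thumb-2 a conditionally executed instruction must instead sit inside an IT (If-Then) block opened by an `it`/`itt`/`ite`/... instruction that names the condition of up to four following instructions, which is why nearly every hunk inserts one before existing conditional instructions. A minimal standalone illustration, assuming GNU as unified syntax; the `iabs` label is hypothetical and mirrors the `rsbmi r0, r0, #0` pattern used in lib1funcs.S:

	.syntax	unified
	.thumb
	.global	iabs
	.type	iabs, %function
@ int iabs(int x): return the absolute value of r0.
iabs:
	cmp	r0, #0		@ sets the N flag when r0 < 0
	it	mi		@ next instruction executes only on "mi"
	rsbmi	r0, r0, #0	@ r0 = 0 - r0, conditional inside the IT block
	bx	lr

When the same source assembles in ARM state, the `it` is accepted for checking but emits no code, so adding explicit IT instructions keeps a file dual-state; sequences that Thumb-2 cannot express directly (conditional shifted operands, post-indexed user-mode accesses) are the ones split into the ARM()/THUMB() alternatives seen throughout the diff.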