author | Catalin Marinas <catalin.marinas@arm.com> | 2009-03-10 10:24:54 +0000 |
---|---|---|
committer | Catalin Marinas <catalin.marinas@arm.com> | 2009-03-10 10:24:54 +0000 |
commit | 77582cfa8a38fc71d1c46b3296a9f7ba4ad80275 (patch) | |
tree | 5d1e747b8d65aa5198d2203623800632e17685e9 /arch/arm/lib | |
parent | 1745b660c1511279f83ec45e6404d484ba98e578 (diff) |
Thumb-2: Add IT instructions to the kernel assembly code
With a modified GNU assembler, these instructions are generated automatically.
This patch is for use when such a gas is not available.
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Diffstat (limited to 'arch/arm/lib')
31 files changed, 190 insertions, 12 deletions
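A note on the instruction being added throughout this patch: in Thumb-2, a conditionally executed instruction must be preceded by an IT (If-Then) instruction naming the condition; each additional T ("then") or E ("else") suffix covers one more of the following instructions, up to four, with E slots executing on the inverted condition. The additions below all follow this pattern. A minimal illustrative fragment, not taken from the patch (with a unified-syntax assembler the IT line is accepted in ARM state and is only actually required when assembling for Thumb-2):

	cmp	r0, r1
	ite	eq			@ one 'then' slot, then one 'else' slot
	moveq	r2, #1			@ executes only when r0 == r1
	movne	r2, #0			@ executes only when r0 != r1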
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 638deb13da1c..b18944b85e4b 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -41,6 +41,7 @@ ENTRY(__aeabi_llsl)
 	subs	r3, r2, #32
 	rsb	ip, r2, #32
+	itett	mi
 	movmi	ah, ah, lsl r2
 	movpl	ah, al, lsl r3
 ARM(	orrmi	ah, ah, al, lsr ip	)
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 015e8aa5a1d1..0d5ace74dd9d 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -41,6 +41,7 @@ ENTRY(__aeabi_lasr)
 	subs	r3, r2, #32
 	rsb	ip, r2, #32
+	itett	mi
 	movmi	al, al, lsr r2
 	movpl	al, ah, asr r3
 ARM(	orrmi	al, al, ah, lsl ip	)
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index aaf7220d9e30..42e62dd54188 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -38,6 +38,7 @@ ENDPROC(c_backtrace)
 		beq	no_frame		@ we have no stack frames
 		tst	r1, #0x10		@ 26 or 32-bit mode?
+		itte	eq
 ARM(		moveq	mask, #0xfc000003	)
 THUMB(		moveq	mask, #0xfc000000	)
 THUMB(		orreq	mask, #0x03		)
@@ -75,6 +76,7 @@ for_each_frame:	tst	frame, mask		@ Check for address exceptions
1003:		ldr	r2, [sv_pc, #-4]	@ if stmfd sp!, {args} exists,
 		ldr	r3, .Ldsi+4		@ adjust saved 'pc' back one
 		teq	r3, r2, lsr #10		@ instruction
+		ite	ne
 		subne	r0, sv_pc, #4		@ allow for mov
 		subeq	r0, sv_pc, #8		@ allow for mov + stmia
@@ -86,6 +88,7 @@ for_each_frame:	tst	frame, mask		@ Check for address exceptions
 		ldr	r1, [sv_pc, #-4]	@ if stmfd sp!, {args} exists,
 		ldr	r3, .Ldsi+4
 		teq	r3, r1, lsr #10
+		ittt	eq
 		ldreq	r0, [frame, #-8]	@ get sp
 		subeq	r0, r0, #4		@ point at the last arg
 		bleq	.Ldumpstm		@ dump saved registers
@@ -93,6 +96,7 @@ for_each_frame:	tst	frame, mask		@ Check for address exceptions
1004:		ldr	r1, [sv_pc, #0]		@ if stmfd sp!, {..., fp, ip, lr, pc}
 		ldr	r3, .Ldsi		@ instruction exists,
 		teq	r3, r1, lsr #10
+		itt	eq
 		subeq	r0, frame, #16
 		bleq	.Ldumpstm		@ dump saved registers
@@ -134,6 +138,7 @@ ENDPROC(c_backtrace)
 		beq	2f
 		add	r7, r7, #1
 		teq	r7, #6
+		itte	eq
 		moveq	r7, #1
 		moveq	r1, #'\n'
 		movne	r1, #' '
@@ -144,6 +149,7 @@ ENDPROC(c_backtrace)
2:		subs	reg, reg, #1
 		bpl	1b
 		teq	r7, #0
+		itt	ne
 		adrne	r0, .Lcr
 		blne	printk
 		ldmfd	sp!, {instr, reg, stack, r7, pc}
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 2e787d40d599..5e34c1238103 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -13,18 +13,22 @@
 	mov	pc, lr
 	.endm

-	.macro	testop, instr, store
+	.macro	testop, instr, store, cond=al
 	and	r3, r0, #7		@ Get bit offset
 	mov	r2, #1
 	add	r1, r1, r0, lsr #3	@ Get byte offset
 	mov	r3, r2, lsl r3		@ create mask
1:	ldrexb	r2, [r1]
 	ands	r0, r2, r3		@ save old value of bit
-	\instr	r2, r2, r3		@ toggle bit
+	.ifnc	\cond,al
+	it	\cond
+	.endif
+	\instr	r2, r2, r3		@ toggle bit
 	strexb	ip, r2, [r1]
 	cmp	ip, #0
 	bne	1b
 	cmp	r0, #0
+	it	ne
 	movne	r0, #1
2:	mov	pc, lr
 	.endm
@@ -49,7 +53,7 @@
 * Note: we can trivially conditionalise the store instruction
 * to avoid dirtying the data cache.
 */
-	.macro	testop, instr, store
+	.macro	testop, instr, store, cond=al
 	add	r1, r1, r0, lsr #3
 	and	r3, r0, #7
 	mov	r0, #1
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index e4fe124acedc..9a7a16426428 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -69,6 +69,9 @@
 	.endm

 	.macro str1b ptr reg cond=al abort
+	.ifnc	\cond,al
+	it	\cond
+	.endif
 	str\cond\()b \reg, [\ptr], #1
 	.endm
diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
index 6ae04db1ca4f..1c57a034cadf 100644
--- a/arch/arm/lib/copy_page.S
+++ b/arch/arm/lib/copy_page.S
@@ -39,8 +39,10 @@ ENTRY(copy_page)
 	ldmia	r1!, {r3, r4, ip, lr}		@ 4
 	subs	r2, r2, #1			@ 1
 	stmia	r0!, {r3, r4, ip, lr}		@ 4
+	itt	gt
 	ldmgtia	r1!, {r3, r4, ip, lr}		@ 4
 	bgt	1b				@ 1
+	PLD(	itt	eq			)
 	PLD(	ldmeqia	r1!, {r3, r4, ip, lr}	)
 	PLD(	beq	2b			)
 	ldmfd	sp!, {r4, pc}			@ 3
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..8e8fc03f55bd 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -109,6 +109,7 @@
#if LDR1W_SHIFT > 0
 	lsl	ip, ip, #LDR1W_SHIFT
#endif
+	it	ne
 	addne	pc, pc, ip		@ C is always clear here
 	b	7f
6:
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 44354fdba350..dc0fe7391527 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -53,6 +53,9 @@
 	.endm

 	.macro ldr1b ptr reg cond=al abort
+	.ifnc	\cond,al
+	it	\cond
+	.endif
 	ldr\cond\()b \reg, [\ptr], #1
 	.endm
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index 31d3cb34740d..e9a504e4302e 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -39,6 +39,7 @@ td3	.req	lr
 		/* we must have at least one byte. */
 		tst	buf, #1			@ odd address?
+		itttt	ne
 		movne	sum, sum, ror #8
 		ldrneb	td0, [buf], #1
 		subne	len, len, #1
@@ -68,25 +69,30 @@ td3	.req	lr
 		bne	.Lless8_wordlp
.Lless8_byte:	tst	len, #1			@ odd number of bytes
+		itt	ne
 		ldrneb	td0, [buf], #1		@ include last byte
 		adcnes	sum, sum, td0, put_byte_0	@ update checksum
.Ldone:		adc	r0, sum, #0		@ collect up the last carry
 		ldr	td0, [sp], #4
 		tst	td0, #1			@ check buffer alignment
+		it	ne
 		movne	r0, r0, ror #8		@ rotate checksum by 8 bits
 		ldr	pc, [sp], #4		@ return
.Lnot_aligned:	tst	buf, #1			@ odd address
+		ittt	ne
 		ldrneb	td0, [buf], #1		@ make even
 		subne	len, len, #1
 		adcnes	sum, sum, td0, put_byte_1	@ update checksum

 		tst	buf, #2			@ 32-bit aligned?
#if __LINUX_ARM_ARCH__ >= 4
+		itt	ne
 		ldrneh	td0, [buf], #2		@ make 32-bit aligned
 		subne	len, len, #2
#else
+		itttt	ne
 		ldrneb	td0, [buf], #1
 		ldrneb	ip, [buf], #1
 		subne	len, len, #2
@@ -96,6 +102,7 @@ td3	.req	lr
 		orrne	td0, ip, td0, lsl #8
#endif
#endif
+		it	ne
 		adcnes	sum, sum, td0		@ update checksum
 		mov	pc, lr
@@ -105,10 +112,12 @@ ENTRY(csum_partial)
 		blo	.Lless8			@ 8 bytes to copy.

 		tst	buf, #1
+		it	ne
 		movne	sum, sum, ror #8
 		adds	sum, sum, #0		@ C = 0
 		tst	buf, #3			@ Test destination alignment
+		it	ne
 		blne	.Lnot_aligned		@ align destination, return here
1:		bics	ip, len, #31
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..8e1c141b6524 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -40,6 +40,7 @@ sum	.req	r3
 		adcs	sum, sum, ip, put_byte_1	@ update checksum
 		strb	ip, [dst], #1
 		tst	dst, #2
+		it	eq
 		moveq	pc, lr			@ dst is now 32bit aligned
.Ldst_16bit:	load2b	r8, ip
@@ -94,6 +95,7 @@ FN_ENTRY
 		adds	sum, sum, #0		@ C = 0
 		tst	dst, #3			@ Test destination alignment
+		it	ne
 		blne	.Ldst_unaligned		@ align destination, return here
/*
@@ -147,6 +149,7 @@ FN_ENTRY
 		strb	r5, [dst], #1
 		mov	r5, r4, get_byte_2
.Lexit:		tst	len, #1
+		ittt	ne
 		strneb	r5, [dst], #1
 		andne	r5, r5, #255
 		adcnes	sum, sum, r5, put_byte_0
@@ -160,6 +163,7 @@ FN_ENTRY
.Ldone:		adc	r0, sum, #0
 		ldr	sum, [sp, #0]		@ dst
 		tst	sum, #1
+		it	ne
 		movne	r0, r0, ror #8
 		load_regs
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index fd0e9dcd9fdc..152ed83480f7 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -77,6 +77,7 @@
 		add	r2, r2, r1
 		mov	r0, #0			@ zero the buffer
9002:		teq	r2, r1
+		it	ne
 		strneb	r0, [r1], #1
 		bne	9002b
 		load_regs
diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S
index 8d6a8762ab88..fcd87ffe2b1d 100644
--- a/arch/arm/lib/delay.S
+++ b/arch/arm/lib/delay.S
@@ -31,6 +31,7 @@ ENTRY(__const_udelay)			@ 0 <= r0 <= 0x7fffff06
 		mov	r2, r2, lsr #10		@ max = 0x00007fff
 		mul	r0, r2, r0		@ max = 2^32-1
 		movs	r0, r0, lsr #6
+		it	eq
 		moveq	pc, lr
/*
@@ -58,6 +59,7 @@ ENTRY(__delay)
 		movls	pc, lr
 		subs	r0, r0, #1
#endif
+		it	hi
 		bhi	__delay
 		mov	pc, lr
ENDPROC(__udelay)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index faa7748142da..d02268ac7baf 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -84,8 +84,10 @@ ENTRY(__do_div64)
 	@ The division loop for needed upper bit positions.
 	@ Break out early if dividend reaches 0.
2:	cmp	xh, yl
+	itt	cs
 	orrcs	yh, yh, ip
 	subcss	xh, xh, yl
+	it	ne
 	movnes	ip, ip, lsr #1
 	mov	yl, yl, lsr #1
 	bne	2b
@@ -93,7 +95,9 @@ ENTRY(__do_div64)
 	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
 	mov	yl, #0
+	it	eq
 	cmpeq	xl, r4
+	itt	lo
 	movlo	xh, xl
 	movlo	pc, lr
@@ -104,7 +108,9 @@ ENTRY(__do_div64)
4:	movs	xl, xl, lsl #1
 	adcs	xh, xh, xh
 	beq	6f
+	it	cc
 	cmpcc	xh, r4
+	itt	cs
5:	orrcs	yl, yl, ip
 	subcs	xh, xh, r4
 	movs	ip, ip, lsr #1
@@ -116,6 +122,7 @@ ENTRY(__do_div64)
 	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
 	cmp	xl, #0
+	it	eq
 	moveq	pc, lr

 	@ We still have remainer bits in the low part.  Bring them up.
@@ -185,7 +192,8 @@ ENTRY(__do_div64)
 	mov	pc, lr

 	@ eq -> division by 1: obvious enough...
-9:	moveq	yl, xl
+9:	itttt	eq
+	moveq	yl, xl
 	moveq	yh, xh
 	moveq	xh, #0
 	moveq	pc, lr
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index 9f4238987fe9..45229e46ed5b 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -12,12 +12,15 @@
.Linsb_align:	rsb	ip, ip, #4
 		cmp	ip, r2
+		it	gt
 		movgt	ip, r2
 		cmp	ip, #2
 		ldrb	r3, [r0]
 		strb	r3, [r1], #1
+		itt	ge
 		ldrgeb	r3, [r0]
 		strgeb	r3, [r1], #1
+		itt	gt
 		ldrgtb	r3, [r0]
 		strgtb	r3, [r1], #1
 		subs	r2, r2, ip
@@ -25,6 +28,7 @@
ENTRY(__raw_readsb)
 		teq	r2, #0		@ do we have to check for the zero len?
+		it	eq
 		moveq	pc, lr
 		ands	ip, r1, #3
 		bne	.Linsb_align
@@ -72,6 +76,7 @@ ENTRY(__raw_readsb)
 		bpl	.Linsb_16_lp

 		tst	r2, #15
+		it	eq
 		ldmeqfd	sp!, {r4 - r6, pc}
.Linsb_no_16:	tst	r2, #8
@@ -109,13 +114,16 @@ ENTRY(__raw_readsb)
 		str	r3, [r1], #4
.Linsb_no_4:	ands	r2, r2, #3
+		it	eq
 		ldmeqfd	sp!, {r4 - r6, pc}

 		cmp	r2, #2
 		ldrb	r3, [r0]
 		strb	r3, [r1], #1
+		itt	ge
 		ldrgeb	r3, [r0]
 		strgeb	r3, [r1], #1
+		itt	gt
 		ldrgtb	r3, [r0]
 		strgtb	r3, [r1]
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..1f02e66d079c 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -12,6 +12,7 @@
ENTRY(__raw_readsl)
 		teq	r2, #0		@ do we have to check for the zero len?
+		it	eq
 		moveq	pc, lr
 		ands	ip, r1, #3
 		bne	3f
@@ -28,9 +29,11 @@ ENTRY(__raw_readsl)
 		bpl	1b
 		ldmfd	sp!, {r4, lr}
2:		movs	r2, r2, lsl #31
+		ittt	cs
 		ldrcs	r3, [r0, #0]
 		ldrcs	ip, [r0, #0]
 		stmcsia	r1!, {r3, ip}
+		itt	ne
 		ldrne	r3, [r0, #0]
 		strne	r3, [r1, #0]
 		mov	pc, lr
@@ -48,6 +51,7 @@ ENTRY(__raw_readsl)
4:		subs	r2, r2, #1
 		mov	ip, r3, pull #24
+		itttt	ne
 		ldrne	r3, [r0]
 		orrne	ip, ip, r3, push #8
 		strne	ip, [r1], #4
@@ -56,6 +60,7 @@ ENTRY(__raw_readsl)
5:		subs	r2, r2, #1
 		mov	ip, r3, pull #16
+		itttt	ne
 		ldrne	r3, [r0]
 		orrne	ip, ip, r3, push #16
 		strne	ip, [r1], #4
@@ -64,6 +69,7 @@ ENTRY(__raw_readsl)
6:		subs	r2, r2, #1
 		mov	ip, r3, pull #8
+		itttt	ne
 		ldrne	r3, [r0]
 		orrne	ip, ip, r3, push #24
 		strne	ip, [r1], #4
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index 1f393d42593d..9db32f0541da 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -26,6 +26,7 @@
ENTRY(__raw_readsw)
 		teq	r2, #0
+		it	eq
 		moveq	pc, lr
 		tst	r1, #3
 		bne	.Linsw_align
@@ -76,7 +77,8 @@ ENTRY(__raw_readsw)
 		pack	r3, r3, ip
 		str	r3, [r1], #4

-.Lno_insw_2:	ldrneh	r3, [r0]
+.Lno_insw_2:	itt	ne
+		ldrneh	r3, [r0]
 		strneh	r3, [r1]

 		ldmfd	sp!, {r4, r5, pc}
@@ -94,6 +96,7 @@ ENTRY(__raw_readsw)
#endif

.Linsw_noalign:	stmfd	sp!, {r4, lr}
+		it	cc
 		ldrccb	ip, [r1, #-1]!
 		bcc	1f
@@ -121,6 +124,7 @@ ENTRY(__raw_readsw)

3:		tst	r2, #1
 		strb	ip, [r1], #1
+		itttt	ne
 		ldrneh	ip, [r0]
 _BE_ONLY_(	movne	ip, ip, ror #8		)
 		strneb	ip, [r1], #1
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 68b92f4acaeb..5fad6b0c7f05 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -32,12 +32,15 @@
.Loutsb_align:	rsb	ip, ip, #4
 		cmp	ip, r2
+		it	gt
 		movgt	ip, r2
 		cmp	ip, #2
 		ldrb	r3, [r1], #1
 		strb	r3, [r0]
+		itt	ge
 		ldrgeb	r3, [r1], #1
 		strgeb	r3, [r0]
+		itt	gt
 		ldrgtb	r3, [r1], #1
 		strgtb	r3, [r0]
 		subs	r2, r2, ip
@@ -45,6 +48,7 @@
ENTRY(__raw_writesb)
 		teq	r2, #0		@ do we have to check for the zero len?
+		it	eq
 		moveq	pc, lr
 		ands	ip, r1, #3
 		bne	.Loutsb_align
@@ -64,6 +68,7 @@ ENTRY(__raw_writesb)
 		bpl	.Loutsb_16_lp

 		tst	r2, #15
+		it	eq
 		ldmeqfd	sp!, {r4, r5, pc}
.Loutsb_no_16:	tst	r2, #8
@@ -80,13 +85,16 @@ ENTRY(__raw_writesb)
 		outword	r3
.Loutsb_no_4:	ands	r2, r2, #3
+		it	eq
 		ldmeqfd	sp!, {r4, r5, pc}

 		cmp	r2, #2
 		ldrb	r3, [r1], #1
 		strb	r3, [r0]
+		itt	ge
 		ldrgeb	r3, [r1], #1
 		strgeb	r3, [r0]
+		itt	gt
 		ldrgtb	r3, [r1]
 		strgtb	r3, [r0]
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..ced1d9169090 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -12,6 +12,7 @@
ENTRY(__raw_writesl)
 		teq	r2, #0		@ do we have to check for the zero len?
+		it	eq
 		moveq	pc, lr
 		ands	ip, r1, #3
 		bne	3f
@@ -28,10 +29,14 @@ ENTRY(__raw_writesl)
 		bpl	1b
 		ldmfd	sp!, {r4, lr}
2:		movs	r2, r2, lsl #31
+		itt	cs
 		ldmcsia	r1!, {r3, ip}
 		strcs	r3, [r0, #0]
+		it	ne
 		ldrne	r3, [r1, #0]
+		it	cs
 		strcs	ip, [r0, #0]
+		it	ne
 		strne	r3, [r0, #0]
 		mov	pc, lr
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index ff4f71b579ee..bb8530310ff3 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -31,6 +31,7 @@
ENTRY(__raw_writesw)
 		teq	r2, #0
+		it	eq
 		moveq	pc, lr
 		ands	r3, r1, #3
 		bne	.Loutsw_align
@@ -61,7 +62,8 @@ ENTRY(__raw_writesw)
 		ldr	r3, [r1], #4
 		outword	r3

-.Lno_outsw_2:	ldrneh	r3, [r1]
+.Lno_outsw_2:	itt	ne
+		ldrneh	r3, [r1]
 		strneh	r3, [r0]

 		ldmfd	sp!, {r4, r5, pc}
@@ -79,6 +81,7 @@ ENTRY(__raw_writesw)
 THUMB(		rsb	r3, r3, #0	)
 THUMB(		ldr	r3, [r1, r3]	)
 THUMB(		sub	r1, r3		)
+		it	cs
 		subcs	r2, r2, #1
 		bcs	2f
 		subs	r2, r2, #2
@@ -94,7 +97,8 @@ ENTRY(__raw_writesw)
 		bpl	1b

 		tst	r2, #1
-3:		movne	ip, r3, lsr #8
+3:		itt	ne
+		movne	ip, r3, lsr #8
 		strneh	ip, [r0]
 		mov	pc, lr
ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index 67964bcfc854..1eb73e769c6c 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -56,6 +56,7 @@ Boston, MA 02111-1307, USA.  */
 	@ at the left end of each 4 bit nibbles in the division loop
 	@ to save one loop in most cases.
 	tst	\divisor, #0xe0000000
+	itte	eq
 	moveq	\divisor, \divisor, lsl #3
 	moveq	\curbit, #8
 	movne	\curbit, #1
@@ -65,6 +66,7 @@ Boston, MA 02111-1307, USA.  */
 	@ division loop.  Continue shifting until the divisor is
 	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
+	ittt	lo
 	cmplo	\divisor, \dividend
 	movlo	\divisor, \divisor, lsl #4
 	movlo	\curbit, \curbit, lsl #4
@@ -73,6 +75,7 @@ Boston, MA 02111-1307, USA.  */
 	@ For very big divisors, we must shift it a bit at a time, or
 	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
+	ittt	lo
 	cmplo	\divisor, \dividend
 	movlo	\divisor, \divisor, lsl #1
 	movlo	\curbit, \curbit, lsl #1
@@ -84,19 +87,25 @@ Boston, MA 02111-1307, USA.  */

 	@ Division loop
1:	cmp	\dividend, \divisor
+	itt	hs
 	subhs	\dividend, \dividend, \divisor
 	orrhs	\result, \result, \curbit
 	cmp	\dividend, \divisor, lsr #1
+	itt	hs
 	subhs	\dividend, \dividend, \divisor, lsr #1
 	orrhs	\result, \result, \curbit, lsr #1
 	cmp	\dividend, \divisor, lsr #2
+	itt	hs
 	subhs	\dividend, \dividend, \divisor, lsr #2
 	orrhs	\result, \result, \curbit, lsr #2
 	cmp	\dividend, \divisor, lsr #3
+	itt	hs
 	subhs	\dividend, \dividend, \divisor, lsr #3
 	orrhs	\result, \result, \curbit, lsr #3
 	cmp	\dividend, #0			@ Early termination?
+	it	ne
 	movnes	\curbit, \curbit, lsr #4	@ No, any more bits to do?
+	it	ne
 	movne	\divisor, \divisor, lsr #4
 	bne	1b
@@ -113,19 +122,24 @@ Boston, MA 02111-1307, USA.  */
#else
 	cmp	\divisor, #(1 << 16)
+	itt	hs
 	movhs	\divisor, \divisor, lsr #16
 	movhs	\order, #16
+	it	lo
 	movlo	\order, #0
 	cmp	\divisor, #(1 << 8)
+	itt	hs
 	movhs	\divisor, \divisor, lsr #8
 	addhs	\order, \order, #8
 	cmp	\divisor, #(1 << 4)
+	itt	hs
 	movhs	\divisor, \divisor, lsr #4
 	addhs	\order, \order, #4
 	cmp	\divisor, #(1 << 2)
+	ite	hi
 	addhi	\order, \order, #3
 	addls	\order, \order, \divisor, lsr #1
@@ -152,6 +166,7 @@ Boston, MA 02111-1307, USA.  */
 	@ division loop.  Continue shifting until the divisor is
 	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
+	ittt	lo
 	cmplo	\divisor, \dividend
 	movlo	\divisor, \divisor, lsl #4
 	addlo	\order, \order, #4
@@ -160,6 +175,7 @@ Boston, MA 02111-1307, USA.  */
 	@ For very big divisors, we must shift it a bit at a time, or
 	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
+	ittt	lo
 	cmplo	\divisor, \dividend
 	movlo	\divisor, \divisor, lsl #1
 	addlo	\order, \order, #1
@@ -173,19 +189,25 @@ Boston, MA 02111-1307, USA.  */
 	blt	2f

1:	cmp	\dividend, \divisor
+	it	hs
 	subhs	\dividend, \dividend, \divisor
 	cmp	\dividend, \divisor, lsr #1
+	it	hs
 	subhs	\dividend, \dividend, \divisor, lsr #1
 	cmp	\dividend, \divisor, lsr #2
+	it	hs
 	subhs	\dividend, \dividend, \divisor, lsr #2
 	cmp	\dividend, \divisor, lsr #3
+	it	hs
 	subhs	\dividend, \dividend, \divisor, lsr #3
 	cmp	\dividend, #1
 	mov	\divisor, \divisor, lsr #4
+	it	ge
 	subges	\order, \order, #4
 	bge	1b

 	tst	\order, #3
+	it	ne
 	teqne	\dividend, #0
 	beq	5f
@@ -194,12 +216,15 @@ Boston, MA 02111-1307, USA.  */
 	blt	4f
 	beq	3f
 	cmp	\dividend, \divisor
+	it	hs
 	subhs	\dividend, \dividend, \divisor
 	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
+	it	hs
 	subhs	\dividend, \dividend, \divisor
 	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
+	it	hs
 	subhs	\dividend, \dividend, \divisor
5:
.endm
@@ -209,6 +234,7 @@ ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)

 	subs	r2, r1, #1
+	it	eq
 	moveq	pc, lr
 	bcc	Ldiv0
 	cmp	r0, r1
@@ -221,7 +247,8 @@ ENTRY(__aeabi_uidiv)
 	mov	r0, r2
 	mov	pc, lr

-11:	moveq	r0, #1
+11:	ite	eq
+	moveq	r0, #1
 	movne	r0, #0
 	mov	pc, lr
@@ -237,10 +264,14 @@ ENTRY(__umodsi3)
 	subs	r2, r1, #1			@ compare divisor with 1
 	bcc	Ldiv0
+	ite	ne
 	cmpne	r0, r1				@ compare dividend with divisor
 	moveq	r0, #0
+	it	hi
 	tsthi	r1, r2				@ see if divisor is power of 2
+	it	eq
 	andeq	r0, r0, r2
+	it	ls
 	movls	pc, lr

 	ARM_MOD_BODY r0, r1, r2, r3
@@ -255,10 +286,12 @@ ENTRY(__aeabi_idiv)
 	cmp	r1, #0
 	eor	ip, r0, r1			@ save the sign of the result.
 	beq	Ldiv0
+	it	mi
 	rsbmi	r1, r1, #0			@ loops below use unsigned.
 	subs	r2, r1, #1			@ division by 1 or -1 ?
 	beq	10f
 	movs	r3, r0
+	it	mi
 	rsbmi	r3, r0, #0			@ positive dividend value
 	cmp	r3, r1
 	bls	11f
@@ -268,14 +301,18 @@ ENTRY(__aeabi_idiv)
 	ARM_DIV_BODY r3, r1, r0, r2

 	cmp	ip, #0
+	it	mi
 	rsbmi	r0, r0, #0
 	mov	pc, lr

10:	teq	ip, r0				@ same sign ?
+	it	mi
 	rsbmi	r0, r0, #0
 	mov	pc, lr

-11:	movlo	r0, #0
+11:	it	lo
+	movlo	r0, #0
+	itt	eq
 	moveq	r0, ip, asr #31
 	orreq	r0, r0, #1
 	mov	pc, lr
@@ -284,6 +321,7 @@ ENTRY(__aeabi_idiv)
 	cmp	ip, #0
 	mov	r0, r3, lsr r2
+	it	mi
 	rsbmi	r0, r0, #0
 	mov	pc, lr
@@ -294,19 +332,25 @@ ENTRY(__modsi3)
 	cmp	r1, #0
 	beq	Ldiv0
+	it	mi
 	rsbmi	r1, r1, #0			@ loops below use unsigned.
 	movs	ip, r0				@ preserve sign of dividend
+	it	mi
 	rsbmi	r0, r0, #0			@ if negative make positive
 	subs	r2, r1, #1			@ compare divisor with 1
+	ite	ne
 	cmpne	r0, r1				@ compare dividend with divisor
 	moveq	r0, #0
+	it	hi
 	tsthi	r1, r2				@ see if divisor is power of 2
+	it	eq
 	andeq	r0, r0, r2
 	bls	10f

 	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
+	it	mi
 	rsbmi	r0, r0, #0
 	mov	pc, lr
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index f83d449141f7..57db3a265e5b 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -41,6 +41,7 @@ ENTRY(__aeabi_llsr)
 	subs	r3, r2, #32
 	rsb	ip, r2, #32
+	itett	mi
 	movmi	al, al, lsr r2
 	movpl	al, ah, lsr r3
 ARM(	orrmi	al, al, ah, lsl ip	)
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index 1da86991d700..0d1d596ad8cd 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -21,6 +21,7 @@ ENTRY(memchr)
 	teq	r3, r1
 	bne	1b
 	sub	r0, r0, #1
-2:	movne	r0, #0
+2:	it	ne
+	movne	r0, #0
 	mov	pc, lr
ENDPROC(memchr)
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index a9b9e2287a09..c7a810dee294 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -29,7 +29,12 @@
 	.endm

 	.macro ldr1b ptr reg cond=al abort
+	.ifnc	\cond,al
+	it	\cond
 	ldr\cond\()b \reg, [\ptr], #1
+	.else
+	ldrb	\reg, [\ptr], #1
+	.endif
 	.endm

 	.macro str1w ptr reg abort
@@ -41,7 +46,12 @@
 	.endm

 	.macro str1b ptr reg cond=al abort
+	.ifnc	\cond,al
+	it	\cond
 	str\cond\()b \reg, [\ptr], #1
+	.else
+	strb	\reg, [\ptr], #1
+	.endif
 	.endm

 	.macro enter reg1 reg2
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 5025c863713d..191a5dc41596 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -29,7 +29,9 @@ ENTRY(memmove)

 		subs	ip, r0, r1
+		it	hi
 		cmphi	r2, ip
+		it	ls
 		bls	memcpy

 		stmfd	sp!, {r0, r4, lr}
@@ -72,6 +74,7 @@ ENTRY(memmove)
5:		ands	ip, r2, #28
 		rsb	ip, ip, #32
+		it	ne
 		addne	pc, pc, ip		@ C is always clear here
 		b	7f
6:		nop
@@ -99,19 +102,27 @@ ENTRY(memmove)
7:		ldmfd	sp!, {r5 - r8}
8:		movs	r2, r2, lsl #31
+		it	ne
 		ldrneb	r3, [r1, #-1]!
+		itt	cs
 		ldrcsb	r4, [r1, #-1]!
 		ldrcsb	ip, [r1, #-1]
+		it	ne
 		strneb	r3, [r0, #-1]!
+		itt	cs
 		strcsb	r4, [r0, #-1]!
 		strcsb	ip, [r0, #-1]
 		ldmfd	sp!, {r0, r4, pc}

9:		cmp	ip, #2
+		it	gt
 		ldrgtb	r3, [r1, #-1]!
+		it	ge
 		ldrgeb	r4, [r1, #-1]!
 		ldrb	lr, [r1, #-1]!
+		it	gt
 		strgtb	r3, [r0, #-1]!
+		it	ge
 		strgeb	r4, [r0, #-1]!
 		subs	r2, r2, ip
 		strb	lr, [r0, #-1]!
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 761eefa76243..ef022bdc0235 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -19,7 +19,9 @@
1:	subs	r2, r2, #4		@ 1 do we have enough
 	blt	5f			@ 1 bytes to align with?
 	cmp	r3, #2			@ 1
+	it	lt
 	strltb	r1, [r0], #1		@ 1
+	it	le
 	strleb	r1, [r0], #1		@ 1
 	strb	r1, [r0], #1		@ 1
 	add	r2, r2, r3		@ 1 (r2 = r2 - (4 - r3))
@@ -51,19 +53,23 @@ ENTRY(memset)
 	mov	lr, r1

2:	subs	r2, r2, #64
+	itttt	ge
 	stmgeia	r0!, {r1, r3, ip, lr}	@ 64 bytes at a time.
 	stmgeia	r0!, {r1, r3, ip, lr}
 	stmgeia	r0!, {r1, r3, ip, lr}
 	stmgeia	r0!, {r1, r3, ip, lr}
 	bgt	2b
+	it	eq
 	ldmeqfd	sp!, {pc}		@ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
 */
 	tst	r2, #32
+	itt	ne
 	stmneia	r0!, {r1, r3, ip, lr}
 	stmneia	r0!, {r1, r3, ip, lr}
 	tst	r2, #16
+	it	ne
 	stmneia	r0!, {r1, r3, ip, lr}
 	ldr	lr, [sp], #4
@@ -111,17 +117,21 @@ ENTRY(memset)
#endif

4:	tst	r2, #8
+	it	ne
 	stmneia	r0!, {r1, r3}
 	tst	r2, #4
+	it	ne
 	strne	r1, [r0], #4
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r2, #2
+	itt	ne
 	strneb	r1, [r0], #1
 	strneb	r1, [r0], #1
 	tst	r2, #1
+	it	ne
 	strneb	r1, [r0], #1
 	mov	pc, lr
ENDPROC(memset)
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 3fbdef5f802a..a0e319a4c03f 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -21,7 +21,9 @@
1:	subs	r1, r1, #4		@ 1 do we have enough
 	blt	5f			@ 1 bytes to align with?
 	cmp	r3, #2			@ 1
+	it	lt
 	strltb	r2, [r0], #1		@ 1
+	it	le
 	strleb	r2, [r0], #1		@ 1
 	strb	r2, [r0], #1		@ 1
 	add	r1, r1, r3		@ 1 (r1 = r1 - (4 - r3))
@@ -51,19 +53,23 @@ ENTRY(__memzero)
 	mov	lr, r2			@ 1

3:	subs	r1, r1, #64		@ 1 write 32 bytes out per loop
+	itttt	ge
 	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
 	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
 	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
 	stmgeia	r0!, {r2, r3, ip, lr}	@ 4
 	bgt	3b			@ 1
+	it	eq
 	ldmeqfd	sp!, {pc}		@ 1/2 quick exit
/*
 * No need to correct the count; we're only testing bits from now on
 */
 	tst	r1, #32			@ 1
+	itt	ne
 	stmneia	r0!, {r2, r3, ip, lr}	@ 4
 	stmneia	r0!, {r2, r3, ip, lr}	@ 4
 	tst	r1, #16			@ 1 16 bytes or more?
+	it	ne
 	stmneia	r0!, {r2, r3, ip, lr}	@ 4
 	ldr	lr, [sp], #4		@ 1
@@ -109,17 +115,21 @@ ENTRY(__memzero)
#endif

4:	tst	r1, #8			@ 1 8 bytes or more?
+	it	ne
 	stmneia	r0!, {r2, r3}		@ 2
 	tst	r1, #4			@ 1 4 bytes or more?
+	it	ne
 	strne	r2, [r0], #4		@ 1
/*
 * When we get here, we've got less than 4 bytes to zero.  We
 * may have an unaligned pointer as well.
 */
5:	tst	r1, #2			@ 1 2 bytes or more?
+	itt	ne
 	strneb	r2, [r0], #1		@ 1
 	strneb	r2, [r0], #1		@ 1
 	tst	r1, #1			@ 1 a byte left over
+	it	ne
 	strneb	r2, [r0], #1		@ 1
 	mov	pc, lr			@ 1
ENDPROC(__memzero)
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index d8f2a1c1aea4..fd4014e54e37 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -18,9 +18,11 @@ ENTRY(strchr)
 		and	r1, r1, #0xff
1:		ldrb	r2, [r0], #1
 		teq	r2, r1
+		it	ne
 		teqne	r2, #0
 		bne	1b
 		teq	r2, r1
+		ite	ne
 		movne	r0, #0
 		subeq	r0, r0, #1
 		mov	pc, lr
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index 302f20cd2423..d7a9440de6b8 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -18,6 +18,7 @@ ENTRY(strrchr)
 		mov	r3, #0
1:		ldrb	r2, [r0], #1
 		teq	r2, r1
+		it	eq
 		subeq	r3, r0, #1
 		teq	r2, #0
 		bne	1b
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 543d7094d18e..df66c76e8b29 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -15,6 +15,6 @@ ENTRY(_test_and_clear_bit_be)
 		eor	r0, r0, #0x18		@ big endian byte ordering
ENTRY(_test_and_clear_bit_le)
-	testop	bicne, strneb
+	testop	bicne, strneb, ne
ENDPROC(_test_and_clear_bit_be)
ENDPROC(_test_and_clear_bit_le)
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index 0b3f390401ce..3938bdf446a6 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -15,6 +15,6 @@ ENTRY(_test_and_set_bit_be)
 		eor	r0, r0, #0x18		@ big endian byte ordering
ENTRY(_test_and_set_bit_le)
-	testop	orreq, streqb
+	testop	orreq, streqb, eq
ENDPROC(_test_and_set_bit_be)
ENDPROC(_test_and_set_bit_le)
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index f0df6a91db04..503288955242 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -27,9 +27,13 @@ ENTRY(__ucmpdi2)
 	cmp	xh, yh
+	it	eq
 	cmpeq	xl, yl
+	it	lo
 	movlo	r0, #0
+	it	eq
 	moveq	r0, #1
+	it	hi
 	movhi	r0, #2
 	mov	pc, lr
@@ -40,9 +44,13 @@ ENDPROC(__ucmpdi2)
ENTRY(__aeabi_ulcmp)

 	cmp	xh, yh
+	it	eq
 	cmpeq	xl, yl
+	it	lo
 	movlo	r0, #-1
+	it	eq
 	moveq	r0, #0
+	it	hi
 	movhi	r0, #1
 	mov	pc, lr
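One recurring pattern above deserves a closing note: in bitops.h (and, open-coded, in memcpy.S), the conditional mnemonic arrives as a macro argument, so the macro cannot parse the condition out of it. Callers therefore now pass the condition separately (see the testop call sites in testclearbit.S and testsetbit.S), and the macro emits an IT only when the operation is actually conditional, since an IT covering an unconditional (AL) instruction would be redundant. A sketch of the same idiom in isolation, using a hypothetical cond_add macro rather than the kernel's:

	.macro	cond_add rd, rn, rm, cond=al
	.ifnc	\cond,al		@ nothing to predicate for the AL default
	it	\cond			@ cover the conditional add in Thumb-2
	.endif
	add\cond \rd, \rn, \rm
	.endm

	cond_add r0, r0, r1		@ expands with no IT
	cond_add r0, r0, r1, ne		@ expands to: it ne / addne r0, r0, r1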