summaryrefslogtreecommitdiff
path: root/arch/arm/lib
diff options
context:
space:
mode:
author    Catalin Marinas <catalin.marinas@arm.com>  2009-03-10 10:24:54 +0000
committer Catalin Marinas <catalin.marinas@arm.com>  2009-03-10 10:24:54 +0000
commit    77582cfa8a38fc71d1c46b3296a9f7ba4ad80275 (patch)
tree      5d1e747b8d65aa5198d2203623800632e17685e9 /arch/arm/lib
parent    1745b660c1511279f83ec45e6404d484ba98e578 (diff)
Thumb-2: Add IT instructions to the kernel assembly code
With modified GNU assembler, these instructions are automatically generated. This patch is to be used if such gas isn't available.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Diffstat (limited to 'arch/arm/lib')
-rw-r--r--  arch/arm/lib/ashldi3.S                  1
-rw-r--r--  arch/arm/lib/ashrdi3.S                  1
-rw-r--r--  arch/arm/lib/backtrace.S                6
-rw-r--r--  arch/arm/lib/bitops.h                  10
-rw-r--r--  arch/arm/lib/copy_from_user.S           3
-rw-r--r--  arch/arm/lib/copy_page.S                2
-rw-r--r--  arch/arm/lib/copy_template.S            1
-rw-r--r--  arch/arm/lib/copy_to_user.S             3
-rw-r--r--  arch/arm/lib/csumpartial.S              9
-rw-r--r--  arch/arm/lib/csumpartialcopygeneric.S   4
-rw-r--r--  arch/arm/lib/csumpartialcopyuser.S      1
-rw-r--r--  arch/arm/lib/delay.S                    2
-rw-r--r--  arch/arm/lib/div64.S                   10
-rw-r--r--  arch/arm/lib/io-readsb.S                8
-rw-r--r--  arch/arm/lib/io-readsl.S                6
-rw-r--r--  arch/arm/lib/io-readsw-armv4.S          6
-rw-r--r--  arch/arm/lib/io-writesb.S               8
-rw-r--r--  arch/arm/lib/io-writesl.S               5
-rw-r--r--  arch/arm/lib/io-writesw-armv4.S         8
-rw-r--r--  arch/arm/lib/lib1funcs.S               48
-rw-r--r--  arch/arm/lib/lshrdi3.S                  1
-rw-r--r--  arch/arm/lib/memchr.S                   3
-rw-r--r--  arch/arm/lib/memcpy.S                  10
-rw-r--r--  arch/arm/lib/memmove.S                 11
-rw-r--r--  arch/arm/lib/memset.S                  10
-rw-r--r--  arch/arm/lib/memzero.S                 10
-rw-r--r--  arch/arm/lib/strchr.S                   2
-rw-r--r--  arch/arm/lib/strrchr.S                  1
-rw-r--r--  arch/arm/lib/testclearbit.S             2
-rw-r--r--  arch/arm/lib/testsetbit.S               2
-rw-r--r--  arch/arm/lib/ucmpdi2.S                  8
31 files changed, 190 insertions, 12 deletions
diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S
index 638deb13da1c..b18944b85e4b 100644
--- a/arch/arm/lib/ashldi3.S
+++ b/arch/arm/lib/ashldi3.S
@@ -41,6 +41,7 @@ ENTRY(__aeabi_llsl)
subs r3, r2, #32
rsb ip, r2, #32
+ itett mi
movmi ah, ah, lsl r2
movpl ah, al, lsl r3
ARM( orrmi ah, ah, al, lsr ip )
diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S
index 015e8aa5a1d1..0d5ace74dd9d 100644
--- a/arch/arm/lib/ashrdi3.S
+++ b/arch/arm/lib/ashrdi3.S
@@ -41,6 +41,7 @@ ENTRY(__aeabi_lasr)
subs r3, r2, #32
rsb ip, r2, #32
+ itett mi
movmi al, al, lsr r2
movpl al, ah, asr r3
ARM( orrmi al, al, ah, lsl ip )
diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S
index aaf7220d9e30..42e62dd54188 100644
--- a/arch/arm/lib/backtrace.S
+++ b/arch/arm/lib/backtrace.S
@@ -38,6 +38,7 @@ ENDPROC(c_backtrace)
beq no_frame @ we have no stack frames
tst r1, #0x10 @ 26 or 32-bit mode?
+ itte eq
ARM( moveq mask, #0xfc000003 )
THUMB( moveq mask, #0xfc000000 )
THUMB( orreq mask, #0x03 )
@@ -75,6 +76,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions
1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
ldr r3, .Ldsi+4 @ adjust saved 'pc' back one
teq r3, r2, lsr #10 @ instruction
+ ite ne
subne r0, sv_pc, #4 @ allow for mov
subeq r0, sv_pc, #8 @ allow for mov + stmia
@@ -86,6 +88,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions
ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists,
ldr r3, .Ldsi+4
teq r3, r1, lsr #10
+ ittt eq
ldreq r0, [frame, #-8] @ get sp
subeq r0, r0, #4 @ point at the last arg
bleq .Ldumpstm @ dump saved registers
@@ -93,6 +96,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions
1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc}
ldr r3, .Ldsi @ instruction exists,
teq r3, r1, lsr #10
+ itt eq
subeq r0, frame, #16
bleq .Ldumpstm @ dump saved registers
@@ -134,6 +138,7 @@ ENDPROC(c_backtrace)
beq 2f
add r7, r7, #1
teq r7, #6
+ itte eq
moveq r7, #1
moveq r1, #'\n'
movne r1, #' '
@@ -144,6 +149,7 @@ ENDPROC(c_backtrace)
2: subs reg, reg, #1
bpl 1b
teq r7, #0
+ itt ne
adrne r0, .Lcr
blne printk
ldmfd sp!, {instr, reg, stack, r7, pc}
diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h
index 2e787d40d599..5e34c1238103 100644
--- a/arch/arm/lib/bitops.h
+++ b/arch/arm/lib/bitops.h
@@ -13,18 +13,22 @@
mov pc, lr
.endm
- .macro testop, instr, store
+ .macro testop, instr, store, cond=al
and r3, r0, #7 @ Get bit offset
mov r2, #1
add r1, r1, r0, lsr #3 @ Get byte offset
mov r3, r2, lsl r3 @ create mask
1: ldrexb r2, [r1]
ands r0, r2, r3 @ save old value of bit
- \instr r2, r2, r3 @ toggle bit
+ .ifnc \cond,al
+ it \cond
+ .endif
+ \instr r2, r2, r3 @ toggle bit
strexb ip, r2, [r1]
cmp ip, #0
bne 1b
cmp r0, #0
+ it ne
movne r0, #1
2: mov pc, lr
.endm
@@ -49,7 +53,7 @@
* Note: we can trivially conditionalise the store instruction
* to avoid dirtying the data cache.
*/
- .macro testop, instr, store
+ .macro testop, instr, store, cond=al
add r1, r1, r0, lsr #3
and r3, r0, #7
mov r0, #1
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S
index e4fe124acedc..9a7a16426428 100644
--- a/arch/arm/lib/copy_from_user.S
+++ b/arch/arm/lib/copy_from_user.S
@@ -69,6 +69,9 @@
.endm
.macro str1b ptr reg cond=al abort
+ .ifnc \cond,al
+ it \cond
+ .endif
str\cond\()b \reg, [\ptr], #1
.endm
diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S
index 6ae04db1ca4f..1c57a034cadf 100644
--- a/arch/arm/lib/copy_page.S
+++ b/arch/arm/lib/copy_page.S
@@ -39,8 +39,10 @@ ENTRY(copy_page)
ldmia r1!, {r3, r4, ip, lr} @ 4
subs r2, r2, #1 @ 1
stmia r0!, {r3, r4, ip, lr} @ 4
+ itt gt
ldmgtia r1!, {r3, r4, ip, lr} @ 4
bgt 1b @ 1
+ PLD( itt eq )
PLD( ldmeqia r1!, {r3, r4, ip, lr} )
PLD( beq 2b )
ldmfd sp!, {r4, pc} @ 3
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index 805e3f8fb007..8e8fc03f55bd 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -109,6 +109,7 @@
#if LDR1W_SHIFT > 0
lsl ip, ip, #LDR1W_SHIFT
#endif
+ it ne
addne pc, pc, ip @ C is always clear here
b 7f
6:
diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S
index 44354fdba350..dc0fe7391527 100644
--- a/arch/arm/lib/copy_to_user.S
+++ b/arch/arm/lib/copy_to_user.S
@@ -53,6 +53,9 @@
.endm
.macro ldr1b ptr reg cond=al abort
+ .ifnc \cond,al
+ it \cond
+ .endif
ldr\cond\()b \reg, [\ptr], #1
.endm
diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S
index 31d3cb34740d..e9a504e4302e 100644
--- a/arch/arm/lib/csumpartial.S
+++ b/arch/arm/lib/csumpartial.S
@@ -39,6 +39,7 @@ td3 .req lr
/* we must have at least one byte. */
tst buf, #1 @ odd address?
+ itttt ne
movne sum, sum, ror #8
ldrneb td0, [buf], #1
subne len, len, #1
@@ -68,25 +69,30 @@ td3 .req lr
bne .Lless8_wordlp
.Lless8_byte: tst len, #1 @ odd number of bytes
+ itt ne
ldrneb td0, [buf], #1 @ include last byte
adcnes sum, sum, td0, put_byte_0 @ update checksum
.Ldone: adc r0, sum, #0 @ collect up the last carry
ldr td0, [sp], #4
tst td0, #1 @ check buffer alignment
+ it ne
movne r0, r0, ror #8 @ rotate checksum by 8 bits
ldr pc, [sp], #4 @ return
.Lnot_aligned: tst buf, #1 @ odd address
+ ittt ne
ldrneb td0, [buf], #1 @ make even
subne len, len, #1
adcnes sum, sum, td0, put_byte_1 @ update checksum
tst buf, #2 @ 32-bit aligned?
#if __LINUX_ARM_ARCH__ >= 4
+ itt ne
ldrneh td0, [buf], #2 @ make 32-bit aligned
subne len, len, #2
#else
+ itttt ne
ldrneb td0, [buf], #1
ldrneb ip, [buf], #1
subne len, len, #2
@@ -96,6 +102,7 @@ td3 .req lr
orrne td0, ip, td0, lsl #8
#endif
#endif
+ it ne
adcnes sum, sum, td0 @ update checksum
mov pc, lr
@@ -105,10 +112,12 @@ ENTRY(csum_partial)
blo .Lless8 @ 8 bytes to copy.
tst buf, #1
+ it ne
movne sum, sum, ror #8
adds sum, sum, #0 @ C = 0
tst buf, #3 @ Test destination alignment
+ it ne
blne .Lnot_aligned @ align destination, return here
1: bics ip, len, #31
diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S
index d620a5f22a09..8e1c141b6524 100644
--- a/arch/arm/lib/csumpartialcopygeneric.S
+++ b/arch/arm/lib/csumpartialcopygeneric.S
@@ -40,6 +40,7 @@ sum .req r3
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst dst, #2
+ it eq
moveq pc, lr @ dst is now 32bit aligned
.Ldst_16bit: load2b r8, ip
@@ -94,6 +95,7 @@ FN_ENTRY
adds sum, sum, #0 @ C = 0
tst dst, #3 @ Test destination alignment
+ it ne
blne .Ldst_unaligned @ align destination, return here
/*
@@ -147,6 +149,7 @@ FN_ENTRY
strb r5, [dst], #1
mov r5, r4, get_byte_2
.Lexit: tst len, #1
+ ittt ne
strneb r5, [dst], #1
andne r5, r5, #255
adcnes sum, sum, r5, put_byte_0
@@ -160,6 +163,7 @@ FN_ENTRY
.Ldone: adc r0, sum, #0
ldr sum, [sp, #0] @ dst
tst sum, #1
+ it ne
movne r0, r0, ror #8
load_regs
diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S
index fd0e9dcd9fdc..152ed83480f7 100644
--- a/arch/arm/lib/csumpartialcopyuser.S
+++ b/arch/arm/lib/csumpartialcopyuser.S
@@ -77,6 +77,7 @@
add r2, r2, r1
mov r0, #0 @ zero the buffer
9002: teq r2, r1
+ it ne
strneb r0, [r1], #1
bne 9002b
load_regs
diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S
index 8d6a8762ab88..fcd87ffe2b1d 100644
--- a/arch/arm/lib/delay.S
+++ b/arch/arm/lib/delay.S
@@ -31,6 +31,7 @@ ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06
mov r2, r2, lsr #10 @ max = 0x00007fff
mul r0, r2, r0 @ max = 2^32-1
movs r0, r0, lsr #6
+ it eq
moveq pc, lr
/*
@@ -58,6 +59,7 @@ ENTRY(__delay)
movls pc, lr
subs r0, r0, #1
#endif
+ it hi
bhi __delay
mov pc, lr
ENDPROC(__udelay)
diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S
index faa7748142da..d02268ac7baf 100644
--- a/arch/arm/lib/div64.S
+++ b/arch/arm/lib/div64.S
@@ -84,8 +84,10 @@ ENTRY(__do_div64)
@ The division loop for needed upper bit positions.
@ Break out early if dividend reaches 0.
2: cmp xh, yl
+ itt cs
orrcs yh, yh, ip
subcss xh, xh, yl
+ it ne
movnes ip, ip, lsr #1
mov yl, yl, lsr #1
bne 2b
@@ -93,7 +95,9 @@ ENTRY(__do_div64)
@ See if we need to handle lower 32-bit result.
3: cmp xh, #0
mov yl, #0
+ it eq
cmpeq xl, r4
+ itt lo
movlo xh, xl
movlo pc, lr
@@ -104,7 +108,9 @@ ENTRY(__do_div64)
4: movs xl, xl, lsl #1
adcs xh, xh, xh
beq 6f
+ it cc
cmpcc xh, r4
+ itt cs
5: orrcs yl, yl, ip
subcs xh, xh, r4
movs ip, ip, lsr #1
@@ -116,6 +122,7 @@ ENTRY(__do_div64)
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
+ it eq
moveq pc, lr
@ We still have remainer bits in the low part. Bring them up.
@@ -185,7 +192,8 @@ ENTRY(__do_div64)
mov pc, lr
@ eq -> division by 1: obvious enough...
-9: moveq yl, xl
+9: itttt eq
+ moveq yl, xl
moveq yh, xh
moveq xh, #0
moveq pc, lr
diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S
index 9f4238987fe9..45229e46ed5b 100644
--- a/arch/arm/lib/io-readsb.S
+++ b/arch/arm/lib/io-readsb.S
@@ -12,12 +12,15 @@
.Linsb_align: rsb ip, ip, #4
cmp ip, r2
+ it gt
movgt ip, r2
cmp ip, #2
ldrb r3, [r0]
strb r3, [r1], #1
+ itt ge
ldrgeb r3, [r0]
strgeb r3, [r1], #1
+ itt gt
ldrgtb r3, [r0]
strgtb r3, [r1], #1
subs r2, r2, ip
@@ -25,6 +28,7 @@
ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
+ it eq
moveq pc, lr
ands ip, r1, #3
bne .Linsb_align
@@ -72,6 +76,7 @@ ENTRY(__raw_readsb)
bpl .Linsb_16_lp
tst r2, #15
+ it eq
ldmeqfd sp!, {r4 - r6, pc}
.Linsb_no_16: tst r2, #8
@@ -109,13 +114,16 @@ ENTRY(__raw_readsb)
str r3, [r1], #4
.Linsb_no_4: ands r2, r2, #3
+ it eq
ldmeqfd sp!, {r4 - r6, pc}
cmp r2, #2
ldrb r3, [r0]
strb r3, [r1], #1
+ itt ge
ldrgeb r3, [r0]
strgeb r3, [r1], #1
+ itt gt
ldrgtb r3, [r0]
strgtb r3, [r1]
diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S
index 5fb97e7f9f4b..1f02e66d079c 100644
--- a/arch/arm/lib/io-readsl.S
+++ b/arch/arm/lib/io-readsl.S
@@ -12,6 +12,7 @@
ENTRY(__raw_readsl)
teq r2, #0 @ do we have to check for the zero len?
+ it eq
moveq pc, lr
ands ip, r1, #3
bne 3f
@@ -28,9 +29,11 @@ ENTRY(__raw_readsl)
bpl 1b
ldmfd sp!, {r4, lr}
2: movs r2, r2, lsl #31
+ ittt cs
ldrcs r3, [r0, #0]
ldrcs ip, [r0, #0]
stmcsia r1!, {r3, ip}
+ itt ne
ldrne r3, [r0, #0]
strne r3, [r1, #0]
mov pc, lr
@@ -48,6 +51,7 @@ ENTRY(__raw_readsl)
4: subs r2, r2, #1
mov ip, r3, pull #24
+ itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #8
strne ip, [r1], #4
@@ -56,6 +60,7 @@ ENTRY(__raw_readsl)
5: subs r2, r2, #1
mov ip, r3, pull #16
+ itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #16
strne ip, [r1], #4
@@ -64,6 +69,7 @@ ENTRY(__raw_readsl)
6: subs r2, r2, #1
mov ip, r3, pull #8
+ itttt ne
ldrne r3, [r0]
orrne ip, ip, r3, push #24
strne ip, [r1], #4
diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S
index 1f393d42593d..9db32f0541da 100644
--- a/arch/arm/lib/io-readsw-armv4.S
+++ b/arch/arm/lib/io-readsw-armv4.S
@@ -26,6 +26,7 @@
ENTRY(__raw_readsw)
teq r2, #0
+ it eq
moveq pc, lr
tst r1, #3
bne .Linsw_align
@@ -76,7 +77,8 @@ ENTRY(__raw_readsw)
pack r3, r3, ip
str r3, [r1], #4
-.Lno_insw_2: ldrneh r3, [r0]
+.Lno_insw_2: itt ne
+ ldrneh r3, [r0]
strneh r3, [r1]
ldmfd sp!, {r4, r5, pc}
@@ -94,6 +96,7 @@ ENTRY(__raw_readsw)
#endif
.Linsw_noalign: stmfd sp!, {r4, lr}
+ it cc
ldrccb ip, [r1, #-1]!
bcc 1f
@@ -121,6 +124,7 @@ ENTRY(__raw_readsw)
3: tst r2, #1
strb ip, [r1], #1
+ itttt ne
ldrneh ip, [r0]
_BE_ONLY_( movne ip, ip, ror #8 )
strneb ip, [r1], #1
diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S
index 68b92f4acaeb..5fad6b0c7f05 100644
--- a/arch/arm/lib/io-writesb.S
+++ b/arch/arm/lib/io-writesb.S
@@ -32,12 +32,15 @@
.Loutsb_align: rsb ip, ip, #4
cmp ip, r2
+ it gt
movgt ip, r2
cmp ip, #2
ldrb r3, [r1], #1
strb r3, [r0]
+ itt ge
ldrgeb r3, [r1], #1
strgeb r3, [r0]
+ itt gt
ldrgtb r3, [r1], #1
strgtb r3, [r0]
subs r2, r2, ip
@@ -45,6 +48,7 @@
ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
+ it eq
moveq pc, lr
ands ip, r1, #3
bne .Loutsb_align
@@ -64,6 +68,7 @@ ENTRY(__raw_writesb)
bpl .Loutsb_16_lp
tst r2, #15
+ it eq
ldmeqfd sp!, {r4, r5, pc}
.Loutsb_no_16: tst r2, #8
@@ -80,13 +85,16 @@ ENTRY(__raw_writesb)
outword r3
.Loutsb_no_4: ands r2, r2, #3
+ it eq
ldmeqfd sp!, {r4, r5, pc}
cmp r2, #2
ldrb r3, [r1], #1
strb r3, [r0]
+ itt ge
ldrgeb r3, [r1], #1
strgeb r3, [r0]
+ itt gt
ldrgtb r3, [r1]
strgtb r3, [r0]
diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S
index 8d3b7813725c..ced1d9169090 100644
--- a/arch/arm/lib/io-writesl.S
+++ b/arch/arm/lib/io-writesl.S
@@ -12,6 +12,7 @@
ENTRY(__raw_writesl)
teq r2, #0 @ do we have to check for the zero len?
+ it eq
moveq pc, lr
ands ip, r1, #3
bne 3f
@@ -28,10 +29,14 @@ ENTRY(__raw_writesl)
bpl 1b
ldmfd sp!, {r4, lr}
2: movs r2, r2, lsl #31
+ itt cs
ldmcsia r1!, {r3, ip}
strcs r3, [r0, #0]
+ it ne
ldrne r3, [r1, #0]
+ it cs
strcs ip, [r0, #0]
+ it ne
strne r3, [r0, #0]
mov pc, lr
diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S
index ff4f71b579ee..bb8530310ff3 100644
--- a/arch/arm/lib/io-writesw-armv4.S
+++ b/arch/arm/lib/io-writesw-armv4.S
@@ -31,6 +31,7 @@
ENTRY(__raw_writesw)
teq r2, #0
+ it eq
moveq pc, lr
ands r3, r1, #3
bne .Loutsw_align
@@ -61,7 +62,8 @@ ENTRY(__raw_writesw)
ldr r3, [r1], #4
outword r3
-.Lno_outsw_2: ldrneh r3, [r1]
+.Lno_outsw_2: itt ne
+ ldrneh r3, [r1]
strneh r3, [r0]
ldmfd sp!, {r4, r5, pc}
@@ -79,6 +81,7 @@ ENTRY(__raw_writesw)
THUMB( rsb r3, r3, #0 )
THUMB( ldr r3, [r1, r3] )
THUMB( sub r1, r3 )
+ it cs
subcs r2, r2, #1
bcs 2f
subs r2, r2, #2
@@ -94,7 +97,8 @@ ENTRY(__raw_writesw)
bpl 1b
tst r2, #1
-3: movne ip, r3, lsr #8
+3: itt ne
+ movne ip, r3, lsr #8
strneh ip, [r0]
mov pc, lr
ENDPROC(__raw_writesw)
diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S
index 67964bcfc854..1eb73e769c6c 100644
--- a/arch/arm/lib/lib1funcs.S
+++ b/arch/arm/lib/lib1funcs.S
@@ -56,6 +56,7 @@ Boston, MA 02111-1307, USA. */
@ at the left end of each 4 bit nibbles in the division loop
@ to save one loop in most cases.
tst \divisor, #0xe0000000
+ itte eq
moveq \divisor, \divisor, lsl #3
moveq \curbit, #8
movne \curbit, #1
@@ -65,6 +66,7 @@ Boston, MA 02111-1307, USA. */
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
+ ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
movlo \curbit, \curbit, lsl #4
@@ -73,6 +75,7 @@ Boston, MA 02111-1307, USA. */
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
+ ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
movlo \curbit, \curbit, lsl #1
@@ -84,19 +87,25 @@ Boston, MA 02111-1307, USA. */
@ Division loop
1: cmp \dividend, \divisor
+ itt hs
subhs \dividend, \dividend, \divisor
orrhs \result, \result, \curbit
cmp \dividend, \divisor, lsr #1
+ itt hs
subhs \dividend, \dividend, \divisor, lsr #1
orrhs \result, \result, \curbit, lsr #1
cmp \dividend, \divisor, lsr #2
+ itt hs
subhs \dividend, \dividend, \divisor, lsr #2
orrhs \result, \result, \curbit, lsr #2
cmp \dividend, \divisor, lsr #3
+ itt hs
subhs \dividend, \dividend, \divisor, lsr #3
orrhs \result, \result, \curbit, lsr #3
cmp \dividend, #0 @ Early termination?
+ it ne
movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ it ne
movne \divisor, \divisor, lsr #4
bne 1b
@@ -113,19 +122,24 @@ Boston, MA 02111-1307, USA. */
#else
cmp \divisor, #(1 << 16)
+ itt hs
movhs \divisor, \divisor, lsr #16
movhs \order, #16
+ it lo
movlo \order, #0
cmp \divisor, #(1 << 8)
+ itt hs
movhs \divisor, \divisor, lsr #8
addhs \order, \order, #8
cmp \divisor, #(1 << 4)
+ itt hs
movhs \divisor, \divisor, lsr #4
addhs \order, \order, #4
cmp \divisor, #(1 << 2)
+ ite hi
addhi \order, \order, #3
addls \order, \order, \divisor, lsr #1
@@ -152,6 +166,7 @@ Boston, MA 02111-1307, USA. */
@ division loop. Continue shifting until the divisor is
@ larger than the dividend.
1: cmp \divisor, #0x10000000
+ ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #4
addlo \order, \order, #4
@@ -160,6 +175,7 @@ Boston, MA 02111-1307, USA. */
@ For very big divisors, we must shift it a bit at a time, or
@ we will be in danger of overflowing.
1: cmp \divisor, #0x80000000
+ ittt lo
cmplo \divisor, \dividend
movlo \divisor, \divisor, lsl #1
addlo \order, \order, #1
@@ -173,19 +189,25 @@ Boston, MA 02111-1307, USA. */
blt 2f
1: cmp \dividend, \divisor
+ it hs
subhs \dividend, \dividend, \divisor
cmp \dividend, \divisor, lsr #1
+ it hs
subhs \dividend, \dividend, \divisor, lsr #1
cmp \dividend, \divisor, lsr #2
+ it hs
subhs \dividend, \dividend, \divisor, lsr #2
cmp \dividend, \divisor, lsr #3
+ it hs
subhs \dividend, \dividend, \divisor, lsr #3
cmp \dividend, #1
mov \divisor, \divisor, lsr #4
+ it ge
subges \order, \order, #4
bge 1b
tst \order, #3
+ it ne
teqne \dividend, #0
beq 5f
@@ -194,12 +216,15 @@ Boston, MA 02111-1307, USA. */
blt 4f
beq 3f
cmp \dividend, \divisor
+ it hs
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
3: cmp \dividend, \divisor
+ it hs
subhs \dividend, \dividend, \divisor
mov \divisor, \divisor, lsr #1
4: cmp \dividend, \divisor
+ it hs
subhs \dividend, \dividend, \divisor
5:
.endm
@@ -209,6 +234,7 @@ ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
subs r2, r1, #1
+ it eq
moveq pc, lr
bcc Ldiv0
cmp r0, r1
@@ -221,7 +247,8 @@ ENTRY(__aeabi_uidiv)
mov r0, r2
mov pc, lr
-11: moveq r0, #1
+11: ite eq
+ moveq r0, #1
movne r0, #0
mov pc, lr
@@ -237,10 +264,14 @@ ENTRY(__umodsi3)
subs r2, r1, #1 @ compare divisor with 1
bcc Ldiv0
+ ite ne
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
+ it hi
tsthi r1, r2 @ see if divisor is power of 2
+ it eq
andeq r0, r0, r2
+ it ls
movls pc, lr
ARM_MOD_BODY r0, r1, r2, r3
@@ -255,10 +286,12 @@ ENTRY(__aeabi_idiv)
cmp r1, #0
eor ip, r0, r1 @ save the sign of the result.
beq Ldiv0
+ it mi
rsbmi r1, r1, #0 @ loops below use unsigned.
subs r2, r1, #1 @ division by 1 or -1 ?
beq 10f
movs r3, r0
+ it mi
rsbmi r3, r0, #0 @ positive dividend value
cmp r3, r1
bls 11f
@@ -268,14 +301,18 @@ ENTRY(__aeabi_idiv)
ARM_DIV_BODY r3, r1, r0, r2
cmp ip, #0
+ it mi
rsbmi r0, r0, #0
mov pc, lr
10: teq ip, r0 @ same sign ?
+ it mi
rsbmi r0, r0, #0
mov pc, lr
-11: movlo r0, #0
+11: it lo
+ movlo r0, #0
+ itt eq
moveq r0, ip, asr #31
orreq r0, r0, #1
mov pc, lr
@@ -284,6 +321,7 @@ ENTRY(__aeabi_idiv)
cmp ip, #0
mov r0, r3, lsr r2
+ it mi
rsbmi r0, r0, #0
mov pc, lr
@@ -294,19 +332,25 @@ ENTRY(__modsi3)
cmp r1, #0
beq Ldiv0
+ it mi
rsbmi r1, r1, #0 @ loops below use unsigned.
movs ip, r0 @ preserve sign of dividend
+ it mi
rsbmi r0, r0, #0 @ if negative make positive
subs r2, r1, #1 @ compare divisor with 1
+ ite ne
cmpne r0, r1 @ compare dividend with divisor
moveq r0, #0
+ it hi
tsthi r1, r2 @ see if divisor is power of 2
+ it eq
andeq r0, r0, r2
bls 10f
ARM_MOD_BODY r0, r1, r2, r3
10: cmp ip, #0
+ it mi
rsbmi r0, r0, #0
mov pc, lr
diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S
index f83d449141f7..57db3a265e5b 100644
--- a/arch/arm/lib/lshrdi3.S
+++ b/arch/arm/lib/lshrdi3.S
@@ -41,6 +41,7 @@ ENTRY(__aeabi_llsr)
subs r3, r2, #32
rsb ip, r2, #32
+ itett mi
movmi al, al, lsr r2
movpl al, ah, lsr r3
ARM( orrmi al, al, ah, lsl ip )
diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S
index 1da86991d700..0d1d596ad8cd 100644
--- a/arch/arm/lib/memchr.S
+++ b/arch/arm/lib/memchr.S
@@ -21,6 +21,7 @@ ENTRY(memchr)
teq r3, r1
bne 1b
sub r0, r0, #1
-2: movne r0, #0
+2: it ne
+ movne r0, #0
mov pc, lr
ENDPROC(memchr)
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index a9b9e2287a09..c7a810dee294 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -29,7 +29,12 @@
.endm
.macro ldr1b ptr reg cond=al abort
+ .ifnc \cond,al
+ it \cond
ldr\cond\()b \reg, [\ptr], #1
+ .else
+ ldrb \reg, [\ptr], #1
+ .endif
.endm
.macro str1w ptr reg abort
@@ -41,7 +46,12 @@
.endm
.macro str1b ptr reg cond=al abort
+ .ifnc \cond,al
+ it \cond
str\cond\()b \reg, [\ptr], #1
+ .else
+ strb \reg, [\ptr], #1
+ .endif
.endm
.macro enter reg1 reg2
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index 5025c863713d..191a5dc41596 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -29,7 +29,9 @@
ENTRY(memmove)
subs ip, r0, r1
+ it hi
cmphi r2, ip
+ it ls
bls memcpy
stmfd sp!, {r0, r4, lr}
@@ -72,6 +74,7 @@ ENTRY(memmove)
5: ands ip, r2, #28
rsb ip, ip, #32
+ it ne
addne pc, pc, ip @ C is always clear here
b 7f
6: nop
@@ -99,19 +102,27 @@ ENTRY(memmove)
7: ldmfd sp!, {r5 - r8}
8: movs r2, r2, lsl #31
+ it ne
ldrneb r3, [r1, #-1]!
+ itt cs
ldrcsb r4, [r1, #-1]!
ldrcsb ip, [r1, #-1]
+ it ne
strneb r3, [r0, #-1]!
+ itt cs
strcsb r4, [r0, #-1]!
strcsb ip, [r0, #-1]
ldmfd sp!, {r0, r4, pc}
9: cmp ip, #2
+ it gt
ldrgtb r3, [r1, #-1]!
+ it ge
ldrgeb r4, [r1, #-1]!
ldrb lr, [r1, #-1]!
+ it gt
strgtb r3, [r0, #-1]!
+ it ge
strgeb r4, [r0, #-1]!
subs r2, r2, ip
strb lr, [r0, #-1]!
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 761eefa76243..ef022bdc0235 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -19,7 +19,9 @@
1: subs r2, r2, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
+ it lt
strltb r1, [r0], #1 @ 1
+ it le
strleb r1, [r0], #1 @ 1
strb r1, [r0], #1 @ 1
add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
@@ -51,19 +53,23 @@ ENTRY(memset)
mov lr, r1
2: subs r2, r2, #64
+ itttt ge
stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
stmgeia r0!, {r1, r3, ip, lr}
bgt 2b
+ it eq
ldmeqfd sp!, {pc} @ Now <64 bytes to go.
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r2, #32
+ itt ne
stmneia r0!, {r1, r3, ip, lr}
stmneia r0!, {r1, r3, ip, lr}
tst r2, #16
+ it ne
stmneia r0!, {r1, r3, ip, lr}
ldr lr, [sp], #4
@@ -111,17 +117,21 @@ ENTRY(memset)
#endif
4: tst r2, #8
+ it ne
stmneia r0!, {r1, r3}
tst r2, #4
+ it ne
strne r1, [r0], #4
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r2, #2
+ itt ne
strneb r1, [r0], #1
strneb r1, [r0], #1
tst r2, #1
+ it ne
strneb r1, [r0], #1
mov pc, lr
ENDPROC(memset)
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index 3fbdef5f802a..a0e319a4c03f 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -21,7 +21,9 @@
1: subs r1, r1, #4 @ 1 do we have enough
blt 5f @ 1 bytes to align with?
cmp r3, #2 @ 1
+ it lt
strltb r2, [r0], #1 @ 1
+ it le
strleb r2, [r0], #1 @ 1
strb r2, [r0], #1 @ 1
add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3))
@@ -51,19 +53,23 @@ ENTRY(__memzero)
mov lr, r2 @ 1
3: subs r1, r1, #64 @ 1 write 32 bytes out per loop
+ itttt ge
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
stmgeia r0!, {r2, r3, ip, lr} @ 4
bgt 3b @ 1
+ it eq
ldmeqfd sp!, {pc} @ 1/2 quick exit
/*
* No need to correct the count; we're only testing bits from now on
*/
tst r1, #32 @ 1
+ itt ne
stmneia r0!, {r2, r3, ip, lr} @ 4
stmneia r0!, {r2, r3, ip, lr} @ 4
tst r1, #16 @ 1 16 bytes or more?
+ it ne
stmneia r0!, {r2, r3, ip, lr} @ 4
ldr lr, [sp], #4 @ 1
@@ -109,17 +115,21 @@ ENTRY(__memzero)
#endif
4: tst r1, #8 @ 1 8 bytes or more?
+ it ne
stmneia r0!, {r2, r3} @ 2
tst r1, #4 @ 1 4 bytes or more?
+ it ne
strne r2, [r0], #4 @ 1
/*
* When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
5: tst r1, #2 @ 1 2 bytes or more?
+ itt ne
strneb r2, [r0], #1 @ 1
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
+ it ne
strneb r2, [r0], #1 @ 1
mov pc, lr @ 1
ENDPROC(__memzero)
diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S
index d8f2a1c1aea4..fd4014e54e37 100644
--- a/arch/arm/lib/strchr.S
+++ b/arch/arm/lib/strchr.S
@@ -18,9 +18,11 @@ ENTRY(strchr)
and r1, r1, #0xff
1: ldrb r2, [r0], #1
teq r2, r1
+ it ne
teqne r2, #0
bne 1b
teq r2, r1
+ ite ne
movne r0, #0
subeq r0, r0, #1
mov pc, lr
diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S
index 302f20cd2423..d7a9440de6b8 100644
--- a/arch/arm/lib/strrchr.S
+++ b/arch/arm/lib/strrchr.S
@@ -18,6 +18,7 @@ ENTRY(strrchr)
mov r3, #0
1: ldrb r2, [r0], #1
teq r2, r1
+ it eq
subeq r3, r0, #1
teq r2, #0
bne 1b
diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S
index 543d7094d18e..df66c76e8b29 100644
--- a/arch/arm/lib/testclearbit.S
+++ b/arch/arm/lib/testclearbit.S
@@ -15,6 +15,6 @@
ENTRY(_test_and_clear_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
ENTRY(_test_and_clear_bit_le)
- testop bicne, strneb
+ testop bicne, strneb, ne
ENDPROC(_test_and_clear_bit_be)
ENDPROC(_test_and_clear_bit_le)
diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S
index 0b3f390401ce..3938bdf446a6 100644
--- a/arch/arm/lib/testsetbit.S
+++ b/arch/arm/lib/testsetbit.S
@@ -15,6 +15,6 @@
ENTRY(_test_and_set_bit_be)
eor r0, r0, #0x18 @ big endian byte ordering
ENTRY(_test_and_set_bit_le)
- testop orreq, streqb
+ testop orreq, streqb, eq
ENDPROC(_test_and_set_bit_be)
ENDPROC(_test_and_set_bit_le)
diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S
index f0df6a91db04..503288955242 100644
--- a/arch/arm/lib/ucmpdi2.S
+++ b/arch/arm/lib/ucmpdi2.S
@@ -27,9 +27,13 @@
ENTRY(__ucmpdi2)
cmp xh, yh
+ it eq
cmpeq xl, yl
+ it lo
movlo r0, #0
+ it eq
moveq r0, #1
+ it hi
movhi r0, #2
mov pc, lr
@@ -40,9 +44,13 @@ ENDPROC(__ucmpdi2)
ENTRY(__aeabi_ulcmp)
cmp xh, yh
+ it eq
cmpeq xl, yl
+ it lo
movlo r0, #-1
+ it eq
moveq r0, #0
+ it hi
movhi r0, #1
mov pc, lr