author     Catalin Marinas <catalin.marinas@arm.com>   2009-03-10 10:24:54 +0000
committer  Catalin Marinas <catalin.marinas@arm.com>   2009-03-10 10:24:54 +0000
commit     77582cfa8a38fc71d1c46b3296a9f7ba4ad80275 (patch)
tree       5d1e747b8d65aa5198d2203623800632e17685e9
parent     1745b660c1511279f83ec45e6404d484ba98e578 (diff)
Thumb-2: Add IT instructions to the kernel assembly code
With a modified GNU assembler, these instructions are generated
automatically. This patch is to be used if such a gas is not available.
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
54 files changed, 285 insertions, 15 deletions
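For reference, the convention added throughout this patch: in Thumb-2, most instructions cannot carry a condition code on their own, so each conditional instruction (or group of up to four) must be introduced by an IT (If-Then) instruction whose 't'/'e' suffixes state which of the following instructions execute on the condition and which on its inverse. A sufficiently recent gas with unified syntax inserts these itself; the explicit forms added here are for older assemblers, and with unified syntax gas also accepts them when assembling the same source as ARM code. A minimal illustrative sketch of the pattern (register choice is arbitrary; the second group mirrors the printhex change in arch/arm/kernel/debug.S, but the snippet itself is not part of the patch):

        .syntax unified

        @ single conditional instruction: IT with one condition
        teq     r2, #0
        it      ne                      @ next instruction executes only if NE
        movne   r0, #1

        @ then/else block: each 't' repeats the condition, each 'e' inverts it
        cmp     r1, #10
        ite     lt                      @ first insn if LT, second if GE
        addlt   r1, r1, #'0'
        addge   r1, r1, #'a' - 10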
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 7f8a12deeea8..72277d93b161 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -406,8 +406,10 @@ __setup_mmu: sub r3, r4, #16384 @ Page directory size orr r1, r1, #3 << 10 add r2, r3, #16384 1: cmp r1, r9 @ if virt > start of RAM + it hs orrhs r1, r1, #0x0c @ set cacheable, bufferable cmp r1, r10 @ if virt > end of RAM + it hs bichs r1, r1, #0x0c @ clear cacheable, bufferable str r1, [r0], #4 @ 1:1 mapping add r1, r1, #1048576 @@ -454,10 +456,12 @@ __armv7_mmu_cache_on: #ifdef CONFIG_MMU mrc p15, 0, r11, c0, c1, 4 @ read ID_MMFR0 tst r11, #0xf @ VMSA + it ne blne __setup_mmu mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer tst r11, #0xf @ VMSA + it ne mcrne p15, 0, r0, c8, c7, 0 @ flush I,D TLBs #endif mrc p15, 0, r0, c1, c0, 0 @ read control reg @@ -467,6 +471,7 @@ __armv7_mmu_cache_on: orr r0, r0, #1 << 25 @ big-endian page tables #endif #ifdef CONFIG_MMU + itttt ne orrne r0, r0, #1 @ MMU enabled movne r1, #-1 mcrne p15, 0, r3, c2, c0, 0 @ load page table pointer @@ -567,6 +572,7 @@ call_cache_fn: adr r12, proc_types ldr r2, [r12, #4] @ get mask eor r1, r1, r6 @ (real ^ match) tst r1, r2 @ & mask + itt eq ARM( addeq pc, r12, r3 ) @ call cache function THUMB( addeq r12, r3 ) THUMB( moveq pc, r12 ) @ call cache function @@ -810,6 +816,7 @@ __armv4_mpu_cache_flush: bcs 1b @ segments 7 to 0 teq r2, #0 + it ne mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c7, c10, 4 @ drain WB mov pc, lr @@ -901,6 +908,7 @@ __armv4_mmu_cache_flush: mov r2, #1024 mov r2, r2, lsl r1 @ base dcache size *2 tst r3, #1 << 14 @ test M bit + it ne addne r2, r2, r2, lsr #1 @ +1/2 size if M == 1 mov r3, r3, lsr #12 and r3, r3, #3 diff --git a/arch/arm/include/asm/atomic.h b/arch/arm/include/asm/atomic.h index 325f881ccb50..8af16337982b 100644 --- a/arch/arm/include/asm/atomic.h +++ b/arch/arm/include/asm/atomic.h @@ -90,6 +90,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) "ldrex %1, [%2]\n" "mov %0, #0\n" "teq %1, %3\n" + "it eq\n" "strexeq %0, %4, [%2]\n" : "=&r" (res), "=&r" (oldval) : "r" (&ptr->counter), "Ir" (old), "r" (new) diff --git a/arch/arm/include/asm/checksum.h b/arch/arm/include/asm/checksum.h index 6dcc16430868..e4b9f6e17365 100644 --- a/arch/arm/include/asm/checksum.h +++ b/arch/arm/include/asm/checksum.h @@ -73,6 +73,7 @@ ip_fast_csum(const void *iph, unsigned int ihl) 1: adcs %0, %0, %3 \n\ ldr %3, [%1], #4 \n\ tst %2, #15 @ do this carefully \n\ + it ne \n\ subne %2, %2, #1 @ without destroying \n\ bne 1b @ the carry flag \n\ adcs %0, %0, %3 \n\ diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 9ee743b95de8..0efee0f0c08e 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -99,6 +99,7 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: ldrt %0, [%3]\n" " teq %0, %1\n" + " it eq\n" "2: streqt %2, [%3]\n" "3:\n" " .section __ex_table,\"a\"\n" diff --git a/arch/arm/include/asm/locks.h b/arch/arm/include/asm/locks.h index ef4c897772d1..c6f0c9363f0f 100644 --- a/arch/arm/include/asm/locks.h +++ b/arch/arm/include/asm/locks.h @@ -24,6 +24,7 @@ " teq ip, #0\n" \ " bne 1b\n" \ " teq lr, #0\n" \ +" itt mi\n" \ " movmi ip, %0\n" \ " blmi " #fail \ : \ @@ -43,6 +44,7 @@ " teq ip, #0\n" \ " bne 1b\n" \ " teq lr, #0\n" \ +" itet mi\n" \ " movmi ip, %1\n" \ " movpl ip, #0\n" \ " blmi " #fail "\n" \ @@ -65,6 
+67,7 @@ " teq ip, #0\n" \ " bne 1b\n" \ " cmp lr, #0\n" \ +" itt le\n" \ " movle ip, %0\n" \ " blle " #wake \ : \ @@ -91,6 +94,7 @@ " teq ip, #0\n" \ " bne 1b\n" \ " teq lr, #0\n" \ +" itt ne\n" \ " movne ip, %0\n" \ " blne " #fail \ : \ @@ -150,6 +154,7 @@ " subs lr, lr, %1\n" \ " str lr, [%0]\n" \ " msr cpsr_c, ip\n" \ +" itt mi\n" \ " movmi ip, %0\n" \ " blmi " #fail \ : \ @@ -170,6 +175,7 @@ " subs lr, lr, %2\n" \ " str lr, [%1]\n" \ " msr cpsr_c, ip\n" \ +" itet mi\n" \ " movmi ip, %1\n" \ " movpl ip, #0\n" \ " blmi " #fail "\n" \ @@ -193,6 +199,7 @@ " adds lr, lr, %1\n" \ " str lr, [%0]\n" \ " msr cpsr_c, ip\n" \ +" itt le\n" \ " movle ip, %0\n" \ " blle " #wake \ : \ @@ -220,6 +227,7 @@ " subs lr, lr, %1\n" \ " str lr, [%0]\n" \ " msr cpsr_c, ip\n" \ +" itt ne\n" \ " movne ip, %0\n" \ " blne " #fail \ : \ @@ -239,6 +247,7 @@ " adds lr, lr, %1\n" \ " str lr, [%0]\n" \ " msr cpsr_c, ip\n" \ +" itt cs\n" \ " movcs ip, %0\n" \ " blcs " #wake \ : \ @@ -262,6 +271,7 @@ " adds lr, lr, %1\n" \ " str lr, [%0]\n" \ " msr cpsr_c, ip\n" \ +" itt eq\n" \ " moveq ip, %0\n" \ " bleq " #wake \ : \ diff --git a/arch/arm/include/asm/mutex.h b/arch/arm/include/asm/mutex.h index 93226cf23ae0..5c3ede822a6e 100644 --- a/arch/arm/include/asm/mutex.h +++ b/arch/arm/include/asm/mutex.h @@ -111,8 +111,11 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *)) "1: ldrex %0, [%3] \n\t" "subs %1, %0, #1 \n\t" + "it eq\n\t" "strexeq %2, %1, [%3] \n\t" + "it lt\n\t" "movlt %0, #0 \n\t" + "it eq\n\t" "cmpeq %2, #0 \n\t" "bgt 1b " diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index 2b41ebbfa7ff..bc95b3c02f49 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -31,7 +31,10 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) "1: ldrex %0, [%1]\n" " teq %0, #0\n" #ifdef CONFIG_CPU_32v6K +" itee ne\n" " wfene\n" +#else +" itt eq\n" #endif " strexeq %0, %2, [%1]\n" " teqeq %0, #0\n" @@ -50,6 +53,7 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) __asm__ __volatile__( " ldrex %0, [%1]\n" " teq %0, #0\n" +" it eq\n" " strexeq %0, %2, [%1]" : "=&r" (tmp) : "r" (&lock->lock), "r" (1) @@ -94,7 +98,10 @@ static inline void __raw_write_lock(raw_rwlock_t *rw) "1: ldrex %0, [%1]\n" " teq %0, #0\n" #ifdef CONFIG_CPU_32v6K +" ite ne\n" " wfene\n" +#else +" it eq\n" #endif " strexeq %0, %2, [%1]\n" " teq %0, #0\n" @@ -113,6 +120,7 @@ static inline int __raw_write_trylock(raw_rwlock_t *rw) __asm__ __volatile__( "1: ldrex %0, [%1]\n" " teq %0, #0\n" +" it eq\n" " strexeq %0, %2, [%1]" : "=&r" (tmp) : "r" (&rw->lock), "r" (0x80000000) @@ -163,6 +171,11 @@ static inline void __raw_read_lock(raw_rwlock_t *rw) __asm__ __volatile__( "1: ldrex %0, [%2]\n" " adds %0, %0, #1\n" +#ifdef CONFIG_CPU_32v6K +" itet pl\n" +#else +" itt pl\n" +#endif " strexpl %1, %0, [%2]\n" #ifdef CONFIG_CPU_32v6K " wfemi\n" @@ -190,6 +203,7 @@ static inline void __raw_read_unlock(raw_rwlock_t *rw) " bne 1b" #ifdef CONFIG_CPU_32v6K "\n cmp %0, #0\n" +" itt eq\n" " mcreq p15, 0, %0, c7, c10, 4\n" " seveq" #endif @@ -205,6 +219,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *rw) __asm__ __volatile__( "1: ldrex %0, [%2]\n" " adds %0, %0, #1\n" +" it pl\n" " strexpl %1, %0, [%2]\n" : "=&r" (tmp), "+r" (tmp2) : "r" (&rw->lock) diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 16af317c3bad..a6367611cd70 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -338,6 +338,7 @@ static inline unsigned 
long __cmpxchg(volatile void *ptr, unsigned long old, " ldrexb %1, [%2]\n" " mov %0, #0\n" " teq %1, %3\n" + " it eq\n" " strexeqb %0, %4, [%2]\n" : "=&r" (res), "=&r" (oldval) : "r" (ptr), "Ir" (old), "r" (new) @@ -351,6 +352,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, " ldrex %1, [%2]\n" " mov %0, #0\n" " teq %1, %3\n" + " it eq\n" " strexeq %0, %4, [%2]\n" : "=&r" (res), "=&r" (oldval) : "r" (ptr), "Ir" (old), "r" (new) diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 2bafb1dc3ca8..4c4295a37f47 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -69,7 +69,7 @@ static inline void set_fs(mm_segment_t fs) #define __addr_ok(addr) ({ \ unsigned long flag; \ - __asm__("cmp %2, %0; movlo %0, #0" \ + __asm__("cmp %2, %0; it lo; movlo %0, #0" \ : "=&r" (flag) \ : "0" (current_thread_info()->addr_limit), "r" (addr) \ : "cc"); \ @@ -79,7 +79,7 @@ static inline void set_fs(mm_segment_t fs) #define __range_ok(addr,size) ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ - __asm__("adds %1, %2, %3; sbcccs %1, %1, %0; movcc %0, #0" \ + __asm__("adds %1, %2, %3; it cc; sbcccs %1, %1, %0; it cc; movcc %0, #0" \ : "=&r" (flag), "=&r" (roksum) \ : "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \ : "cc"); \ diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h index 422f3cc204a2..b30d7022c52a 100644 --- a/arch/arm/include/asm/vfpmacros.h +++ b/arch/arm/include/asm/vfpmacros.h @@ -25,6 +25,7 @@ VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? + ite eq ldceql p11, cr0, [\base],#32*4 @ FLDMIAD \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif @@ -41,6 +42,7 @@ VFPFMRX \tmp, MVFR0 @ Media and VFP Feature Register 0 and \tmp, \tmp, #MVFR0_A_SIMD_MASK @ A_SIMD field cmp \tmp, #2 @ 32 x 64bit registers? + ite eq stceql p11, cr0, [\base],#32*4 @ FSTMIAD \base!, {d16-d31} addne \base, \base, #32*4 @ step over unused register space #endif diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index f53c58290543..e95a5b6657f8 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -105,6 +105,7 @@ printhex: adr r2, hexbuf 1: and r1, r0, #15 mov r0, r0, lsr #4 cmp r1, #10 + ite lt addlt r1, r1, #'0' addge r1, r1, #'a' - 10 strb r1, [r3, #-1]! 
@@ -123,9 +124,11 @@ ENTRY(printascii) senduart r1, r3 busyuart r2, r3 teq r1, #'\n' + itt eq moveq r1, #'\r' beq 1b 2: teq r0, #0 + itt ne ldrneb r1, [r0], #1 teqne r1, #0 bne 1b diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 3ecaa2e0718c..9970015974f9 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -30,6 +30,7 @@ .macro irq_handler get_irqnr_preamble r5, lr 1: get_irqnr_and_base r0, r6, r5, lr + ittt ne movne r1, sp @ @ routine called with r0 = irq number, r1 = struct pt_regs * @@ -51,12 +52,14 @@ * preserved from get_irqnr_and_base above */ test_for_ipi r0, r6, r5, lr + ittt ne movne r0, sp adrne lr, BSYM(1b) bne do_IPI #ifdef CONFIG_LOCAL_TIMERS test_for_ltirq r0, r6, r5, lr + ittt ne movne r0, sp adrne lr, BSYM(1b) bne do_local_timer @@ -144,6 +147,7 @@ ENDPROC(__und_invalid) #else SPFIX( tst sp, #4 ) #endif + SPFIX( it eq ) SPFIX( subeq sp, sp, #4 ) stmia sp, {r1 - r12} @@ -151,6 +155,7 @@ ENDPROC(__und_invalid) add r5, sp, #S_SP - 4 @ here for interlock avoidance mov r4, #-1 @ "" "" "" "" add r0, sp, #(S_FRAME_SIZE + \stack_hole - 4) + SPFIX( it eq ) SPFIX( addeq r0, r0, #4 ) str r1, [sp, #-4]! @ save the "real" r0 copied @ from the exception stack @@ -178,6 +183,7 @@ __dabt_svc: @ mrs r9, cpsr tst r3, #PSR_I_BIT + it eq biceq r9, r9, #PSR_I_BIT @ @@ -236,8 +242,10 @@ __irq_svc: str r8, [tsk, #TI_PREEMPT] @ restore preempt count ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 + it ne movne r0, #0 @ force flags to 0 tst r0, #_TIF_NEED_RESCHED + it ne blne svc_preempt #endif ldr r2, [sp, #S_PSR] @ irqs are already disabled @@ -257,6 +265,7 @@ svc_preempt: 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED + it eq moveq pc, r8 @ go again b 1b #endif @@ -308,6 +317,7 @@ __pabt_svc: @ mrs r9, cpsr tst r3, #PSR_I_BIT + it eq biceq r9, r9, #PSR_I_BIT @ @@ -466,6 +476,7 @@ __irq_usr: ldr r0, [tsk, #TI_PREEMPT] str r8, [tsk, #TI_PREEMPT] teq r0, r7 + itt ne ARM( strne r0, [r0, -r0] ) THUMB( movne r0, #0 ) THUMB( strne r0, [r0] ) @@ -495,9 +506,9 @@ __und_usr: adr r9, BSYM(ret_from_exception) adr lr, BSYM(__und_usr_unknown) tst r3, #PSR_T_BIT @ Thumb mode? + itet eq subeq r4, r2, #4 @ ARM instr at LR - 4 subne r4, r2, #2 @ Thumb instr at LR - 2 - it eq @ explicit IT needed for the label 1: ldreqt r0, [r4] #ifdef CONFIG_CPU_ENDIAN_BE8 rev r0, r0 @ little endian instruction @@ -589,6 +600,7 @@ call_fpe: 1: #endif tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 + ite ne tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 #if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710) and r8, r0, #0x0f000000 @ mask out op-code bits @@ -974,6 +986,7 @@ kuser_cmpxchg_fixup: #endif 1: ldrex r3, [r2] subs r3, r3, r0 + it eq strexeq r3, r1, [r2] teqeq r3, #1 beq 1b diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index dee4cea60aa7..f89b9ca7f010 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -186,6 +186,7 @@ ENTRY(vector_swi) */ #ifdef CONFIG_ARM_THUMB tst r8, #PSR_T_BIT + ite ne movne r10, #0 @ no thumb OABI emulation ldreq r10, [lr, #-4] @ get SWI instruction #else @@ -244,6 +245,7 @@ ENTRY(vector_swi) * get the old ABI syscall table address. 
*/ bics r10, r10, #0xff000000 + itt ne eorne scno, r10, #__NR_OABI_SYSCALL_BASE ldrne tbl, =sys_oabi_call_table #elif !defined(CONFIG_AEABI) @@ -257,6 +259,7 @@ ENTRY(vector_swi) cmp scno, #NR_syscalls @ check upper syscall limit adr lr, BSYM(ret_fast_syscall) @ return address + it cc ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF @@ -281,6 +284,7 @@ __sys_trace: mov scno, r0 @ syscall number (possibly new) add r1, sp, #S_R0 + S_OFF @ pointer to regs cmp scno, #NR_syscalls @ check upper syscall limit + itt cc ldmccia r1, {r0 - r3} @ have to reload r0 - r3 ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine b 2b @@ -326,11 +330,14 @@ ENTRY(sys_call_table) sys_syscall: bic scno, r0, #__NR_OABI_SYSCALL_BASE cmp scno, #__NR_syscall - __NR_SYSCALL_BASE + it ne cmpne scno, #NR_syscalls @ check range + itttt lo stmloia sp, {r5, r6} @ shuffle args movlo r0, r1 movlo r1, r2 movlo r2, r3 + itt lo movlo r3, r4 ldrlo pc, [tbl, scno, lsl #2] b sys_ni_syscall @@ -384,12 +391,14 @@ ENDPROC(sys_sigaltstack_wrapper) sys_statfs64_wrapper: teq r1, #88 + it eq moveq r1, #84 b sys_statfs64 ENDPROC(sys_statfs64_wrapper) sys_fstatfs64_wrapper: teq r1, #88 + it eq moveq r1, #84 b sys_fstatfs64 ENDPROC(sys_fstatfs64_wrapper) @@ -401,6 +410,7 @@ ENDPROC(sys_fstatfs64_wrapper) sys_mmap2: #if PAGE_SHIFT > 12 tst r5, #PGOFF_MASK + ittt eq moveq r5, r5, lsr #PAGE_SHIFT - 12 streq r5, [sp, #4] beq do_mmap2 diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 4f1dd91a4e09..84c16d241bf7 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -104,6 +104,7 @@ stmdb r0, {r1, \rpsr} @ rfe context ldmia sp, {r0 - r12} ldr lr, [sp, #S_LR] + ite eq addeq sp, sp, #S_FRAME_SIZE - 8 @ aligned addne sp, sp, #S_FRAME_SIZE - 4 @ not aligned rfeia sp! diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 53c403fa2c6d..81be5d6a933d 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -42,6 +42,7 @@ __mmap_switched: ldmia r3!, {r4, r5, r6, r7} cmp r4, r5 @ Copy data segment if needed + itttt ne 1: cmpne r5, r6 ldrne fp, [r4], #4 strne fp, [r5], #4 @@ -49,6 +50,7 @@ __mmap_switched: mov fp, #0 @ Clear BSS (and zero fp) 1: cmp r6, r7 + itt cc strcc fp, [r6],#4 bcc 1b diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 437df3a5ac4d..167bb4ef65ab 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -116,6 +116,7 @@ ENTRY(secondary_startup) mrc p15, 0, r9, c0, c0 @ get processor id bl __lookup_processor_type movs r10, r5 @ invalid processor? 
+ it eq moveq r0, #'p' @ yes, error 'p' beq __error @@ -255,6 +256,7 @@ __create_page_tables: add r6, r4, r6, lsr #18 1: cmp r0, r6 add r3, r3, #1 << 20 + it ls strls r3, [r0], #4 bls 1b @@ -298,6 +300,7 @@ __create_page_tables: add r0, r4, r3 rsb r3, r3, #0x4000 @ PTRS_PER_PGD*sizeof(long) cmp r3, #0x0800 @ limit to 512MB + it hi movhi r3, #0x0800 add r6, r0, r3 ldr r3, [r8, #MACHINFO_PHYSIO] diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S index 638deb13da1c..b18944b85e4b 100644 --- a/arch/arm/lib/ashldi3.S +++ b/arch/arm/lib/ashldi3.S @@ -41,6 +41,7 @@ ENTRY(__aeabi_llsl) subs r3, r2, #32 rsb ip, r2, #32 + itett mi movmi ah, ah, lsl r2 movpl ah, al, lsl r3 ARM( orrmi ah, ah, al, lsr ip ) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S index 015e8aa5a1d1..0d5ace74dd9d 100644 --- a/arch/arm/lib/ashrdi3.S +++ b/arch/arm/lib/ashrdi3.S @@ -41,6 +41,7 @@ ENTRY(__aeabi_lasr) subs r3, r2, #32 rsb ip, r2, #32 + itett mi movmi al, al, lsr r2 movpl al, ah, asr r3 ARM( orrmi al, al, ah, lsl ip ) diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index aaf7220d9e30..42e62dd54188 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -38,6 +38,7 @@ ENDPROC(c_backtrace) beq no_frame @ we have no stack frames tst r1, #0x10 @ 26 or 32-bit mode? + itte eq ARM( moveq mask, #0xfc000003 ) THUMB( moveq mask, #0xfc000000 ) THUMB( orreq mask, #0x03 ) @@ -75,6 +76,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions 1003: ldr r2, [sv_pc, #-4] @ if stmfd sp!, {args} exists, ldr r3, .Ldsi+4 @ adjust saved 'pc' back one teq r3, r2, lsr #10 @ instruction + ite ne subne r0, sv_pc, #4 @ allow for mov subeq r0, sv_pc, #8 @ allow for mov + stmia @@ -86,6 +88,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions ldr r1, [sv_pc, #-4] @ if stmfd sp!, {args} exists, ldr r3, .Ldsi+4 teq r3, r1, lsr #10 + ittt eq ldreq r0, [frame, #-8] @ get sp subeq r0, r0, #4 @ point at the last arg bleq .Ldumpstm @ dump saved registers @@ -93,6 +96,7 @@ for_each_frame: tst frame, mask @ Check for address exceptions 1004: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, ip, lr, pc} ldr r3, .Ldsi @ instruction exists, teq r3, r1, lsr #10 + itt eq subeq r0, frame, #16 bleq .Ldumpstm @ dump saved registers @@ -134,6 +138,7 @@ ENDPROC(c_backtrace) beq 2f add r7, r7, #1 teq r7, #6 + itte eq moveq r7, #1 moveq r1, #'\n' movne r1, #' ' @@ -144,6 +149,7 @@ ENDPROC(c_backtrace) 2: subs reg, reg, #1 bpl 1b teq r7, #0 + itt ne adrne r0, .Lcr blne printk ldmfd sp!, {instr, reg, stack, r7, pc} diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 2e787d40d599..5e34c1238103 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -13,18 +13,22 @@ mov pc, lr .endm - .macro testop, instr, store + .macro testop, instr, store, cond=al and r3, r0, #7 @ Get bit offset mov r2, #1 add r1, r1, r0, lsr #3 @ Get byte offset mov r3, r2, lsl r3 @ create mask 1: ldrexb r2, [r1] ands r0, r2, r3 @ save old value of bit - \instr r2, r2, r3 @ toggle bit + .ifnc \cond,al + it \cond + .endif + \instr r2, r2, r3 @ toggle bit strexb ip, r2, [r1] cmp ip, #0 bne 1b cmp r0, #0 + it ne movne r0, #1 2: mov pc, lr .endm @@ -49,7 +53,7 @@ * Note: we can trivially conditionalise the store instruction * to avoid dirtying the data cache. 
*/ - .macro testop, instr, store + .macro testop, instr, store, cond=al add r1, r1, r0, lsr #3 and r3, r0, #7 mov r0, #1 diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index e4fe124acedc..9a7a16426428 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -69,6 +69,9 @@ .endm .macro str1b ptr reg cond=al abort + .ifnc \cond,al + it \cond + .endif str\cond\()b \reg, [\ptr], #1 .endm diff --git a/arch/arm/lib/copy_page.S b/arch/arm/lib/copy_page.S index 6ae04db1ca4f..1c57a034cadf 100644 --- a/arch/arm/lib/copy_page.S +++ b/arch/arm/lib/copy_page.S @@ -39,8 +39,10 @@ ENTRY(copy_page) ldmia r1!, {r3, r4, ip, lr} @ 4 subs r2, r2, #1 @ 1 stmia r0!, {r3, r4, ip, lr} @ 4 + itt gt ldmgtia r1!, {r3, r4, ip, lr} @ 4 bgt 1b @ 1 + PLD( itt eq ) PLD( ldmeqia r1!, {r3, r4, ip, lr} ) PLD( beq 2b ) ldmfd sp!, {r4, pc} @ 3 diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index 805e3f8fb007..8e8fc03f55bd 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -109,6 +109,7 @@ #if LDR1W_SHIFT > 0 lsl ip, ip, #LDR1W_SHIFT #endif + it ne addne pc, pc, ip @ C is always clear here b 7f 6: diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 44354fdba350..dc0fe7391527 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -53,6 +53,9 @@ .endm .macro ldr1b ptr reg cond=al abort + .ifnc \cond,al + it \cond + .endif ldr\cond\()b \reg, [\ptr], #1 .endm diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S index 31d3cb34740d..e9a504e4302e 100644 --- a/arch/arm/lib/csumpartial.S +++ b/arch/arm/lib/csumpartial.S @@ -39,6 +39,7 @@ td3 .req lr /* we must have at least one byte. */ tst buf, #1 @ odd address? + itttt ne movne sum, sum, ror #8 ldrneb td0, [buf], #1 subne len, len, #1 @@ -68,25 +69,30 @@ td3 .req lr bne .Lless8_wordlp .Lless8_byte: tst len, #1 @ odd number of bytes + itt ne ldrneb td0, [buf], #1 @ include last byte adcnes sum, sum, td0, put_byte_0 @ update checksum .Ldone: adc r0, sum, #0 @ collect up the last carry ldr td0, [sp], #4 tst td0, #1 @ check buffer alignment + it ne movne r0, r0, ror #8 @ rotate checksum by 8 bits ldr pc, [sp], #4 @ return .Lnot_aligned: tst buf, #1 @ odd address + ittt ne ldrneb td0, [buf], #1 @ make even subne len, len, #1 adcnes sum, sum, td0, put_byte_1 @ update checksum tst buf, #2 @ 32-bit aligned? #if __LINUX_ARM_ARCH__ >= 4 + itt ne ldrneh td0, [buf], #2 @ make 32-bit aligned subne len, len, #2 #else + itttt ne ldrneb td0, [buf], #1 ldrneb ip, [buf], #1 subne len, len, #2 @@ -96,6 +102,7 @@ td3 .req lr orrne td0, ip, td0, lsl #8 #endif #endif + it ne adcnes sum, sum, td0 @ update checksum mov pc, lr @@ -105,10 +112,12 @@ ENTRY(csum_partial) blo .Lless8 @ 8 bytes to copy. 
tst buf, #1 + it ne movne sum, sum, ror #8 adds sum, sum, #0 @ C = 0 tst buf, #3 @ Test destination alignment + it ne blne .Lnot_aligned @ align destination, return here 1: bics ip, len, #31 diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d620a5f22a09..8e1c141b6524 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -40,6 +40,7 @@ sum .req r3 adcs sum, sum, ip, put_byte_1 @ update checksum strb ip, [dst], #1 tst dst, #2 + it eq moveq pc, lr @ dst is now 32bit aligned .Ldst_16bit: load2b r8, ip @@ -94,6 +95,7 @@ FN_ENTRY adds sum, sum, #0 @ C = 0 tst dst, #3 @ Test destination alignment + it ne blne .Ldst_unaligned @ align destination, return here /* @@ -147,6 +149,7 @@ FN_ENTRY strb r5, [dst], #1 mov r5, r4, get_byte_2 .Lexit: tst len, #1 + ittt ne strneb r5, [dst], #1 andne r5, r5, #255 adcnes sum, sum, r5, put_byte_0 @@ -160,6 +163,7 @@ FN_ENTRY .Ldone: adc r0, sum, #0 ldr sum, [sp, #0] @ dst tst sum, #1 + it ne movne r0, r0, ror #8 load_regs diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index fd0e9dcd9fdc..152ed83480f7 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -77,6 +77,7 @@ add r2, r2, r1 mov r0, #0 @ zero the buffer 9002: teq r2, r1 + it ne strneb r0, [r1], #1 bne 9002b load_regs diff --git a/arch/arm/lib/delay.S b/arch/arm/lib/delay.S index 8d6a8762ab88..fcd87ffe2b1d 100644 --- a/arch/arm/lib/delay.S +++ b/arch/arm/lib/delay.S @@ -31,6 +31,7 @@ ENTRY(__const_udelay) @ 0 <= r0 <= 0x7fffff06 mov r2, r2, lsr #10 @ max = 0x00007fff mul r0, r2, r0 @ max = 2^32-1 movs r0, r0, lsr #6 + it eq moveq pc, lr /* @@ -58,6 +59,7 @@ ENTRY(__delay) movls pc, lr subs r0, r0, #1 #endif + it hi bhi __delay mov pc, lr ENDPROC(__udelay) diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index faa7748142da..d02268ac7baf 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -84,8 +84,10 @@ ENTRY(__do_div64) @ The division loop for needed upper bit positions. @ Break out early if dividend reaches 0. 2: cmp xh, yl + itt cs orrcs yh, yh, ip subcss xh, xh, yl + it ne movnes ip, ip, lsr #1 mov yl, yl, lsr #1 bne 2b @@ -93,7 +95,9 @@ ENTRY(__do_div64) @ See if we need to handle lower 32-bit result. 3: cmp xh, #0 mov yl, #0 + it eq cmpeq xl, r4 + itt lo movlo xh, xl movlo pc, lr @@ -104,7 +108,9 @@ ENTRY(__do_div64) 4: movs xl, xl, lsl #1 adcs xh, xh, xh beq 6f + it cc cmpcc xh, r4 + itt cs 5: orrcs yl, yl, ip subcs xh, xh, r4 movs ip, ip, lsr #1 @@ -116,6 +122,7 @@ ENTRY(__do_div64) @ Otherwise, if lower part is also null then we are done. 6: bcs 5b cmp xl, #0 + it eq moveq pc, lr @ We still have remainer bits in the low part. Bring them up. @@ -185,7 +192,8 @@ ENTRY(__do_div64) mov pc, lr @ eq -> division by 1: obvious enough... -9: moveq yl, xl +9: itttt eq + moveq yl, xl moveq yh, xh moveq xh, #0 moveq pc, lr diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S index 9f4238987fe9..45229e46ed5b 100644 --- a/arch/arm/lib/io-readsb.S +++ b/arch/arm/lib/io-readsb.S @@ -12,12 +12,15 @@ .Linsb_align: rsb ip, ip, #4 cmp ip, r2 + it gt movgt ip, r2 cmp ip, #2 ldrb r3, [r0] strb r3, [r1], #1 + itt ge ldrgeb r3, [r0] strgeb r3, [r1], #1 + itt gt ldrgtb r3, [r0] strgtb r3, [r1], #1 subs r2, r2, ip @@ -25,6 +28,7 @@ ENTRY(__raw_readsb) teq r2, #0 @ do we have to check for the zero len? 
+ it eq moveq pc, lr ands ip, r1, #3 bne .Linsb_align @@ -72,6 +76,7 @@ ENTRY(__raw_readsb) bpl .Linsb_16_lp tst r2, #15 + it eq ldmeqfd sp!, {r4 - r6, pc} .Linsb_no_16: tst r2, #8 @@ -109,13 +114,16 @@ ENTRY(__raw_readsb) str r3, [r1], #4 .Linsb_no_4: ands r2, r2, #3 + it eq ldmeqfd sp!, {r4 - r6, pc} cmp r2, #2 ldrb r3, [r0] strb r3, [r1], #1 + itt ge ldrgeb r3, [r0] strgeb r3, [r1], #1 + itt gt ldrgtb r3, [r0] strgtb r3, [r1] diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 5fb97e7f9f4b..1f02e66d079c 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -12,6 +12,7 @@ ENTRY(__raw_readsl) teq r2, #0 @ do we have to check for the zero len? + it eq moveq pc, lr ands ip, r1, #3 bne 3f @@ -28,9 +29,11 @@ ENTRY(__raw_readsl) bpl 1b ldmfd sp!, {r4, lr} 2: movs r2, r2, lsl #31 + ittt cs ldrcs r3, [r0, #0] ldrcs ip, [r0, #0] stmcsia r1!, {r3, ip} + itt ne ldrne r3, [r0, #0] strne r3, [r1, #0] mov pc, lr @@ -48,6 +51,7 @@ ENTRY(__raw_readsl) 4: subs r2, r2, #1 mov ip, r3, pull #24 + itttt ne ldrne r3, [r0] orrne ip, ip, r3, push #8 strne ip, [r1], #4 @@ -56,6 +60,7 @@ ENTRY(__raw_readsl) 5: subs r2, r2, #1 mov ip, r3, pull #16 + itttt ne ldrne r3, [r0] orrne ip, ip, r3, push #16 strne ip, [r1], #4 @@ -64,6 +69,7 @@ ENTRY(__raw_readsl) 6: subs r2, r2, #1 mov ip, r3, pull #8 + itttt ne ldrne r3, [r0] orrne ip, ip, r3, push #24 strne ip, [r1], #4 diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S index 1f393d42593d..9db32f0541da 100644 --- a/arch/arm/lib/io-readsw-armv4.S +++ b/arch/arm/lib/io-readsw-armv4.S @@ -26,6 +26,7 @@ ENTRY(__raw_readsw) teq r2, #0 + it eq moveq pc, lr tst r1, #3 bne .Linsw_align @@ -76,7 +77,8 @@ ENTRY(__raw_readsw) pack r3, r3, ip str r3, [r1], #4 -.Lno_insw_2: ldrneh r3, [r0] +.Lno_insw_2: itt ne + ldrneh r3, [r0] strneh r3, [r1] ldmfd sp!, {r4, r5, pc} @@ -94,6 +96,7 @@ ENTRY(__raw_readsw) #endif .Linsw_noalign: stmfd sp!, {r4, lr} + it cc ldrccb ip, [r1, #-1]! bcc 1f @@ -121,6 +124,7 @@ ENTRY(__raw_readsw) 3: tst r2, #1 strb ip, [r1], #1 + itttt ne ldrneh ip, [r0] _BE_ONLY_( movne ip, ip, ror #8 ) strneb ip, [r1], #1 diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S index 68b92f4acaeb..5fad6b0c7f05 100644 --- a/arch/arm/lib/io-writesb.S +++ b/arch/arm/lib/io-writesb.S @@ -32,12 +32,15 @@ .Loutsb_align: rsb ip, ip, #4 cmp ip, r2 + it gt movgt ip, r2 cmp ip, #2 ldrb r3, [r1], #1 strb r3, [r0] + itt ge ldrgeb r3, [r1], #1 strgeb r3, [r0] + itt gt ldrgtb r3, [r1], #1 strgtb r3, [r0] subs r2, r2, ip @@ -45,6 +48,7 @@ ENTRY(__raw_writesb) teq r2, #0 @ do we have to check for the zero len? + it eq moveq pc, lr ands ip, r1, #3 bne .Loutsb_align @@ -64,6 +68,7 @@ ENTRY(__raw_writesb) bpl .Loutsb_16_lp tst r2, #15 + it eq ldmeqfd sp!, {r4, r5, pc} .Loutsb_no_16: tst r2, #8 @@ -80,13 +85,16 @@ ENTRY(__raw_writesb) outword r3 .Loutsb_no_4: ands r2, r2, #3 + it eq ldmeqfd sp!, {r4, r5, pc} cmp r2, #2 ldrb r3, [r1], #1 strb r3, [r0] + itt ge ldrgeb r3, [r1], #1 strgeb r3, [r0] + itt gt ldrgtb r3, [r1] strgtb r3, [r0] diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index 8d3b7813725c..ced1d9169090 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -12,6 +12,7 @@ ENTRY(__raw_writesl) teq r2, #0 @ do we have to check for the zero len? 
+ it eq moveq pc, lr ands ip, r1, #3 bne 3f @@ -28,10 +29,14 @@ ENTRY(__raw_writesl) bpl 1b ldmfd sp!, {r4, lr} 2: movs r2, r2, lsl #31 + itt cs ldmcsia r1!, {r3, ip} strcs r3, [r0, #0] + it ne ldrne r3, [r1, #0] + it cs strcs ip, [r0, #0] + it ne strne r3, [r0, #0] mov pc, lr diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S index ff4f71b579ee..bb8530310ff3 100644 --- a/arch/arm/lib/io-writesw-armv4.S +++ b/arch/arm/lib/io-writesw-armv4.S @@ -31,6 +31,7 @@ ENTRY(__raw_writesw) teq r2, #0 + it eq moveq pc, lr ands r3, r1, #3 bne .Loutsw_align @@ -61,7 +62,8 @@ ENTRY(__raw_writesw) ldr r3, [r1], #4 outword r3 -.Lno_outsw_2: ldrneh r3, [r1] +.Lno_outsw_2: itt ne + ldrneh r3, [r1] strneh r3, [r0] ldmfd sp!, {r4, r5, pc} @@ -79,6 +81,7 @@ ENTRY(__raw_writesw) THUMB( rsb r3, r3, #0 ) THUMB( ldr r3, [r1, r3] ) THUMB( sub r1, r3 ) + it cs subcs r2, r2, #1 bcs 2f subs r2, r2, #2 @@ -94,7 +97,8 @@ ENTRY(__raw_writesw) bpl 1b tst r2, #1 -3: movne ip, r3, lsr #8 +3: itt ne + movne ip, r3, lsr #8 strneh ip, [r0] mov pc, lr ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index 67964bcfc854..1eb73e769c6c 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -56,6 +56,7 @@ Boston, MA 02111-1307, USA. */ @ at the left end of each 4 bit nibbles in the division loop @ to save one loop in most cases. tst \divisor, #0xe0000000 + itte eq moveq \divisor, \divisor, lsl #3 moveq \curbit, #8 movne \curbit, #1 @@ -65,6 +66,7 @@ Boston, MA 02111-1307, USA. */ @ division loop. Continue shifting until the divisor is @ larger than the dividend. 1: cmp \divisor, #0x10000000 + ittt lo cmplo \divisor, \dividend movlo \divisor, \divisor, lsl #4 movlo \curbit, \curbit, lsl #4 @@ -73,6 +75,7 @@ Boston, MA 02111-1307, USA. */ @ For very big divisors, we must shift it a bit at a time, or @ we will be in danger of overflowing. 1: cmp \divisor, #0x80000000 + ittt lo cmplo \divisor, \dividend movlo \divisor, \divisor, lsl #1 movlo \curbit, \curbit, lsl #1 @@ -84,19 +87,25 @@ Boston, MA 02111-1307, USA. */ @ Division loop 1: cmp \dividend, \divisor + itt hs subhs \dividend, \dividend, \divisor orrhs \result, \result, \curbit cmp \dividend, \divisor, lsr #1 + itt hs subhs \dividend, \dividend, \divisor, lsr #1 orrhs \result, \result, \curbit, lsr #1 cmp \dividend, \divisor, lsr #2 + itt hs subhs \dividend, \dividend, \divisor, lsr #2 orrhs \result, \result, \curbit, lsr #2 cmp \dividend, \divisor, lsr #3 + itt hs subhs \dividend, \dividend, \divisor, lsr #3 orrhs \result, \result, \curbit, lsr #3 cmp \dividend, #0 @ Early termination? + it ne movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? + it ne movne \divisor, \divisor, lsr #4 bne 1b @@ -113,19 +122,24 @@ Boston, MA 02111-1307, USA. */ #else cmp \divisor, #(1 << 16) + itt hs movhs \divisor, \divisor, lsr #16 movhs \order, #16 + it lo movlo \order, #0 cmp \divisor, #(1 << 8) + itt hs movhs \divisor, \divisor, lsr #8 addhs \order, \order, #8 cmp \divisor, #(1 << 4) + itt hs movhs \divisor, \divisor, lsr #4 addhs \order, \order, #4 cmp \divisor, #(1 << 2) + ite hi addhi \order, \order, #3 addls \order, \order, \divisor, lsr #1 @@ -152,6 +166,7 @@ Boston, MA 02111-1307, USA. */ @ division loop. Continue shifting until the divisor is @ larger than the dividend. 1: cmp \divisor, #0x10000000 + ittt lo cmplo \divisor, \dividend movlo \divisor, \divisor, lsl #4 addlo \order, \order, #4 @@ -160,6 +175,7 @@ Boston, MA 02111-1307, USA. 
*/ @ For very big divisors, we must shift it a bit at a time, or @ we will be in danger of overflowing. 1: cmp \divisor, #0x80000000 + ittt lo cmplo \divisor, \dividend movlo \divisor, \divisor, lsl #1 addlo \order, \order, #1 @@ -173,19 +189,25 @@ Boston, MA 02111-1307, USA. */ blt 2f 1: cmp \dividend, \divisor + it hs subhs \dividend, \dividend, \divisor cmp \dividend, \divisor, lsr #1 + it hs subhs \dividend, \dividend, \divisor, lsr #1 cmp \dividend, \divisor, lsr #2 + it hs subhs \dividend, \dividend, \divisor, lsr #2 cmp \dividend, \divisor, lsr #3 + it hs subhs \dividend, \dividend, \divisor, lsr #3 cmp \dividend, #1 mov \divisor, \divisor, lsr #4 + it ge subges \order, \order, #4 bge 1b tst \order, #3 + it ne teqne \dividend, #0 beq 5f @@ -194,12 +216,15 @@ Boston, MA 02111-1307, USA. */ blt 4f beq 3f cmp \dividend, \divisor + it hs subhs \dividend, \dividend, \divisor mov \divisor, \divisor, lsr #1 3: cmp \dividend, \divisor + it hs subhs \dividend, \dividend, \divisor mov \divisor, \divisor, lsr #1 4: cmp \dividend, \divisor + it hs subhs \dividend, \dividend, \divisor 5: .endm @@ -209,6 +234,7 @@ ENTRY(__udivsi3) ENTRY(__aeabi_uidiv) subs r2, r1, #1 + it eq moveq pc, lr bcc Ldiv0 cmp r0, r1 @@ -221,7 +247,8 @@ ENTRY(__aeabi_uidiv) mov r0, r2 mov pc, lr -11: moveq r0, #1 +11: ite eq + moveq r0, #1 movne r0, #0 mov pc, lr @@ -237,10 +264,14 @@ ENTRY(__umodsi3) subs r2, r1, #1 @ compare divisor with 1 bcc Ldiv0 + ite ne cmpne r0, r1 @ compare dividend with divisor moveq r0, #0 + it hi tsthi r1, r2 @ see if divisor is power of 2 + it eq andeq r0, r0, r2 + it ls movls pc, lr ARM_MOD_BODY r0, r1, r2, r3 @@ -255,10 +286,12 @@ ENTRY(__aeabi_idiv) cmp r1, #0 eor ip, r0, r1 @ save the sign of the result. beq Ldiv0 + it mi rsbmi r1, r1, #0 @ loops below use unsigned. subs r2, r1, #1 @ division by 1 or -1 ? beq 10f movs r3, r0 + it mi rsbmi r3, r0, #0 @ positive dividend value cmp r3, r1 bls 11f @@ -268,14 +301,18 @@ ENTRY(__aeabi_idiv) ARM_DIV_BODY r3, r1, r0, r2 cmp ip, #0 + it mi rsbmi r0, r0, #0 mov pc, lr 10: teq ip, r0 @ same sign ? + it mi rsbmi r0, r0, #0 mov pc, lr -11: movlo r0, #0 +11: it lo + movlo r0, #0 + itt eq moveq r0, ip, asr #31 orreq r0, r0, #1 mov pc, lr @@ -284,6 +321,7 @@ ENTRY(__aeabi_idiv) cmp ip, #0 mov r0, r3, lsr r2 + it mi rsbmi r0, r0, #0 mov pc, lr @@ -294,19 +332,25 @@ ENTRY(__modsi3) cmp r1, #0 beq Ldiv0 + it mi rsbmi r1, r1, #0 @ loops below use unsigned. 
movs ip, r0 @ preserve sign of dividend + it mi rsbmi r0, r0, #0 @ if negative make positive subs r2, r1, #1 @ compare divisor with 1 + ite ne cmpne r0, r1 @ compare dividend with divisor moveq r0, #0 + it hi tsthi r1, r2 @ see if divisor is power of 2 + it eq andeq r0, r0, r2 bls 10f ARM_MOD_BODY r0, r1, r2, r3 10: cmp ip, #0 + it mi rsbmi r0, r0, #0 mov pc, lr diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S index f83d449141f7..57db3a265e5b 100644 --- a/arch/arm/lib/lshrdi3.S +++ b/arch/arm/lib/lshrdi3.S @@ -41,6 +41,7 @@ ENTRY(__aeabi_llsr) subs r3, r2, #32 rsb ip, r2, #32 + itett mi movmi al, al, lsr r2 movpl al, ah, lsr r3 ARM( orrmi al, al, ah, lsl ip ) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S index 1da86991d700..0d1d596ad8cd 100644 --- a/arch/arm/lib/memchr.S +++ b/arch/arm/lib/memchr.S @@ -21,6 +21,7 @@ ENTRY(memchr) teq r3, r1 bne 1b sub r0, r0, #1 -2: movne r0, #0 +2: it ne + movne r0, #0 mov pc, lr ENDPROC(memchr) diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index a9b9e2287a09..c7a810dee294 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -29,7 +29,12 @@ .endm .macro ldr1b ptr reg cond=al abort + .ifnc \cond,al + it \cond ldr\cond\()b \reg, [\ptr], #1 + .else + ldrb \reg, [\ptr], #1 + .endif .endm .macro str1w ptr reg abort @@ -41,7 +46,12 @@ .endm .macro str1b ptr reg cond=al abort + .ifnc \cond,al + it \cond str\cond\()b \reg, [\ptr], #1 + .else + strb \reg, [\ptr], #1 + .endif .endm .macro enter reg1 reg2 diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index 5025c863713d..191a5dc41596 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -29,7 +29,9 @@ ENTRY(memmove) subs ip, r0, r1 + it hi cmphi r2, ip + it ls bls memcpy stmfd sp!, {r0, r4, lr} @@ -72,6 +74,7 @@ ENTRY(memmove) 5: ands ip, r2, #28 rsb ip, ip, #32 + it ne addne pc, pc, ip @ C is always clear here b 7f 6: nop @@ -99,19 +102,27 @@ ENTRY(memmove) 7: ldmfd sp!, {r5 - r8} 8: movs r2, r2, lsl #31 + it ne ldrneb r3, [r1, #-1]! + itt cs ldrcsb r4, [r1, #-1]! ldrcsb ip, [r1, #-1] + it ne strneb r3, [r0, #-1]! + itt cs strcsb r4, [r0, #-1]! strcsb ip, [r0, #-1] ldmfd sp!, {r0, r4, pc} 9: cmp ip, #2 + it gt ldrgtb r3, [r1, #-1]! + it ge ldrgeb r4, [r1, #-1]! ldrb lr, [r1, #-1]! + it gt strgtb r3, [r0, #-1]! + it ge strgeb r4, [r0, #-1]! subs r2, r2, ip strb lr, [r0, #-1]! diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 761eefa76243..ef022bdc0235 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -19,7 +19,9 @@ 1: subs r2, r2, #4 @ 1 do we have enough blt 5f @ 1 bytes to align with? cmp r3, #2 @ 1 + it lt strltb r1, [r0], #1 @ 1 + it le strleb r1, [r0], #1 @ 1 strb r1, [r0], #1 @ 1 add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3)) @@ -51,19 +53,23 @@ ENTRY(memset) mov lr, r1 2: subs r2, r2, #64 + itttt ge stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time. stmgeia r0!, {r1, r3, ip, lr} stmgeia r0!, {r1, r3, ip, lr} stmgeia r0!, {r1, r3, ip, lr} bgt 2b + it eq ldmeqfd sp!, {pc} @ Now <64 bytes to go. /* * No need to correct the count; we're only testing bits from now on */ tst r2, #32 + itt ne stmneia r0!, {r1, r3, ip, lr} stmneia r0!, {r1, r3, ip, lr} tst r2, #16 + it ne stmneia r0!, {r1, r3, ip, lr} ldr lr, [sp], #4 @@ -111,17 +117,21 @@ ENTRY(memset) #endif 4: tst r2, #8 + it ne stmneia r0!, {r1, r3} tst r2, #4 + it ne strne r1, [r0], #4 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. 
*/ 5: tst r2, #2 + itt ne strneb r1, [r0], #1 strneb r1, [r0], #1 tst r2, #1 + it ne strneb r1, [r0], #1 mov pc, lr ENDPROC(memset) diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S index 3fbdef5f802a..a0e319a4c03f 100644 --- a/arch/arm/lib/memzero.S +++ b/arch/arm/lib/memzero.S @@ -21,7 +21,9 @@ 1: subs r1, r1, #4 @ 1 do we have enough blt 5f @ 1 bytes to align with? cmp r3, #2 @ 1 + it lt strltb r2, [r0], #1 @ 1 + it le strleb r2, [r0], #1 @ 1 strb r2, [r0], #1 @ 1 add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3)) @@ -51,19 +53,23 @@ ENTRY(__memzero) mov lr, r2 @ 1 3: subs r1, r1, #64 @ 1 write 32 bytes out per loop + itttt ge stmgeia r0!, {r2, r3, ip, lr} @ 4 stmgeia r0!, {r2, r3, ip, lr} @ 4 stmgeia r0!, {r2, r3, ip, lr} @ 4 stmgeia r0!, {r2, r3, ip, lr} @ 4 bgt 3b @ 1 + it eq ldmeqfd sp!, {pc} @ 1/2 quick exit /* * No need to correct the count; we're only testing bits from now on */ tst r1, #32 @ 1 + itt ne stmneia r0!, {r2, r3, ip, lr} @ 4 stmneia r0!, {r2, r3, ip, lr} @ 4 tst r1, #16 @ 1 16 bytes or more? + it ne stmneia r0!, {r2, r3, ip, lr} @ 4 ldr lr, [sp], #4 @ 1 @@ -109,17 +115,21 @@ ENTRY(__memzero) #endif 4: tst r1, #8 @ 1 8 bytes or more? + it ne stmneia r0!, {r2, r3} @ 2 tst r1, #4 @ 1 4 bytes or more? + it ne strne r2, [r0], #4 @ 1 /* * When we get here, we've got less than 4 bytes to zero. We * may have an unaligned pointer as well. */ 5: tst r1, #2 @ 1 2 bytes or more? + itt ne strneb r2, [r0], #1 @ 1 strneb r2, [r0], #1 @ 1 tst r1, #1 @ 1 a byte left over + it ne strneb r2, [r0], #1 @ 1 mov pc, lr @ 1 ENDPROC(__memzero) diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S index d8f2a1c1aea4..fd4014e54e37 100644 --- a/arch/arm/lib/strchr.S +++ b/arch/arm/lib/strchr.S @@ -18,9 +18,11 @@ ENTRY(strchr) and r1, r1, #0xff 1: ldrb r2, [r0], #1 teq r2, r1 + it ne teqne r2, #0 bne 1b teq r2, r1 + ite ne movne r0, #0 subeq r0, r0, #1 mov pc, lr diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S index 302f20cd2423..d7a9440de6b8 100644 --- a/arch/arm/lib/strrchr.S +++ b/arch/arm/lib/strrchr.S @@ -18,6 +18,7 @@ ENTRY(strrchr) mov r3, #0 1: ldrb r2, [r0], #1 teq r2, r1 + it eq subeq r3, r0, #1 teq r2, #0 bne 1b diff --git a/arch/arm/lib/testclearbit.S b/arch/arm/lib/testclearbit.S index 543d7094d18e..df66c76e8b29 100644 --- a/arch/arm/lib/testclearbit.S +++ b/arch/arm/lib/testclearbit.S @@ -15,6 +15,6 @@ ENTRY(_test_and_clear_bit_be) eor r0, r0, #0x18 @ big endian byte ordering ENTRY(_test_and_clear_bit_le) - testop bicne, strneb + testop bicne, strneb, ne ENDPROC(_test_and_clear_bit_be) ENDPROC(_test_and_clear_bit_le) diff --git a/arch/arm/lib/testsetbit.S b/arch/arm/lib/testsetbit.S index 0b3f390401ce..3938bdf446a6 100644 --- a/arch/arm/lib/testsetbit.S +++ b/arch/arm/lib/testsetbit.S @@ -15,6 +15,6 @@ ENTRY(_test_and_set_bit_be) eor r0, r0, #0x18 @ big endian byte ordering ENTRY(_test_and_set_bit_le) - testop orreq, streqb + testop orreq, streqb, eq ENDPROC(_test_and_set_bit_be) ENDPROC(_test_and_set_bit_le) diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S index f0df6a91db04..503288955242 100644 --- a/arch/arm/lib/ucmpdi2.S +++ b/arch/arm/lib/ucmpdi2.S @@ -27,9 +27,13 @@ ENTRY(__ucmpdi2) cmp xh, yh + it eq cmpeq xl, yl + it lo movlo r0, #0 + it eq moveq r0, #1 + it hi movhi r0, #2 mov pc, lr @@ -40,9 +44,13 @@ ENDPROC(__ucmpdi2) ENTRY(__aeabi_ulcmp) cmp xh, yh + it eq cmpeq xl, yl + it lo movlo r0, #-1 + it eq moveq r0, #0 + it hi movhi r0, #1 mov pc, lr diff --git a/arch/arm/mach-integrator/include/mach/debug-macro.S 
b/arch/arm/mach-integrator/include/mach/debug-macro.S index d347d659ea30..ffb7d8961f5e 100644 --- a/arch/arm/mach-integrator/include/mach/debug-macro.S +++ b/arch/arm/mach-integrator/include/mach/debug-macro.S @@ -14,6 +14,7 @@ .macro addruart,rx mrc p15, 0, \rx, c1, c0 tst \rx, #1 @ MMU enabled? + itee eq moveq \rx, #0x16000000 @ physical base address movne \rx, #0xf0000000 @ virtual base addne \rx, \rx, #0x16000000 >> 4 diff --git a/arch/arm/mach-integrator/include/mach/entry-macro.S b/arch/arm/mach-integrator/include/mach/entry-macro.S index 7649c57acb53..ce478b5bfb90 100644 --- a/arch/arm/mach-integrator/include/mach/entry-macro.S +++ b/arch/arm/mach-integrator/include/mach/entry-macro.S @@ -26,6 +26,7 @@ ldr \irqstat, [\base, #IRQ_STATUS] @ get masked status ldr \base, =IO_ADDRESS(INTEGRATOR_HDR_BASE) teq \irqstat, #0 + itt eq ldreq \irqstat, [\base, #(INTEGRATOR_HDR_IC_OFFSET+IRQ_STATUS)] moveq \irqnr, #IRQ_CIC_START diff --git a/arch/arm/mach-realview/include/mach/debug-macro.S b/arch/arm/mach-realview/include/mach/debug-macro.S index 932d8af18062..59a0396dd5f3 100644 --- a/arch/arm/mach-realview/include/mach/debug-macro.S +++ b/arch/arm/mach-realview/include/mach/debug-macro.S @@ -36,6 +36,7 @@ .macro addruart,rx mrc p15, 0, \rx, c1, c0 tst \rx, #1 @ MMU enabled? + ite eq moveq \rx, #0x10000000 movne \rx, #0xfb000000 @ virtual base orr \rx, \rx, #DEBUG_LL_UART_OFFSET diff --git a/arch/arm/mach-realview/include/mach/entry-macro.S b/arch/arm/mach-realview/include/mach/entry-macro.S index 340a5c276946..7bf198cd5b60 100644 --- a/arch/arm/mach-realview/include/mach/entry-macro.S +++ b/arch/arm/mach-realview/include/mach/entry-macro.S @@ -49,8 +49,11 @@ bic \irqnr, \irqstat, #0x1c00 cmp \irqnr, #29 + it cc cmpcc \irqnr, \irqnr + it ne cmpne \irqnr, \tmp + it cs cmpcs \irqnr, \irqnr .endm @@ -65,7 +68,9 @@ .macro test_for_ipi, irqnr, irqstat, base, tmp bic \irqnr, \irqstat, #0x1c00 cmp \irqnr, #16 + it cc strcc \irqstat, [\base, #GIC_CPU_EOI] + it cs cmpcs \irqnr, \irqnr .endm @@ -75,6 +80,7 @@ bic \irqnr, \irqstat, #0x1c00 mov \tmp, #0 cmp \irqnr, #29 + itt eq moveq \tmp, #1 streq \irqstat, [\base, #GIC_CPU_EOI] cmp \tmp, #0 diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index bda0ec31a4e2..29e69048b037 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -205,10 +205,12 @@ ENTRY(v7_dma_inv_range) sub r3, r2, #1 tst r0, r3 bic r0, r0, r3 + it ne mcrne p15, 0, r0, c7, c14, 1 @ clean & invalidate D / U line tst r1, r3 bic r1, r1, r3 + it ne mcrne p15, 0, r1, c7, c14, 1 @ clean & invalidate D / U line 1: mcr p15, 0, r0, c7, c6, 1 @ invalidate D / U line diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index ae9256a89773..41615a3c11ae 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -130,21 +130,26 @@ ENTRY(cpu_v7_set_pte_ext) orr r3, r3, #PTE_EXT_AP0 | 2 tst r1, #1 << 4 + it ne orrne r3, r3, #PTE_EXT_TEX(1) tst r1, #L_PTE_WRITE + ite ne tstne r1, #L_PTE_DIRTY orreq r3, r3, #PTE_EXT_APX tst r1, #L_PTE_USER + ittt ne orrne r3, r3, #PTE_EXT_AP1 tstne r3, #PTE_EXT_APX bicne r3, r3, #PTE_EXT_APX | PTE_EXT_AP0 tst r1, #L_PTE_EXEC + it eq orreq r3, r3, #PTE_EXT_XN tst r1, #L_PTE_YOUNG + ite ne tstne r1, #L_PTE_PRESENT moveq r3, #0 diff --git a/arch/arm/vfp/vfp.h b/arch/arm/vfp/vfp.h index c8c98dd44ad4..8a5fe9c6cccd 100644 --- a/arch/arm/vfp/vfp.h +++ b/arch/arm/vfp/vfp.h @@ -37,6 +37,7 @@ static inline u32 vfp_hi64to32jamming(u64 val) asm( "cmp %Q1, #1 @ vfp_hi64to32jamming\n\t" + "ite cc\n\t" "movcc %0, %R1\n\t" "orrcs %0, %R1, #1" : "=r" 
(v) : "r" (val) : "cc"); |