From 26c5e07d1478021914801c8c7dd77c9268940e4f Mon Sep 17 00:00:00 2001 From: "Steven J. Hill" Date: Mon, 25 Mar 2013 13:40:49 -0500 Subject: MIPS: microMIPS: Optimise 'memset' core library function. Optimise 'memset' to use microMIPS instructions and/or optimisations for binary size reduction. When the microMIPS ISA is not being used, the library function compiles to the original binary code. Signed-off-by: Steven J. Hill --- arch/mips/include/asm/asm.h | 2 ++ arch/mips/lib/memset.S | 84 +++++++++++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 30 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index 164a21e65b42..879691d194af 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -296,6 +296,7 @@ symbol = value #define LONG_SUBU subu #define LONG_L lw #define LONG_S sw +#define LONG_SP swp #define LONG_SLL sll #define LONG_SLLV sllv #define LONG_SRL srl @@ -318,6 +319,7 @@ symbol = value #define LONG_SUBU dsubu #define LONG_L ld #define LONG_S sd +#define LONG_SP sdp #define LONG_SLL dsll #define LONG_SLLV dsllv #define LONG_SRL dsrl diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index 053d3b0b0317..0580194e7402 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -5,7 +5,8 @@ * * Copyright (C) 1998, 1999, 2000 by Ralf Baechle * Copyright (C) 1999, 2000 Silicon Graphics, Inc. - * Copyright (C) 2007 Maciej W. Rozycki + * Copyright (C) 2007 by Maciej W. Rozycki + * Copyright (C) 2011, 2012 MIPS Technologies, Inc. */ #include #include @@ -19,6 +20,20 @@ #define LONG_S_R sdr #endif +#ifdef CONFIG_CPU_MICROMIPS +#define STORSIZE (LONGSIZE * 2) +#define STORMASK (STORSIZE - 1) +#define FILL64RG t8 +#define FILLPTRG t7 +#undef LONG_S +#define LONG_S LONG_SP +#else +#define STORSIZE LONGSIZE +#define STORMASK LONGMASK +#define FILL64RG a1 +#define FILLPTRG t0 +#endif + #define EX(insn,reg,addr,handler) \ 9: insn reg, addr; \ .section __ex_table,"a"; \ @@ -26,23 +41,25 @@ .previous .macro f_fill64 dst, offset, val, fixup - EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup) -#if LONGSIZE == 4 - EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 0 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 1 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 2 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 3 * STORSIZE)(\dst), \fixup) +#if ((defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) || !defined(CONFIG_CPU_MICROMIPS)) + EX(LONG_S, \val, (\offset + 4 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 5 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 6 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 7 * STORSIZE)(\dst), \fixup) +#endif +#if (!defined(CONFIG_CPU_MICROMIPS) && (LONGSIZE == 4)) + EX(LONG_S, \val, (\offset + 8 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 9 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 10 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 11 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 12 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 13 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 14 * STORSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 15 * STORSIZE)(\dst), \fixup) #endif .endm @@ -71,16 +88,20 @@ LEAF(memset) 1: FEXPORT(__bzero) - sltiu t0, a2, LONGSIZE /* very small region? */ + sltiu t0, a2, STORSIZE /* very small region? */ bnez t0, .Lsmall_memset - andi t0, a0, LONGMASK /* aligned? */ + andi t0, a0, STORMASK /* aligned? */ +#ifdef CONFIG_CPU_MICROMIPS + move t8, a1 /* used by 'swp' instruction */ + move t9, a1 +#endif #ifndef CONFIG_CPU_DADDI_WORKAROUNDS beqz t0, 1f - PTR_SUBU t0, LONGSIZE /* alignment in bytes */ + PTR_SUBU t0, STORSIZE /* alignment in bytes */ #else .set noat - li AT, LONGSIZE + li AT, STORSIZE beqz t0, 1f PTR_SUBU t0, AT /* alignment in bytes */ .set at @@ -99,24 +120,27 @@ FEXPORT(__bzero) 1: ori t1, a2, 0x3f /* # of full blocks */ xori t1, 0x3f beqz t1, .Lmemset_partial /* no block to fill */ - andi t0, a2, 0x40-LONGSIZE + andi t0, a2, 0x40-STORSIZE PTR_ADDU t1, a0 /* end address */ .set reorder 1: PTR_ADDIU a0, 64 R10KCBARRIER(0(ra)) - f_fill64 a0, -64, a1, .Lfwd_fixup + f_fill64 a0, -64, FILL64RG, .Lfwd_fixup bne t1, a0, 1b .set noreorder .Lmemset_partial: R10KCBARRIER(0(ra)) PTR_LA t1, 2f /* where to start */ +#ifdef CONFIG_CPU_MICROMIPS + LONG_SRL t7, t0, 1 +#endif #if LONGSIZE == 4 - PTR_SUBU t1, t0 + PTR_SUBU t1, FILLPTRG #else .set noat - LONG_SRL AT, t0, 1 + LONG_SRL AT, FILLPTRG, 1 PTR_SUBU t1, AT .set at #endif @@ -126,9 +150,9 @@ FEXPORT(__bzero) .set push .set noreorder .set nomacro - f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */ + f_fill64 a0, -64, FILL64RG, .Lpartial_fixup /* ... but first do longs ... */ 2: .set pop - andi a2, LONGMASK /* At most one long to go */ + andi a2, STORMASK /* At most one long to go */ beqz a2, 1f PTR_ADDU a0, a2 /* What's left */ @@ -169,7 +193,7 @@ FEXPORT(__bzero) .Lpartial_fixup: PTR_L t0, TI_TASK($28) - andi a2, LONGMASK + andi a2, STORMASK LONG_L t0, THREAD_BUADDR(t0) LONG_ADDU a2, t1 jr ra @@ -177,4 +201,4 @@ FEXPORT(__bzero) .Llast_fixup: jr ra - andi v1, a2, LONGMASK + andi v1, a2, STORMASK -- cgit v1.2.3