diff options
author | Heinrich Schuchardt <xypron.glpk@gmx.de> | 2021-03-27 12:37:04 +0100 |
---|---|---|
committer | Leo Yu-Chi Liang <ycliang@andestech.com> | 2021-04-08 15:37:29 +0800 |
commit | 8f0dc4cfd106edbb8f8efb4583b33ecd52610e6c (patch) | |
tree | 864f15409441a9f99d152e2d64b72e0f6487c52e /arch/riscv/lib/memcpy.S | |
parent | f709a0b6f9c986e2a921435d095fc498949b53fa (diff) |
riscv: assembler versions of memcpy, memmove, memset
Provide optimized versions of memcpy(), memmove(), memset() copied from
the Linux kernel.
Signed-off-by: Heinrich Schuchardt <xypron.glpk@gmx.de>
Reviewed-by: Leo Yu-Chi Liang <ycliang@andestech.com>
Diffstat (limited to 'arch/riscv/lib/memcpy.S')
-rw-r--r-- | arch/riscv/lib/memcpy.S | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S new file mode 100644 index 00000000000..51ab716253f --- /dev/null +++ b/arch/riscv/lib/memcpy.S @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013 Regents of the University of California + */ + +#include <linux/linkage.h> +#include <asm/asm.h> + +/* void *memcpy(void *, const void *, size_t) */ +ENTRY(__memcpy) +WEAK(memcpy) + move t6, a0 /* Preserve return value */ + + /* Defer to byte-oriented copy for small sizes */ + sltiu a3, a2, 128 + bnez a3, 4f + /* Use word-oriented copy only if low-order bits match */ + andi a3, t6, SZREG-1 + andi a4, a1, SZREG-1 + bne a3, a4, 4f + + beqz a3, 2f /* Skip if already aligned */ + /* + * Round to nearest double word-aligned address + * greater than or equal to start address + */ + andi a3, a1, ~(SZREG-1) + addi a3, a3, SZREG + /* Handle initial misalignment */ + sub a4, a3, a1 +1: + lb a5, 0(a1) + addi a1, a1, 1 + sb a5, 0(t6) + addi t6, t6, 1 + bltu a1, a3, 1b + sub a2, a2, a4 /* Update count */ + +2: + andi a4, a2, ~((16*SZREG)-1) + beqz a4, 4f + add a3, a1, a4 +3: + REG_L a4, 0(a1) + REG_L a5, SZREG(a1) + REG_L a6, 2*SZREG(a1) + REG_L a7, 3*SZREG(a1) + REG_L t0, 4*SZREG(a1) + REG_L t1, 5*SZREG(a1) + REG_L t2, 6*SZREG(a1) + REG_L t3, 7*SZREG(a1) + REG_L t4, 8*SZREG(a1) + REG_L t5, 9*SZREG(a1) + REG_S a4, 0(t6) + REG_S a5, SZREG(t6) + REG_S a6, 2*SZREG(t6) + REG_S a7, 3*SZREG(t6) + REG_S t0, 4*SZREG(t6) + REG_S t1, 5*SZREG(t6) + REG_S t2, 6*SZREG(t6) + REG_S t3, 7*SZREG(t6) + REG_S t4, 8*SZREG(t6) + REG_S t5, 9*SZREG(t6) + REG_L a4, 10*SZREG(a1) + REG_L a5, 11*SZREG(a1) + REG_L a6, 12*SZREG(a1) + REG_L a7, 13*SZREG(a1) + REG_L t0, 14*SZREG(a1) + REG_L t1, 15*SZREG(a1) + addi a1, a1, 16*SZREG + REG_S a4, 10*SZREG(t6) + REG_S a5, 11*SZREG(t6) + REG_S a6, 12*SZREG(t6) + REG_S a7, 13*SZREG(t6) + REG_S t0, 14*SZREG(t6) + REG_S t1, 15*SZREG(t6) + addi t6, t6, 16*SZREG + bltu a1, a3, 3b + andi a2, a2, (16*SZREG)-1 /* Update count */ + +4: + /* Handle trailing misalignment */ + beqz a2, 6f + add a3, a1, a2 + + /* Use word-oriented copy if co-aligned to word boundary */ + or a5, a1, t6 + or a5, a5, a3 + andi a5, a5, 3 + bnez a5, 5f +7: + lw a4, 0(a1) + addi a1, a1, 4 + sw a4, 0(t6) + addi t6, t6, 4 + bltu a1, a3, 7b + + ret + +5: + lb a4, 0(a1) + addi a1, a1, 1 + sb a4, 0(t6) + addi t6, t6, 1 + bltu a1, a3, 5b +6: + ret +END(__memcpy) |