This commit pulls over the memset() MIPS routine from Linux 2.6.26, which provides a 10x to 20x speedup over the generic byte-at-a-time routine. This is especially useful on platforms with manual ECC scrubbing, that require all of memory to be written at least once after a power cycle. --- include/asm-mips/string.h | 2 +- lib_mips/Makefile | 2 +- lib_mips/memset.S | 174 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 lib_mips/memset.S
diff --git a/include/asm-mips/string.h b/include/asm-mips/string.h index 579a591..0df1463 100644 --- a/include/asm-mips/string.h +++ b/include/asm-mips/string.h @@ -27,7 +27,7 @@ extern int strcmp(__const__ char *__cs, __const__ char *__ct); #undef __HAVE_ARCH_STRNCMP extern int strncmp(__const__ char *__cs, __const__ char *__ct, __kernel_size_t __count); -#undef __HAVE_ARCH_MEMSET +#define __HAVE_ARCH_MEMSET extern void *memset(void *__s, int __c, __kernel_size_t __count); #undef __HAVE_ARCH_MEMCPY diff --git a/lib_mips/Makefile b/lib_mips/Makefile index 8176437..9149039 100644 --- a/lib_mips/Makefile +++ b/lib_mips/Makefile @@ -25,7 +25,7 @@ include $(TOPDIR)/config.mk LIB = $(obj)lib$(ARCH).a -SOBJS-y += +SOBJS-y += memset.o COBJS-y += board.o COBJS-y += bootm.o diff --git a/lib_mips/memset.S b/lib_mips/memset.S new file mode 100644 index 0000000..f1c07d7 --- /dev/null +++ b/lib_mips/memset.S @@ -0,0 +1,174 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998, 1999, 2000 by Ralf Baechle + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + * Copyright (C) 2007 Maciej W. Rozycki + */ +#include <asm/asm.h> +//#include <asm/asm-offsets.h> +#include <asm/regdef.h> + +#if LONGSIZE == 4 +#define LONG_S_L swl +#define LONG_S_R swr +#else +#define LONG_S_L sdl +#define LONG_S_R sdr +#endif + +#define EX(insn,reg,addr,handler) \ +9: insn reg, addr; \ + .section __ex_table,"a"; \ + PTR 9b, handler; \ + .previous + + .macro f_fill64 dst, offset, val, fixup + EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup) +#if LONGSIZE == 4 + EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup) +#endif + .endm + +/* + * memset(void *s, int c, size_t n) + * + * a0: start of area to clear + * a1: char to fill with + * a2: size of area to clear + */ + .set noreorder + .align 5 +LEAF(memset) + beqz a1, 1f + move v0, a0 /* result */ + + andi a1, 0xff /* spread fillword */ + LONG_SLL t1, a1, 8 + or a1, t1 + LONG_SLL t1, a1, 16 +#if LONGSIZE == 8 + or a1, t1 + LONG_SLL t1, a1, 32 +#endif + or a1, t1 +1: + +FEXPORT(__bzero) + sltiu t0, a2, LONGSIZE /* very small region? */ + bnez t0, .Lsmall_memset + andi t0, a0, LONGMASK /* aligned? */ + +#ifndef CONFIG_CPU_DADDI_WORKAROUNDS + beqz t0, 1f + PTR_SUBU t0, LONGSIZE /* alignment in bytes */ +#else + .set noat + li AT, LONGSIZE + beqz t0, 1f + PTR_SUBU t0, AT /* alignment in bytes */ + .set at +#endif + + R10KCBARRIER(0(ra)) +#ifdef __MIPSEB__ + EX(LONG_S_L, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */ +#endif +#ifdef __MIPSEL__ + EX(LONG_S_R, a1, (a0), .Lfirst_fixup) /* make word/dword aligned */ +#endif + PTR_SUBU a0, t0 /* long align ptr */ + PTR_ADDU a2, t0 /* correct size */ + +1: ori t1, a2, 0x3f /* # of full blocks */ + xori t1, 0x3f + beqz t1, .Lmemset_partial /* no block to fill */ + andi t0, a2, 0x40-LONGSIZE + + PTR_ADDU t1, a0 /* end address */ + .set reorder +1: PTR_ADDIU a0, 64 + R10KCBARRIER(0(ra)) + f_fill64 a0, -64, a1, .Lfwd_fixup + bne t1, a0, 1b + .set noreorder + +.Lmemset_partial: + R10KCBARRIER(0(ra)) + PTR_LA t1, 2f /* where to start */ +#if LONGSIZE == 4 + PTR_SUBU t1, t0 +#else + .set noat + LONG_SRL AT, t0, 1 + PTR_SUBU t1, AT + .set at +#endif + jr t1 + PTR_ADDU a0, t0 /* dest ptr */ + + .set push + .set noreorder + .set nomacro + f_fill64 a0, -64, a1, .Lpartial_fixup /* ... but first do longs ... */ +2: .set pop + andi a2, LONGMASK /* At most one long to go */ + + beqz a2, 1f + PTR_ADDU a0, a2 /* What's left */ + R10KCBARRIER(0(ra)) +#ifdef __MIPSEB__ + EX(LONG_S_R, a1, -1(a0), .Llast_fixup) +#endif +#ifdef __MIPSEL__ + EX(LONG_S_L, a1, -1(a0), .Llast_fixup) +#endif +1: jr ra + move a2, zero + +.Lsmall_memset: + beqz a2, 2f + PTR_ADDU t1, a0, a2 + +1: PTR_ADDIU a0, 1 /* fill bytewise */ + R10KCBARRIER(0(ra)) + bne t1, a0, 1b + sb a1, -1(a0) + +2: jr ra /* done */ + move a2, zero + END(memset) + +.Lfirst_fixup: + jr ra + nop + +.Lfwd_fixup: + andi a2, 0x3f + jr ra + LONG_ADDU a2, t1 + +.Lpartial_fixup: + andi a2, LONGMASK + jr ra + LONG_ADDU a2, t1 + +.Llast_fixup: + jr ra + andi v1, a2, LONGMASK -- 1.5.4.3 ------------------------------------------------------------------------- Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://sourceforge.net/services/buy/index.php _______________________________________________ U-Boot-Users mailing list U-Boot-Users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/u-boot-users