This commit pulls in the MIPS memset() routine from Linux 2.6.26,
which provides a 10x to 20x speedup over the generic byte-at-a-time
routine. This is especially useful on platforms with manual ECC
scrubbing, which require all of memory to be written at least once
after a power cycle.
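
For context, a minimal sketch of the ECC-scrub use case this speeds up
(the function and parameter names are illustrative, not part of this
patch):

    /* Illustrative sketch only: on boards with manual ECC scrubbing,
     * every DRAM location must be written once after power-up so the
     * ECC check bits are initialized.  The optimized memset() makes
     * this full write pass roughly 10x-20x faster than the generic
     * byte-at-a-time loop. */
    static void ecc_scrub(void *ram_base, unsigned long ram_size)
    {
            memset(ram_base, 0, ram_size);  /* one full write pass */
    }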
---
 include/asm-mips/string.h |    2 +-
 lib_mips/Makefile         |    2 +-
 lib_mips/memset.S         |  174 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 176 insertions(+), 2 deletions(-)
 create mode 100644 lib_mips/memset.S

diff --git a/include/asm-mips/string.h b/include/asm-mips/string.h
index 579a591..0df1463 100644
--- a/include/asm-mips/string.h
+++ b/include/asm-mips/string.h
@@ -27,7 +27,7 @@ extern int strcmp(__const__ char *__cs, __const__ char *__ct);
 #undef __HAVE_ARCH_STRNCMP
 extern int strncmp(__const__ char *__cs, __const__ char *__ct, __kernel_size_t __count);
 
-#undef __HAVE_ARCH_MEMSET
+#define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, __kernel_size_t __count);
 
 #undef __HAVE_ARCH_MEMCPY
diff --git a/lib_mips/Makefile b/lib_mips/Makefile
index 8176437..9149039 100644
--- a/lib_mips/Makefile
+++ b/lib_mips/Makefile
@@ -25,7 +25,7 @@ include $(TOPDIR)/config.mk
 
 LIB    = $(obj)lib$(ARCH).a
 
-SOBJS-y        +=
+SOBJS-y        += memset.o
 
 COBJS-y        += board.o
 COBJS-y        += bootm.o
diff --git a/lib_mips/memset.S b/lib_mips/memset.S
new file mode 100644
index 0000000..f1c07d7
--- /dev/null
+++ b/lib_mips/memset.S
@@ -0,0 +1,174 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
+ */
+#include <asm/asm.h>
+//#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+
+#if LONGSIZE == 4
+#define LONG_S_L swl
+#define LONG_S_R swr
+#else
+#define LONG_S_L sdl
+#define LONG_S_R sdr
+#endif
+
+#define EX(insn,reg,addr,handler)                      \
+9:     insn    reg, addr;                              \
+       .section __ex_table,"a";                        \
+       PTR     9b, handler;                            \
+       .previous
+
+       .macro  f_fill64 dst, offset, val, fixup
+       EX(LONG_S, \val, (\offset +  0 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset +  1 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset +  2 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset +  3 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset +  4 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset +  5 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset +  6 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset +  7 * LONGSIZE)(\dst), \fixup)
+#if LONGSIZE == 4
+       EX(LONG_S, \val, (\offset +  8 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset +  9 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
+       EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
+#endif
+       .endm
+
+/*
+ * memset(void *s, int c, size_t n)
+ *
+ * a0: start of area to clear
+ * a1: char to fill with
+ * a2: size of area to clear
+ */
+       .set    noreorder
+       .align  5
+LEAF(memset)
+       beqz            a1, 1f
+        move           v0, a0                  /* result */
+
+       andi            a1, 0xff                /* spread fillword */
+       LONG_SLL                t1, a1, 8
+       or              a1, t1
+       LONG_SLL                t1, a1, 16
+#if LONGSIZE == 8
+       or              a1, t1
+       LONG_SLL                t1, a1, 32
+#endif
+       or              a1, t1
+1:
+
+FEXPORT(__bzero)
+       sltiu           t0, a2, LONGSIZE        /* very small region? */
+       bnez            t0, .Lsmall_memset
+        andi           t0, a0, LONGMASK        /* aligned? */
+
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
+       beqz            t0, 1f
+        PTR_SUBU       t0, LONGSIZE            /* alignment in bytes */
+#else
+       .set            noat
+       li              AT, LONGSIZE
+       beqz            t0, 1f
+        PTR_SUBU       t0, AT                  /* alignment in bytes */
+       .set            at
+#endif
+
+       R10KCBARRIER(0(ra))
+#ifdef __MIPSEB__
+       EX(LONG_S_L, a1, (a0), .Lfirst_fixup)   /* make word/dword aligned */
+#endif
+#ifdef __MIPSEL__
+       EX(LONG_S_R, a1, (a0), .Lfirst_fixup)   /* make word/dword aligned */
+#endif
+       PTR_SUBU        a0, t0                  /* long align ptr */
+       PTR_ADDU        a2, t0                  /* correct size */
+
+1:     ori             t1, a2, 0x3f            /* # of full blocks */
+       xori            t1, 0x3f
+       beqz            t1, .Lmemset_partial    /* no block to fill */
+        andi           t0, a2, 0x40-LONGSIZE
+
+       PTR_ADDU        t1, a0                  /* end address */
+       .set            reorder
+1:     PTR_ADDIU       a0, 64
+       R10KCBARRIER(0(ra))
+       f_fill64 a0, -64, a1, .Lfwd_fixup
+       bne             t1, a0, 1b
+       .set            noreorder
+
+.Lmemset_partial:
+       R10KCBARRIER(0(ra))
+       PTR_LA          t1, 2f                  /* where to start */
+#if LONGSIZE == 4
+       PTR_SUBU        t1, t0
+#else
+       .set            noat
+       LONG_SRL                AT, t0, 1
+       PTR_SUBU        t1, AT
+       .set            at
+#endif
+       jr              t1
+        PTR_ADDU       a0, t0                  /* dest ptr */
+
+       .set            push
+       .set            noreorder
+       .set            nomacro
+       f_fill64 a0, -64, a1, .Lpartial_fixup   /* ... but first do longs ... */
+2:     .set            pop
+       andi            a2, LONGMASK            /* At most one long to go */
+
+       beqz            a2, 1f
+        PTR_ADDU       a0, a2                  /* What's left */
+       R10KCBARRIER(0(ra))
+#ifdef __MIPSEB__
+       EX(LONG_S_R, a1, -1(a0), .Llast_fixup)
+#endif
+#ifdef __MIPSEL__
+       EX(LONG_S_L, a1, -1(a0), .Llast_fixup)
+#endif
+1:     jr              ra
+        move           a2, zero
+
+.Lsmall_memset:
+       beqz            a2, 2f
+        PTR_ADDU       t1, a0, a2
+
+1:     PTR_ADDIU       a0, 1                   /* fill bytewise */
+       R10KCBARRIER(0(ra))
+       bne             t1, a0, 1b
+        sb             a1, -1(a0)
+
+2:     jr              ra                      /* done */
+        move           a2, zero
+       END(memset)
+
+.Lfirst_fixup:
+       jr      ra
+        nop
+
+.Lfwd_fixup:
+       andi            a2, 0x3f
+       jr              ra
+       LONG_ADDU       a2, t1
+
+.Lpartial_fixup:
+       andi            a2, LONGMASK
+       jr              ra
+       LONG_ADDU       a2, t1
+
+.Llast_fixup:
+       jr              ra
+        andi           v1, a2, LONGMASK
-- 
1.5.4.3

