Hello,

I've been working on improving the performance of DirectFB 1.3.0
on the ARM platform. The attached patch replaces the default libc
memcpy with a faster implementation. I've tested this patch on an
AT91RM9200, but it should work on other ARM targets.

Hope this will be useful to others.

Regards,

Vince 
diff -Naur DirectFB-1.3.0-org/configure.in DirectFB-1.3.0/configure.in
--- DirectFB-1.3.0-org/configure.in	2009-03-18 09:11:21.000000000 +0000
+++ DirectFB-1.3.0/configure.in	2009-03-18 09:12:47.000000000 +0000
@@ -198,6 +198,7 @@
 
   *arm*)
     have_arm=yes
+	AC_DEFINE(ARCH_ARM,1,[Define to 1 if you are compiling for ARM.])
     ;;
 
   ppc-*-linux* | powerpc-*)
@@ -221,6 +222,7 @@
 need_libc_r=no
 need_libdl=yes
 want_ppcasm=yes
+want_armasm=yes
 
 case "$target_or_host" in
   *-linux*)
@@ -236,6 +238,7 @@
     need_libc_r=yes
     need_libdl=no
     want_ppcasm=yes
+	want_armasm=yes
     CPPFLAGS="$CPPFLAGS -I/usr/local/include"
     LDFLAGS="$LDFLAGS -L/usr/local/lib"
     ;;
@@ -244,6 +247,7 @@
     need_libc_r=yes
     need_libdl=no
     want_ppcasm=no
+	want_armasm=no
     CPPFLAGS="$CPPFLAGS -I/usr/local/include"
     LDFLAGS="$LDFLAGS -L/usr/local/lib"
     ;;
@@ -252,6 +256,7 @@
     need_libc_r=no
     need_libdl=no
     want_ppcasm=yes
+	want_armasm=yes
     CPPFLAGS="$CPPFLAGS -I/usr/pkg/include"
     LDFLAGS="$LDFLAGS -L/usr/pkg/lib"
     ;;    
@@ -260,6 +265,7 @@
     need_libc_r=no
     need_libdl=yes
     want_ppcasm=no
+	want_armasm=no
     CPPFLAGS="$CPPFLAGS -I/sw/include"
     LDFLAGS="$LDFLAGS -L/sw/lib"
     ;;
@@ -281,6 +287,13 @@
     AC_DEFINE(USE_PPCASM,1,[Define to 1 if ppc assembly is available.])
 fi
 
+
+AM_CONDITIONAL(BUILDARMASM, test "$have_arm" = "yes" && test "$want_armasm" = "yes")
+
+if test "$have_arm" = "yes" && test "$want_armasm" = "yes"; then
+    AC_DEFINE(USE_ARMASM,1,[Define to 1 if arm assembly is available.])
+fi
+
 if test "$have_kos" = "yes"; then
     AC_DEFINE(USE_KOS,1,[Define to 1 if compiling on KallistiOS.])
 fi
diff -Naur DirectFB-1.3.0-org/lib/direct/armasm_memcpy.h DirectFB-1.3.0/lib/direct/armasm_memcpy.h
--- DirectFB-1.3.0-org/lib/direct/armasm_memcpy.h	1970-01-01 01:00:00.000000000 +0100
+++ DirectFB-1.3.0/lib/direct/armasm_memcpy.h	2009-03-18 10:22:26.000000000 +0000
@@ -0,0 +1,34 @@
+/*
+ * ARM memcpy asm replacement.
+ *
+ * Copyright (C) 2009 Bluush Dev Team.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __ARMASM_MEMCPY_H__
+#define __ARMASM_MEMCPY_H__
+
+#include <stddef.h>   /* size_t: keeps this installed header self-contained */
+
+/*
+ * Copy n bytes from src to dest using the ARM assembly routine in
+ * armasm_memcpy.S.  Returns dest.  The copy always runs forward, so
+ * overlapping buffers with dest > src are not supported.
+ */
+void *direct_armasm_memcpy( void *dest, const void *src, size_t n );
+
+#endif /* __ARMASM_MEMCPY_H__ */
diff -Naur DirectFB-1.3.0-org/lib/direct/armasm_memcpy.S DirectFB-1.3.0/lib/direct/armasm_memcpy.S
--- DirectFB-1.3.0-org/lib/direct/armasm_memcpy.S	1970-01-01 01:00:00.000000000 +0100
+++ DirectFB-1.3.0/lib/direct/armasm_memcpy.S	2009-03-18 10:22:41.000000000 +0000
@@ -0,0 +1,138 @@
+/*
+ * ARM memcpy asm replacement.
+ *
+ * Copyright (C) 2009 Bluush Dev Team.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+/*
+ * Syntax: GNU as with pre-UAL ARM mnemonics, run through cpp (.S).
+ * Target: 32-bit ARM state (.code 32); returns use "mov pc,lr" and
+ *         "ldm ...,{pc}" rather than bx.
+ * ABI:    arguments in r0-r2, result in r0; r4-r10 are saved and
+ *         restored around the only loop that uses them.
+ */
+
+        .text
+        .code 32
+        .align 2                /* 2^2 = 4-byte alignment for ARM code */
+
+        .global direct_armasm_memcpy
+        .type   direct_armasm_memcpy, %function
+
+/*
+ * void *direct_armasm_memcpy(void *p1, const void *p2, size_t n)
+ *
+ * Copy n bytes from source p2 to destination p1, always walking
+ * forward, so overlapping buffers with p1 > p2 are not supported.
+ *
+ * In:    r0 = p1 (destination), r1 = p2 (source), r2 = n
+ * Out:   r0 = p1 (never modified; r12 is the running write pointer)
+ * Clobb: r1, r2, r3, r12, flags (lr is used as scratch and reloaded
+ *        into pc from the stack on return)
+ * Stack: 4 bytes for lr, plus 28 bytes for r4-r10 while the 8-word
+ *        loop runs.
+ *
+ * n is treated as unsigned (size_t) throughout.
+ */
+direct_armasm_memcpy:
+        teq      r2,#0          /* is arg n == 0 ? */
+        moveq    pc,lr          /* if n == 0, return p1 unchanged */
+
+        stmdb    sp!,{lr}       /* push return address; lr becomes scratch */
+        mov      r12,r0         /* r12 = write pointer, r0 stays = p1 */
+        cmp      r2,#0x8        /* is the buffer long or short? */
+        bls      .LByteSerial   /* n <= 8 (unsigned): not worth word setup */
+
+        sub      r3,r0,r1       /* compare pointers p1, p2 */
+        tst      r3,#3          /* same offset within a word? */
+        bne      .LByteSerial   /* no: word copies would be misaligned */
+
+/*
+ * Both buffers share the same alignment relative to word boundaries,
+ * so once p1 is word-aligned p2 is too and most of the data can be
+ * moved a word (or eight) at a time.
+ */
+        ands     r3,r0,#3       /* r3 = p1 & 3 */
+        beq      .LWordAligned  /* already word-aligned */
+
+        rsb      r3,r3,#4       /* m = bytes up to the next word boundary */
+        sub      r2,r2,r3       /* n = n - m  (n > 8 > m, cannot wrap) */
+
+/*
+ * Copy the m (1..3) leading bytes that precede the first full word.
+ */
+.LPreLoop:
+        ldrb     lr,[r1],#1     /* read byte from source */
+        subs     r3,r3,#1       /* --m */
+        strb     lr,[r12],#1    /* write byte to destination */
+        bne      .LPreLoop      /* loop while m != 0 */
+
+.LWordAligned:
+        movs     r3,r2,lsr #5   /* r3 = n / 32 = number of 8-word chunks */
+        beq      .LOctsDone     /* none */
+
+        and      r2,r2,#0x1f    /* n = n % 32 = bytes left after chunks */
+        stmdb    sp!,{r4-r10}   /* callee-saved; needed as copy registers */
+
+/*
+ * Move 32 bytes per iteration through r4-r10 and lr.
+ */
+.LOctLoop:
+        ldmia    r1!,{r4-r10,lr}  /* load 8 words from source */
+        subs     r3,r3,#1         /* one chunk fewer */
+        stmia    r12!,{r4-r10,lr} /* store 8 words to destination */
+        bne      .LOctLoop        /* loop while chunks remain */
+
+        ldmia    sp!,{r4-r10}   /* restore callee-saved registers */
+
+.LOctsDone:
+        movs     r3,r2,lsr #2   /* r3 = remaining whole words (0..7) */
+        beq      .LWordsDone    /* none */
+
+/*
+ * Copy the remaining whole words one at a time.
+ */
+.LWordLoop:
+        ldr      lr,[r1],#4     /* read next word from source */
+        subs     r3,r3,#1       /* decrement word count */
+        str      lr,[r12],#4    /* write next word to destination */
+        bne      .LWordLoop     /* loop while words remain */
+
+.LWordsDone:
+        ands     r2,r2,#3       /* trailing bytes (0..3)? */
+        ldmeqia  sp!,{pc}       /* none: pop return address into pc */
+
+/*
+ * Byte-at-a-time copy: used for the 1..3 trailing bytes, for short
+ * buffers (n <= 8) and for buffers with differing alignment.
+ * r2 is always > 0 on entry.
+ */
+.LByteSerial:
+        ldrb     lr,[r1],#1     /* read byte from source */
+        subs     r2,r2,#1       /* --n */
+        strb     lr,[r12],#1    /* write byte to destination */
+        bne      .LByteSerial   /* loop while n != 0 */
+
+        ldmia    sp!,{pc}       /* return to caller */
+
+        .ltorg
+        .size    direct_armasm_memcpy, . - direct_armasm_memcpy
+
+#if defined(__linux__) && defined(__ELF__)
+        .section .note.GNU-stack,"",%progbits  /* no executable stack needed */
+#endif
diff -Naur DirectFB-1.3.0-org/lib/direct/Makefile.am DirectFB-1.3.0/lib/direct/Makefile.am
--- DirectFB-1.3.0-org/lib/direct/Makefile.am	2009-03-18 09:11:21.000000000 +0000
+++ DirectFB-1.3.0/lib/direct/Makefile.am	2009-03-18 09:15:21.000000000 +0000
@@ -29,6 +29,10 @@
 ppcasm_headers = ppcasm_memcpy.h ppc_asm.h
 endif
 
+if BUILDARMASM
+armasm_sources = armasm_memcpy.S
+armasm_headers = armasm_memcpy.h
+endif
 
 # If the old location isn't cleared, builds of external modules fail
 install-exec-local:
@@ -39,6 +43,7 @@
 
 include_HEADERS = \
 	$(ppcasm_headers)		\
+	$(armasm_headers)		\
 	build.h				\
 	clock.h				\
 	conf.h				\
@@ -69,6 +74,7 @@
 
 libdirect_la_SOURCES = \
 	$(ppcasm_sources)	\
+	$(armasm_sources)	\
 	clock.c			\
 	conf.c			\
 	debug.c			\
diff -Naur DirectFB-1.3.0-org/lib/direct/memcpy.c DirectFB-1.3.0/lib/direct/memcpy.c
--- DirectFB-1.3.0-org/lib/direct/memcpy.c	2009-03-18 09:11:21.000000000 +0000
+++ DirectFB-1.3.0/lib/direct/memcpy.c	2009-03-18 09:14:45.000000000 +0000
@@ -44,7 +44,7 @@
 #include <direct/memcpy.h>
 #include <direct/messages.h>
 
-#if defined (ARCH_PPC) || (SIZEOF_LONG == 8)
+#if defined (ARCH_PPC) || defined (ARCH_ARM) || (SIZEOF_LONG == 8)
 # define RUN_BENCHMARK  1
 #else
 # define RUN_BENCHMARK  0
@@ -58,6 +58,10 @@
 #include "ppcasm_memcpy.h"
 #endif
 
+#ifdef USE_ARMASM
+#include "armasm_memcpy.h"
+#endif
+
 
 #if SIZEOF_LONG == 8
 
@@ -152,6 +156,9 @@
      { "ppccache", "ppcasm_cacheable_memcpy()",  direct_ppcasm_cacheable_memcpy, 0, 0},
 #endif /* __LINUX__ */
 #endif /* USE_PPCASM */
+#ifdef USE_ARMASM
+	 { "arm",      "armasm_memcpy()",            direct_armasm_memcpy, 0, 0},
+#endif
      { NULL, NULL, NULL, 0, 0}
 };
 
_______________________________________________
directfb-dev mailing list
directfb-dev@directfb.org
http://mail.directfb.org/cgi-bin/mailman/listinfo/directfb-dev

Reply via email to