Niels,

Here is a new version of the patch with the second version of memcpy and
a conditional that disables it on big-endian targets.

Let me know if you have any trouble with it.

Regards,

Vince


On Tue, 2009-03-24 at 16:36 +0100, Niels Roest wrote:
> Hi John,
> thanks for the comments,
> just want to mention 1 or 2 things too.
> 
> The testing routines do have a single cold, unmeasured, run first to 
> rule out previous cache state influence.
> 
> The test itself is in fact really simple - a continuous copy of a large 
> region. So no repeats. This does focus on the use case that is most 
> obvious for DirectFB, namely copying chunks and lines of graphics 
> between surfaces, which will normally lead to cache misses anyway. I am 
> most concerned about alignment, since this is really unpredictable.
> 
> I am not sure if we will benefit much from shuffling the code or using 
> different memory regions; you have to remember that the testing routines 
> produce a single score only, so these will need to be fine tuned a lot, 
> and we may even need to revert to multiple memcpy routines which are 
> optimised for multiple use cases. This might be an interesting approach, 
> it is one I will follow if performance measurements show that we can 
> expect a proper benefit from this - forgetting that DirectFB is mainly 
> about hardware acceleration anyway.
> 
> For me I am very happy with the changes that Vince made, thanks Vince, 
> and if I have a BE/LE lock, I will include the patch.
> 
> Greets
> Niels
> 
> John Williams wrote:
> > Hi Vince,
> >
> >
> > On Wed, Mar 25, 2009 at 12:57 AM, vince <vi...@bluush.com> wrote:
> >
> >   
> >> I've changed my benchmark to invalidate the cache before every test. My
> >> results are the same. Attached is my test program.
> >>     
> >
> > No worries - just wanted to make sure we weren't missing the obvious!
> >
> > Might also be worth shuffling the sequencing of the tests (armasm,
> > armasm2, libc), see if that has any impact.  I'm not intimate with ARM
> > cache details, but with a write-back cache you could be stalling on
> > cacheline evictions later in the test.
> >
> > Another safety would be to perform the tests in different memory
> > regions, with a complete cache flush and invalidate between each run.
> >
> > Not saying there's anything wrong with your code, just know it's easy
> > to get false results from simple benchmark code. Memory tests are
> > another one where the obvious approach is often wrong.
> >
> > Cheers,
> >
> > John
> > _______________________________________________
> > directfb-dev mailing list
> > directfb-dev@directfb.org
> > http://mail.directfb.org/cgi-bin/mailman/listinfo/directfb-dev
> >
> >   
> 
> 
diff -Naur DirectFB-1.3.0-org/configure.in DirectFB-1.3.0/configure.in
--- DirectFB-1.3.0-org/configure.in	2009-03-25 07:50:21.000000000 +0000
+++ DirectFB-1.3.0/configure.in	2009-03-25 07:51:31.000000000 +0000
@@ -198,6 +198,7 @@
 
   *arm*)
     have_arm=yes
+	AC_DEFINE(ARCH_ARM,1,[Define to 1 if you are compiling for ARM.])
     ;;
 
   ppc-*-linux* | powerpc-*)
@@ -221,6 +222,7 @@
 need_libc_r=no
 need_libdl=yes
 want_ppcasm=yes
+want_armasm=yes
 
 case "$target_or_host" in
   *-linux*)
@@ -236,6 +238,7 @@
     need_libc_r=yes
     need_libdl=no
     want_ppcasm=yes
+	want_armasm=yes
     CPPFLAGS="$CPPFLAGS -I/usr/local/include"
     LDFLAGS="$LDFLAGS -L/usr/local/lib"
     ;;
@@ -244,6 +247,7 @@
     need_libc_r=yes
     need_libdl=no
     want_ppcasm=no
+	want_armasm=no
     CPPFLAGS="$CPPFLAGS -I/usr/local/include"
     LDFLAGS="$LDFLAGS -L/usr/local/lib"
     ;;
@@ -252,6 +256,7 @@
     need_libc_r=no
     need_libdl=no
     want_ppcasm=yes
+	want_armasm=yes
     CPPFLAGS="$CPPFLAGS -I/usr/pkg/include"
     LDFLAGS="$LDFLAGS -L/usr/pkg/lib"
     ;;    
@@ -260,6 +265,7 @@
     need_libc_r=no
     need_libdl=yes
     want_ppcasm=no
+	want_armasm=no
     CPPFLAGS="$CPPFLAGS -I/sw/include"
     LDFLAGS="$LDFLAGS -L/sw/lib"
     ;;
@@ -281,6 +287,13 @@
     AC_DEFINE(USE_PPCASM,1,[Define to 1 if ppc assembly is available.])
 fi
 
+
+AM_CONDITIONAL(BUILDARMASM, test "$have_arm" = "yes" && test "$want_armasm" = "yes")
+
+if test "$have_arm" = "yes" && test "$want_armasm" = "yes"; then
+    AC_DEFINE(USE_ARMASM,1,[Define to 1 if arm assembly is available.])
+fi
+
 if test "$have_kos" = "yes"; then
     AC_DEFINE(USE_KOS,1,[Define to 1 if compiling on KallistiOS.])
 fi
diff -Naur DirectFB-1.3.0-org/lib/direct/armasm_memcpy.h DirectFB-1.3.0/lib/direct/armasm_memcpy.h
--- DirectFB-1.3.0-org/lib/direct/armasm_memcpy.h	1970-01-01 01:00:00.000000000 +0100
+++ DirectFB-1.3.0/lib/direct/armasm_memcpy.h	2009-03-25 07:52:52.000000000 +0000
@@ -0,0 +1,32 @@
+/*
+ * ARM memcpy asm replacement.
+ *
+ * Copyright (C) 2009 Bluush Dev Team.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __ARMASM_MEMCPY_H__
+#define __ARMASM_MEMCPY_H__
+#include <stddef.h>  /* size_t, so this installed header is self-contained */
+#if USE_ARMASM && !WORDS_BIGENDIAN
+/* ARM assembly memcpy replacement, implemented in armasm_memcpy.S (little-endian builds only). */
+void *direct_armasm_memcpy          ( void *dest, const void *src, size_t n);
+
+#endif /* USE_ARMASM && !WORDS_BIGENDIAN */
+
+#endif /* __ARMASM_MEMCPY_H__ */
+ 
diff -Naur DirectFB-1.3.0-org/lib/direct/armasm_memcpy.S DirectFB-1.3.0/lib/direct/armasm_memcpy.S
--- DirectFB-1.3.0-org/lib/direct/armasm_memcpy.S	1970-01-01 01:00:00.000000000 +0100
+++ DirectFB-1.3.0/lib/direct/armasm_memcpy.S	2009-03-25 07:52:52.000000000 +0000
@@ -0,0 +1,421 @@
+/*
+ * ARM memcpy asm replacement.
+ *
+ * Copyright (C) 2009 Bluush Dev Team.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#include <config.h>
+
+#if USE_ARMASM && !WORDS_BIGENDIAN
+
+#define _LABEL(f)   f :
+
+.global direct_armasm_memcpy
+
+	.code 32
+
+_LABEL(direct_armasm_memcpy)	/* AAPCS arguments: r0 = dest, r1 = src, r2 = n; result in r0 */
+	cmp r1, r0			/* compare src with dest */
+	bcc Lmemcpy_backwards		/* src < dest: copy from the end downwards */
+
+	/* src == dest: nothing to copy; r0 still holds dest, the value to return */
+	moveq pc, lr
+	
+	stmdb sp!, {r0, lr}		/* keep dest (return value) and return address */
+	subs r2, r2, #4
+	blt Lmemcpy_fl4			/* fewer than 4 bytes: byte tail only */
+	ands r12, r0, #3
+	bne Lmemcpy_fdestul		/* dest is not word aligned */
+	ands r12, r1, #3
+	bne Lmemcpy_fsrcul		/* src is not word aligned */
+
+_LABEL(Lmemcpy_ft8)		/* forward copy, src and dest both word aligned */
+	subs r2, r2, #8
+	blt Lmemcpy_fl12		/* fewer than 12 bytes left */
+	subs r2, r2, #0x14
+	blt Lmemcpy_fl32		/* fewer than 32 bytes left */
+	stmdb sp!, {r4}			/* r4 is used as a copy register below */
+
+_LABEL(Lmemcpy_floop32)		/* 32 bytes per iteration */
+	ldmia r1!, {r3, r4, r12, lr}
+	stmia r0!, {r3, r4, r12, lr}
+	ldmia r1!, {r3, r4, r12, lr}
+	stmia r0!, {r3, r4, r12, lr}
+	subs r2, r2, #0x20
+	bge Lmemcpy_floop32
+
+	cmn r2, #0x10			/* at least 16 bytes left? */
+	ldmgeia r1!, {r3, r4, r12, lr}
+	stmgeia r0!, {r3, r4, r12, lr}
+	subge r2, r2, #0x10
+	ldmia sp!, {r4}			/* restore r4 */
+
+_LABEL(Lmemcpy_fl32)
+	adds r2, r2, #0x14		/* r2 = bytes left - 12 */
+
+
+_LABEL(Lmemcpy_floop12)		/* 12 bytes per iteration */
+	ldmgeia r1!, {r3, r12, lr}
+	stmgeia r0!, {r3, r12, lr}
+	subges r2, r2, #0x0c
+	bge Lmemcpy_floop12
+
+_LABEL(Lmemcpy_fl12)
+	adds r2, r2, #8			/* r2 = bytes left - 4 */
+	blt Lmemcpy_fl4
+
+	subs r2, r2, #4
+	ldrlt r3, [r1], #4		/* 4..7 bytes left: copy one word */
+	strlt r3, [r0], #4
+	ldmgeia r1!, {r3, r12}		/* 8..11 bytes left: copy two words */
+	stmgeia r0!, {r3, r12}
+	subge r2, r2, #4
+
+_LABEL(Lmemcpy_fl4)		/* copy the last 0..3 bytes, then return dest */
+	adds r2, r2, #4			/* r2 = bytes left */
+	ldmeqia sp!, {r0, pc}		/* nothing left: pop dest and return */
+	
+	cmp r2, #2
+	ldrb r3, [r1], #1		/* first byte (always) */
+	strb r3, [r0], #1
+	ldrgeb r3, [r1], #1		/* second byte if r2 >= 2 */
+	strgeb r3, [r0], #1
+	ldrgtb r3, [r1], #1		/* third byte if r2 > 2 */
+	strgtb r3, [r0], #1
+	ldmia sp!, {r0, pc}		/* pop dest and return */
+
+
+_LABEL(Lmemcpy_fdestul)		/* dest unaligned: copy 1..3 bytes to align it */
+	rsb r12, r12, #4		/* r12 = 4 - (dest & 3) = bytes up to the word boundary */
+	cmp r12, #2
+	
+	ldrb r3, [r1], #1
+	strb r3, [r0], #1
+	ldrgeb r3, [r1], #1
+	strgeb r3, [r0], #1
+	ldrgtb r3, [r1], #1
+	strgtb r3, [r0], #1
+	subs r2, r2, r12
+	blt Lmemcpy_fl4			/* fewer than 4 bytes left */
+	
+	ands r12, r1, #3
+	beq Lmemcpy_ft8			/* src is word aligned too: use the aligned path */
+
+
+_LABEL(Lmemcpy_fsrcul)		/* dest aligned, src not: load aligned words and merge with shifts */
+	bic r1, r1, #3			/* round src down to a word boundary */
+	ldr lr, [r1], #4		/* lr = first aligned source word */
+	cmp r12, #2			/* r12 = src & 3 */
+	bgt Lmemcpy_fsrcul3
+	beq Lmemcpy_fsrcul2
+	cmp r2, #0x0c			/* fall through: src offset is 1 */
+	blt Lmemcpy_fsrcul1loop4
+	sub r2, r2, #0x0c
+	stmdb sp!, {r4, r5}
+
+_LABEL(Lmemcpy_fsrcul1loop16)	/* src offset 1, 16 bytes per iteration */
+	mov r3, lr, lsr #8
+	ldmia r1!, {r4, r5, r12, lr}
+	orr r3, r3, r4, lsl #24
+	mov r4, r4, lsr #8
+	orr r4, r4, r5, lsl #24
+	mov r5, r5, lsr #8
+	orr r5, r5, r12, lsl #24
+	mov r12, r12, lsr #8
+	orr r12, r12, lr, lsl #24
+	stmia r0!, {r3-r5, r12}
+	subs r2, r2, #0x10
+	bge Lmemcpy_fsrcul1loop16
+	ldmia sp!, {r4, r5}
+	adds r2, r2, #0x0c
+	blt Lmemcpy_fsrcul1l4
+
+_LABEL(Lmemcpy_fsrcul1loop4)	/* src offset 1, one word per iteration */
+	mov r12, lr, lsr #8
+	ldr lr, [r1], #4
+	orr r12, r12, lr, lsl #24
+	str r12, [r0], #4
+	subs r2, r2, #4
+	bge Lmemcpy_fsrcul1loop4
+
+_LABEL(Lmemcpy_fsrcul1l4)
+	sub r1, r1, #3			/* move r1 back to the unaligned source position */
+	b Lmemcpy_fl4
+
+_LABEL(Lmemcpy_fsrcul2)		/* src offset is 2 */
+	cmp r2, #0x0c
+	blt Lmemcpy_fsrcul2loop4
+	sub r2, r2, #0x0c
+	stmdb sp!, {r4, r5}
+
+_LABEL(Lmemcpy_fsrcul2loop16)	/* src offset 2, 16 bytes per iteration */
+	mov r3, lr, lsr #16
+	ldmia r1!, {r4, r5, r12, lr}
+	orr r3, r3, r4, lsl #16
+	mov r4, r4, lsr #16
+	orr r4, r4, r5, lsl #16
+	mov r5, r5, lsr #16
+	orr r5, r5, r12, lsl #16
+	mov r12, r12, lsr #16
+	orr r12, r12, lr, lsl #16
+	stmia r0!, {r3-r5, r12}
+	subs r2, r2, #0x10
+	bge Lmemcpy_fsrcul2loop16
+	ldmia sp!, {r4, r5}
+	adds r2, r2, #0x0c
+	blt Lmemcpy_fsrcul2l4
+
+_LABEL(Lmemcpy_fsrcul2loop4)	/* src offset 2, one word per iteration */
+	mov r12, lr, lsr #16
+	ldr lr, [r1], #4
+	orr r12, r12, lr, lsl #16
+	str r12, [r0], #4
+	subs r2, r2, #4
+	bge Lmemcpy_fsrcul2loop4
+
+_LABEL(Lmemcpy_fsrcul2l4)
+	sub r1, r1, #2			/* move r1 back to the unaligned source position */
+	b Lmemcpy_fl4
+
+_LABEL(Lmemcpy_fsrcul3)		/* src offset is 3 */
+	cmp r2, #0x0c
+	blt Lmemcpy_fsrcul3loop4
+	sub r2, r2, #0x0c
+	stmdb sp!, {r4, r5}
+
+_LABEL(Lmemcpy_fsrcul3loop16)	/* src offset 3, 16 bytes per iteration */
+	mov r3, lr, lsr #24
+	ldmia r1!, {r4, r5, r12, lr}
+	orr r3, r3, r4, lsl #8
+	mov r4, r4, lsr #24
+	orr r4, r4, r5, lsl #8
+	mov r5, r5, lsr #24
+	orr r5, r5, r12, lsl #8
+	mov r12, r12, lsr #24
+	orr r12, r12, lr, lsl #8
+	stmia r0!, {r3-r5, r12}
+	subs r2, r2, #0x10
+	bge Lmemcpy_fsrcul3loop16
+	ldmia sp!, {r4, r5}
+	adds r2, r2, #0x0c
+	blt Lmemcpy_fsrcul3l4
+
+_LABEL(Lmemcpy_fsrcul3loop4)	/* src offset 3, one word per iteration */
+	mov r12, lr, lsr #24
+	ldr lr, [r1], #4
+	orr r12, r12, lr, lsl #8
+	str r12, [r0], #4
+	subs r2, r2, #4
+	bge Lmemcpy_fsrcul3loop4
+
+_LABEL(Lmemcpy_fsrcul3l4)
+	sub r1, r1, #1			/* move r1 back to the unaligned source position */
+	b Lmemcpy_fl4
+
+_LABEL(Lmemcpy_backwards)	/* src < dest: copy downwards, starting one past the end */
+	add r1, r1, r2			/* r1 = src + n */
+	add r0, r0, r2			/* r0 = dest + n */
+	subs r2, r2, #4
+	blt Lmemcpy_bl4			/* fewer than 4 bytes: byte tail only */
+	ands r12, r0, #3
+	bne Lmemcpy_bdestul		/* end of dest is not word aligned */
+	ands r12, r1, #3
+	bne Lmemcpy_bsrcul		/* end of src is not word aligned */
+
+_LABEL(Lmemcpy_bt8)		/* backward copy, both pointers word aligned */
+	subs r2, r2, #8
+	blt Lmemcpy_bl12		/* fewer than 12 bytes left */
+	stmdb sp!, {r4, lr}		/* r4 and lr are used as copy registers below */
+	subs r2, r2, #0x14
+	blt Lmemcpy_bl32		/* fewer than 32 bytes left */
+
+
+_LABEL(Lmemcpy_bloop32)		/* 32 bytes per iteration */
+	ldmdb r1!, {r3, r4, r12, lr}
+	stmdb r0!, {r3, r4, r12, lr}
+	ldmdb r1!, {r3, r4, r12, lr}
+	stmdb r0!, {r3, r4, r12, lr}
+	subs r2, r2, #0x20
+	bge Lmemcpy_bloop32
+
+_LABEL(Lmemcpy_bl32)
+	cmn r2, #0x10			/* at least 16 bytes left? */
+	ldmgedb r1!, {r3, r4, r12, lr}
+	stmgedb r0!, {r3, r4, r12, lr}
+	subge r2, r2, #0x10
+	adds r2, r2, #0x14		/* at least 12 bytes left? */
+	ldmgedb r1!, {r3, r12, lr}
+	stmgedb r0!, {r3, r12, lr}
+	subge r2, r2, #0x0c
+	ldmia sp!, {r4, lr}		/* restore r4 and the return address */
+
+_LABEL(Lmemcpy_bl12)
+	adds r2, r2, #8			/* r2 = bytes left - 4 */
+	blt Lmemcpy_bl4
+	subs r2, r2, #4
+	ldrlt r3, [r1, #-4]!		/* 4..7 bytes left: copy one word */
+	strlt r3, [r0, #-4]!
+	ldmgedb r1!, {r3, r12}		/* 8..11 bytes left: copy two words */
+	stmgedb r0!, {r3, r12}
+	subge r2, r2, #4
+
+_LABEL(Lmemcpy_bl4)		/* copy the last 0..3 bytes, then return */
+	adds r2, r2, #4			/* r2 = bytes left */
+	moveq pc, lr			/* nothing left: r0 has walked back down to dest */
+
+	cmp r2, #2
+	ldrb r3, [r1, #-1]!		/* first byte (always) */
+	strb r3, [r0, #-1]!
+	ldrgeb r3, [r1, #-1]!		/* second byte if r2 >= 2 */
+	strgeb r3, [r0, #-1]!
+	ldrgtb r3, [r1, #-1]!		/* third byte if r2 > 2 */
+	strgtb r3, [r0, #-1]!
+	mov pc, lr			/* r0 has walked back down to dest */
+
+
+_LABEL(Lmemcpy_bdestul)		/* end of dest unaligned: copy r12 = (dest end & 3) bytes to align it */
+	cmp r12, #2
+
+	ldrb r3, [r1, #-1]!
+	strb r3, [r0, #-1]!
+	ldrgeb r3, [r1, #-1]!
+	strgeb r3, [r0, #-1]!
+	ldrgtb r3, [r1, #-1]!
+	strgtb r3, [r0, #-1]!
+	subs r2, r2, r12
+	blt Lmemcpy_bl4			/* fewer than 4 bytes left */
+	ands r12, r1, #3
+	beq Lmemcpy_bt8			/* src is word aligned too: use the aligned path */
+
+
+_LABEL(Lmemcpy_bsrcul)		/* dest aligned, src not: load aligned words and merge with shifts */
+	bic r1, r1, #3			/* round src down to a word boundary */
+	ldr r3, [r1, #0]		/* r3 = aligned word containing the last source bytes */
+	cmp r12, #2			/* r12 = src & 3 */
+	blt Lmemcpy_bsrcul1
+	beq Lmemcpy_bsrcul2
+	cmp r2, #0x0c			/* fall through: src offset is 3 */
+	blt Lmemcpy_bsrcul3loop4
+	sub r2, r2, #0x0c
+	stmdb sp!, {r4, r5, lr}
+
+_LABEL(Lmemcpy_bsrcul3loop16)	/* src offset 3, 16 bytes per iteration */
+	mov lr, r3, lsl #8
+	ldmdb r1!, {r3-r5, r12}
+	orr lr, lr, r12, lsr #24
+	mov r12, r12, lsl #8
+	orr r12, r12, r5, lsr #24
+	mov r5, r5, lsl #8
+	orr r5, r5, r4, lsr #24
+	mov r4, r4, lsl #8
+	orr r4, r4, r3, lsr #24
+	stmdb r0!, {r4, r5, r12, lr}
+	subs r2, r2, #0x10
+	bge Lmemcpy_bsrcul3loop16
+	ldmia sp!, {r4, r5, lr}
+	adds r2, r2, #0x0c
+	blt Lmemcpy_bsrcul3l4
+
+_LABEL(Lmemcpy_bsrcul3loop4)	/* src offset 3, one word per iteration */
+	mov r12, r3, lsl #8
+	ldr r3, [r1, #-4]!
+	orr r12, r12, r3, lsr #24
+	str r12, [r0, #-4]!
+	subs r2, r2, #4
+	bge Lmemcpy_bsrcul3loop4
+
+_LABEL(Lmemcpy_bsrcul3l4)
+	add r1, r1, #3			/* move r1 forward to the unaligned source position */
+	b Lmemcpy_bl4
+
+_LABEL(Lmemcpy_bsrcul2)		/* src offset is 2 */
+	cmp r2, #0x0c
+	blt Lmemcpy_bsrcul2loop4
+	sub r2, r2, #0x0c
+	stmdb sp!, {r4, r5, lr}
+
+_LABEL(Lmemcpy_bsrcul2loop16)	/* src offset 2, 16 bytes per iteration */
+	mov lr, r3, lsl #16
+	ldmdb r1!, {r3-r5, r12}
+	orr lr, lr, r12, lsr #16
+	mov r12, r12, lsl #16
+	orr r12, r12, r5, lsr #16
+	mov r5, r5, lsl #16
+	orr r5, r5, r4, lsr #16
+	mov r4, r4, lsl #16
+	orr r4, r4, r3, lsr #16
+	stmdb r0!, {r4, r5, r12, lr}
+	subs r2, r2, #0x10
+	bge Lmemcpy_bsrcul2loop16
+	ldmia sp!, {r4, r5, lr}
+	adds r2, r2, #0x0c
+	blt Lmemcpy_bsrcul2l4
+
+_LABEL(Lmemcpy_bsrcul2loop4)	/* src offset 2, one word per iteration */
+	mov r12, r3, lsl #16
+	ldr r3, [r1, #-4]!
+	orr r12, r12, r3, lsr #16
+	str r12, [r0, #-4]!
+	subs r2, r2, #4
+	bge Lmemcpy_bsrcul2loop4
+
+_LABEL(Lmemcpy_bsrcul2l4)
+	add r1, r1, #2			/* move r1 forward to the unaligned source position */
+	b Lmemcpy_bl4
+
+_LABEL(Lmemcpy_bsrcul1)		/* src offset is 1 */
+	cmp r2, #0x0c
+	blt Lmemcpy_bsrcul1loop4
+	sub r2, r2, #0x0c
+	stmdb sp!, {r4, r5, lr}
+
+_LABEL(Lmemcpy_bsrcul1loop32)	/* src offset 1; copies 16 bytes per iteration despite the label name */
+	mov lr, r3, lsl #24
+	ldmdb r1!, {r3-r5, r12}
+	orr lr, lr, r12, lsr #8
+	mov r12, r12, lsl #24
+	orr r12, r12, r5, lsr #8
+	mov r5, r5, lsl #24
+	orr r5, r5, r4, lsr #8
+	mov r4, r4, lsl #24
+	orr r4, r4, r3, lsr #8
+	stmdb r0!, {r4, r5, r12, lr}
+	subs r2, r2, #0x10
+	bge Lmemcpy_bsrcul1loop32
+	ldmia sp!, {r4, r5, lr}
+	adds r2, r2, #0x0c
+	blt Lmemcpy_bsrcul1l4
+
+_LABEL(Lmemcpy_bsrcul1loop4)	/* src offset 1, one word per iteration */
+	mov r12, r3, lsl #24
+	ldr r3, [r1, #-4]!
+	orr r12, r12, r3, lsr #8
+	str r12, [r0, #-4]!
+	subs r2, r2, #4
+	bge Lmemcpy_bsrcul1loop4
+
+_LABEL(Lmemcpy_bsrcul1l4)
+	add r1, r1, #1			/* move r1 forward to the unaligned source position */
+	b Lmemcpy_bl4
+
+
+	.ltorg
+	
+#endif /* USE_ARMASM && !WORDS_BIGENDIAN */
+
+
diff -Naur DirectFB-1.3.0-org/lib/direct/Makefile.am DirectFB-1.3.0/lib/direct/Makefile.am
--- DirectFB-1.3.0-org/lib/direct/Makefile.am	2009-03-25 07:50:20.000000000 +0000
+++ DirectFB-1.3.0/lib/direct/Makefile.am	2009-03-25 07:51:31.000000000 +0000
@@ -29,6 +29,10 @@
 ppcasm_headers = ppcasm_memcpy.h ppc_asm.h
 endif
 
+if BUILDARMASM
+armasm_sources = armasm_memcpy.S
+armasm_headers = armasm_memcpy.h
+endif
 
 # If the old location isn't cleared, builds of external modules fail
 install-exec-local:
@@ -39,6 +43,7 @@
 
 include_HEADERS = \
 	$(ppcasm_headers)		\
+	$(armasm_headers)		\
 	build.h				\
 	clock.h				\
 	conf.h				\
@@ -69,6 +74,7 @@
 
 libdirect_la_SOURCES = \
 	$(ppcasm_sources)	\
+	$(armasm_sources)	\
 	clock.c			\
 	conf.c			\
 	debug.c			\
diff -Naur DirectFB-1.3.0-org/lib/direct/memcpy.c DirectFB-1.3.0/lib/direct/memcpy.c
--- DirectFB-1.3.0-org/lib/direct/memcpy.c	2009-03-25 07:50:20.000000000 +0000
+++ DirectFB-1.3.0/lib/direct/memcpy.c	2009-03-25 07:53:07.000000000 +0000
@@ -44,7 +44,7 @@
 #include <direct/memcpy.h>
 #include <direct/messages.h>
 
-#if defined (ARCH_PPC) || (SIZEOF_LONG == 8)
+#if defined (ARCH_PPC) || defined (ARCH_ARM) || (SIZEOF_LONG == 8)
 # define RUN_BENCHMARK  1
 #else
 # define RUN_BENCHMARK  0
@@ -58,6 +58,10 @@
 #include "ppcasm_memcpy.h"
 #endif
 
+#if USE_ARMASM && !WORDS_BIGENDIAN   /* same condition that guards armasm_memcpy.h and armasm_memcpy.S */
+#include "armasm_memcpy.h"
+#endif /* USE_ARMASM && !WORDS_BIGENDIAN */
+
 
 #if SIZEOF_LONG == 8
 
@@ -152,6 +156,9 @@
      { "ppccache", "ppcasm_cacheable_memcpy()",  direct_ppcasm_cacheable_memcpy, 0, 0},
 #endif /* __LINUX__ */
 #endif /* USE_PPCASM */
+#if USE_ARMASM && !WORDS_BIGENDIAN   /* the routine is only declared and assembled under this condition */
+     { "arm",      "armasm_memcpy()",            direct_armasm_memcpy, 0, 0},
+#endif /* USE_ARMASM && !WORDS_BIGENDIAN */
      { NULL, NULL, NULL, 0, 0}
 };
 
_______________________________________________
directfb-dev mailing list
directfb-dev@directfb.org
http://mail.directfb.org/cgi-bin/mailman/listinfo/directfb-dev

Reply via email to