On Sunday 20 March 2005 15:17, Adrian Bunk wrote: > Hi Denis, > > what do your benchmarks say about replacing the whole assembler code > with a > > #define __memcpy __builtin_memcpy
It generates a call to out-of-line memcpy() if the count is non-constant. # cat t.c extern char *a, *b; extern int n; void f() { __builtin_memcpy(a,b,n); } void g() { __builtin_memcpy(a,b,24); } # gcc -S -O2 --omit-frame-pointer t.c # cat t.s .file "t.c" .text .p2align 2,,3 .globl f .type f, @function f: subl $16, %esp pushl n pushl b pushl a call memcpy addl $28, %esp ret .size f, .-f .p2align 2,,3 .globl g .type g, @function g: pushl %edi pushl %esi movl a, %edi movl b, %esi cld movl $6, %ecx rep movsl popl %esi popl %edi ret .size g, .-g .section .note.GNU-stack,"",@progbits .ident "GCC: (GNU) 3.4.1" Proving that it is slower than inline is left as an exercise to the reader :) The kernel one will always be inlined. void h() { __memcpy(a,b,n); } is movl n, %eax pushl %edi movl %eax, %ecx pushl %esi movl a, %edi movl b, %esi shrl $2, %ecx #APP rep ; movsl movl %eax,%ecx andl $3,%ecx jz 1f rep ; movsb 1: #NO_APP popl %esi popl %edi ret -- vda - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/