On Sunday 20 March 2005 15:17, Adrian Bunk wrote:
> Hi Denis,
> 
> what do your benchmarks say about replacing the whole assembler code 
> with a
> 
>   #define __memcpy __builtin_memcpy

It generates a call to the out-of-line memcpy()
if count is non-constant.

# cat t.c
extern char *a, *b;
extern int n;

void f() {
    __builtin_memcpy(a,b,n);
}

void g() {
    __builtin_memcpy(a,b,24);
}
# gcc -S -O2 --omit-frame-pointer t.c
# cat t.s
        .file   "t.c"
        .text
        .p2align 2,,3
.globl f
        .type   f, @function
f:
        subl    $16, %esp
        pushl   n
        pushl   b
        pushl   a
        call    memcpy
        addl    $28, %esp
        ret
        .size   f, .-f
        .p2align 2,,3
.globl g
        .type   g, @function
g:
        pushl   %edi
        pushl   %esi
        movl    a, %edi
        movl    b, %esi
        cld
        movl    $6, %ecx
        rep
        movsl
        popl    %esi
        popl    %edi
        ret
        .size   g, .-g
        .section        .note.GNU-stack,"",@progbits
        .ident  "GCC: (GNU) 3.4.1"

Proving that the out-of-line call is slower than the inline
version is left as an exercise for the reader :)

The kernel one will always be inlined.
void h() { __memcpy(a,b,n); } compiles to:
        movl    n, %eax
        pushl   %edi
        movl    %eax, %ecx
        pushl   %esi
        movl    a, %edi
        movl    b, %esi
        shrl    $2, %ecx
#APP
        rep ; movsl
        movl %eax,%ecx
        andl $3,%ecx
        jz 1f
        rep ; movsb
        1:
#NO_APP
        popl    %esi
        popl    %edi
        ret
--
vda

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to