------- Comment #5 from vda dot linux at googlemail dot com  2007-07-22 00:10 -------
Basically, the regression occurs because GCC 4.2.1 fails to use the i386
registers efficiently, whereas GCC 3.4.3 was able to. The difference is
visible in the generated assembly:

# grep 'mov.*(' serpent-343-O3.s | wc -l
21
serpent_encrypt:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        pushl   %edx
        movl    8(%ebp), %edi
        movl    16(%ebp), %ecx
        movl    12(%edi), %eax
....

# grep 'mov.*(' serpent-421-O3.s | wc -l
115    <========= many more moves involving memory (the stack, actually)
serpent_encrypt:
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $120, %esp <==== allocated storage for spills
        movl    16(%ebp), %eax
        movl    8(%ebp), %edx
        movl    %edx, -128(%ebp)
.....


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28481

Reply via email to