------- Comment #4 from steven at gcc dot gnu dot org  2006-08-28 13:59 -------
From the hammer branch for AMD64:

# f, as emitted by the hammer branch (branching form).
# Result in %eax:
#   %edi != 0  ->  %esi + %esi
#   %edi == 0  ->  %edx + %edx
# NOTE(review): %edi/%esi/%edx are presumably the first three integer
# arguments under the SysV AMD64 ABI; the C source is not shown here -- confirm.
.globl f
        .type   f, @function
f:
.LFB4:
        testl   %edi, %edi              # ZF = (%edi == 0)
        movl    %esi, %eax              # eax = esi; mov does not disturb the flags
        jne     .L3                     # %edi != 0: eax and esi already hold the same value
        movl    %edx, %esi              # %edi == 0: replace both copies with edx
        movl    %edx, %eax
.L3:
        leal    (%rax,%rsi), %eax       # eax = eax + esi (two copies of the selected value)
        ret
.LFE4:
        .size   f, .-f                  # symbol size = current location - start of f
        .p2align 4,,15                  # align to 2^4 bytes, skipping at most 15 bytes of padding
# f1, as emitted by the hammer branch (branch-free form).
# Result in %eax:
#   %edi != 0  ->  %esi + %esi
#   %edi == 0  ->  %edx + %edx
# NOTE(review): %edi/%esi/%edx are presumably the first three integer
# arguments under the SysV AMD64 ABI; the C source is not shown here -- confirm.
.globl f1
        .type   f1, @function
f1:
.LFB5:
        testl   %edi, %edi              # ZF = (%edi == 0)
        cmove   %edx, %esi              # if ZF: esi = edx, otherwise esi is kept
        leal    (%rsi,%rsi), %eax       # eax = esi + esi; a single register feeds both operands
        ret
.LFE5:
        .size   f1, .-f1                # symbol size = current location - start of f1


And from gcc 4.2 20060818:

# f, as emitted by gcc 4.2 20060818 (branch-free, but keeps two copies).
# Result in %eax:
#   %edi != 0  ->  %esi + %esi
#   %edi == 0  ->  %edx + %edx
# The selected value is carried in both %esi and %eax, costing a second
# cmove and a separate add.
# NOTE(review): %edi/%esi/%edx are presumably the first three integer
# arguments under the SysV AMD64 ABI; the C source is not shown here -- confirm.
.globl f
        .type   f, @function
f:
.LFB2:
        testl   %edi, %edi              # ZF = (%edi == 0)
        movl    %esi, %eax              # eax = esi; mov does not disturb the flags
        cmove   %edx, %esi              # if ZF: esi = edx
        cmove   %esi, %eax              # if ZF: eax = esi, which now holds edx
        addl    %esi, %eax              # eax = eax + esi (two copies of the selected value)
        ret
.LFE2:
        .size   f, .-f                  # symbol size = current location - start of f
        .p2align 4,,15                  # align to 2^4 bytes, skipping at most 15 bytes of padding
# f1, as emitted by gcc 4.2 20060818 (branch-free form).
# Result in %eax:
#   %edi != 0  ->  %esi + %esi
#   %edi == 0  ->  %edx + %edx
# NOTE(review): %edi/%esi/%edx are presumably the first three integer
# arguments under the SysV AMD64 ABI; the C source is not shown here -- confirm.
.globl f1
        .type   f1, @function
f1:
.LFB3:
        testl   %edi, %edi              # ZF = (%edi == 0)
        cmove   %edx, %esi              # if ZF: esi = edx, otherwise esi is kept
        leal    (%rsi,%rsi), %eax       # eax = esi + esi; a single register feeds both operands
        ret
.LFE3:
        .size   f1, .-f1                # symbol size = current location - start of f1


So not all gcc3 releases do so well.  Are there GCC releases that optimize the
two functions to identical code?

In any case, this is a missed optimization.  I suppose the trick in this case
is to recognise that "c + d" == "c + c" (perhaps during value numbering?), but
the first step to analyze this bug would be to figure out where gcc3
(supposedly) performs this optimization.


-- 


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=28868

Reply via email to