http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54116

             Bug #: 54116
           Summary: suboptimal code for tight loops
    Classification: Unclassified
           Product: gcc
           Version: 4.7.1
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
        AssignedTo: unassig...@gcc.gnu.org
        ReportedBy: nel...@seznam.cz


Consider the following loop.

/*
 * Reproducer for the reported code-generation issue.
 *
 * Walks x in groups of four ints, starting at index 0, until it finds a
 * group in which at least one element is non-zero.  For that group it
 * returns (x[i] | x[i+1]) * (x[i+2] | x[i+3]).
 *
 * The loop has no upper bound: it only terminates at the first group that
 * contains a non-zero value.
 */
int recal(int *x){int i;
  for(i=0;;i+=4){
    /* __builtin_expect(..., 0) marks "group is non-zero" as the unlikely case,
       i.e. the loop is expected to keep iterating. */
    if(__builtin_expect((x[i]|x[i+1])|(x[i+2]|x[i+3]),0))
      break;
  }
  /* Uses the same two pairwise ORs as the loop condition, but multiplied
     instead of OR-ed together. */
  return (x[i]|x[i+1])*(x[i+2]|x[i+3]);
}

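On x86-64 the orl instruction is destructive: it overwrites its destination
operand. GCC keeps the intermediate results in registers (which costs an extra
register copy inside the loop) instead of recalculating them after the loop
exits, making the loop run slower.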

The relevant assembly output is as follows:

gcc-4.7 -O3 -S
        .file   "recal.c"
        # Annotated compiler output (x86-64, AT&T/GAS syntax: "op src, dst").
        # Comments below were added for readability; the instructions are
        # unchanged.  Register use, as read from the code:
        #   %rdi = incoming pointer argument (x in the C source)
        #   %rax = pointer to the group after the one just tested (loop only)
        #   %edx = OR of the ints at offsets 12 and 8 of the current group
        #   %ecx = OR of the ints at offsets 4 and 0 of the current group
        #   %eax = return value
        .text
        .p2align 4,,15
        .globl  recal
        .type   recal, @function
recal:
.LFB0:
        .cfi_startproc
        # First group is tested outside the loop, directly through %rdi.
        # edx = x[3] | x[2]
        movl    12(%rdi), %edx
        orl     8(%rdi), %edx
        # ecx = x[1] | x[0]
        movl    4(%rdi), %ecx
        orl     (%rdi), %ecx
        # Combine both halves in a copy (%eax) so that %edx and %ecx keep
        # their values for the multiply at .L2.
        movl    %edx, %eax
        orl     %ecx, %eax
        # Non-zero group found immediately: skip the loop.
        jne     .L2
        # rax = address of the second group (x + 4 ints).
        leaq    16(%rdi), %rax
        .p2align 4,,10
        .p2align 3
.L3:
        # Loop body: same two pairwise ORs, addressed through %rax.
        movl    12(%rax), %edx
        orl     8(%rax), %edx
        movl    4(%rax), %ecx
        orl     (%rax), %ecx
        # Advance to the next group of four ints.
        addq    $16, %rax
        # Copy %edx into scratch %esi before the destructive OR, so %edx and
        # %ecx survive the test.  NOTE(review): this per-iteration copy appears
        # to be the instruction the report objects to.
        movl    %edx, %esi
        orl     %ecx, %esi
        # Flags come from the orl above: keep looping while the group is all zero.
        je      .L3
.L2:
        # Return ecx * edx, using the values preserved from the last group tested.
        movl    %ecx, %eax
        imull   %edx, %eax
        ret
        .cfi_endproc
.LFE0:
        .size   recal, .-recal
        .ident  "GCC: (Debian 4.7.1-2) 4.7.1"
        .section        .note.GNU-stack,"",@progbits
--

Reply via email to