https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62147

            Bug ID: 62147
           Summary: missed loop counter based optimization
           Product: gcc
           Version: 5.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: carrot at google dot com
            Target: powerpc64le

Compile the following source code with the options -m64 -mcpu=power8 -O2:

/* Test-case structure: an int field followed by a fixed 258-element int array. */
typedef struct {
      int l;        /* read by clear() to compute its loop bound */
      int b[258];   /* array whose leading elements clear() sets to 0 */
} S;

/*
 * Store 0 into s->b[0] through s->b[len] inclusive, where len = s->l + 1.
 *
 * The loop condition is `i <= len`, so the body runs len + 1 times when
 * len >= 0 and not at all when len is negative.
 *
 * NOTE(review): nothing here checks the bound against the 258-element
 * array; presumably callers keep s->l small enough -- confirm.
 */
void clear (S* s )
{
   int i;
   /* The trailing comment on the next line is the alternative initializer
      the report refers to; with it, gcc forms the loop with bdnz. */
   int len = s->l + 1;      // int len = s->l; 

   /* Inclusive upper bound: the element at index len is also written. */
   for (i = 0; i <= len; i++)
       s->b[i] = 0;
}

Trunk compiler generates:

clear:
    lwz 9,0(3)
    cmpwi 7,9,-1
    bltlr- 7
    addi 9,9,1
    li 10,0
    rldicl 9,9,0,32
    addi 9,9,1
    sldi 9,9,2
    add 9,3,9
    .p2align 4,,15
.L3:
    stwu 10,4(3)
    cmpld 7,3,9
    bne+ 7,.L3
    blr


It uses compare-and-branch instructions (cmpld/bne) to construct the loop; a
better code sequence would use the bdnz instruction.
If I change the source code as shown in the comment, gcc generates a bdnz
instruction to form the loop:

clear:
    lwz 9,0(3)
    cmpwi 7,9,0
    extsw 9,9
    bltlr- 7
    sldi 9,9,2
    li 10,0
    srdi 9,9,2
    addi 9,9,1
    mtctr 9
    .p2align 4,,15
.L3:
    stwu 10,4(3)
    bdnz .L3
    blr

Reply via email to