https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81611

            Bug ID: 81611
           Summary: gcc un-learned loop / post-increment optimization
           Product: gcc
           Version: 8.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: other
          Assignee: unassigned at gcc dot gnu.org
          Reporter: gjl at gcc dot gnu.org
  Target Milestone: ---

C test case:

/* Reproducer: write the binary digits of x into str as ASCII '0'/'1',
   least-significant bit first, followed by a terminating 0.

   The do/while form always emits at least one digit, so x == 0 yields "0".
   With an 8-bit unsigned char this stores at most 8 digits plus the
   terminator; the caller must supply a buffer of that size.

   The store through *str++ is the post-increment access this report is
   about. */
void func1 (unsigned char x, char *str)
{
    do {
        *str++ = '0' + (x & 1);     /* emit the lowest bit as a character */
        x = x / 2;                  /* drop that bit */
    } while (x);                    /* stop once no set bits remain */
    *str = 0;                       /* terminate the string */
}

$ avr-gcc-8 foo.c -S -mmcu=atmega8 -O2

foo.s:

; avr-gcc 8 output for func1 (-O2, -mmcu=atmega8), annotated by the reporter.
; As used below: x is read from r24, str from r23:r22; Z (r31:r30) is the
; store pointer, r19:r18 holds the incremented pointer, r25 is scratch.
func1:
    movw r30,r22    ; ok, need the address in some
                    ;     address register (here Z=r30)
    rjmp .L2        ; ??? needed only to skip the copy-back at .L3
                    ;     on the first iteration
.L3:
    movw r30,r18     ; what the heck? Moving address back...
                     ; (Z = pointer incremented in the previous iteration)
.L2:
    movw r18,r30     ; ...and forth to some fresh register, just to increment
    subi r18,-1      ; the address from the *PREVIOUS* loop
    sbci r19,-1      ; slow, bloat, increases reg pressure
                     ; (subi/sbci with -1: r19:r18 = Z + 1)
    mov r25,r24      ; ok: r25 = x
    andi r25,lo8(1)  ; ok: r25 = x & 1
    subi r25,lo8(-(48)) ; ok: r25 += 48, i.e. '0' + (x & 1)
    st Z,r25         ; Why not just "st Z+, r25" ???
                     ; (plain store; Z itself is not advanced here)
    lsr r24          ; ok: x = x / 2
    brne .L3         ; loop while x != 0
    std Z+1,__zero_reg__ ; *str = 0; Z still addresses the last digit
                         ; written, hence the +1 displacement
    ret


Just for reference, here is the code generated by GCC 4.7:

* Uses 5 fewer instructions
* Occupies 2 fewer registers
* The loop takes 4 fewer cycles per iteration (8 instead of 12).

; avr-gcc 4.7 output for the same func1, shown for comparison.
; x is read from r24, str from r23:r22; Z (r31:r30) is the only pointer
; register used and r25 is the only scratch register.
func1:
    movw r30,r22            ; Z = str
.L2:
    mov r25,r24             ; r25 = x
    andi r25,lo8(1)         ; r25 = x & 1
    subi r25,lo8(-(48))     ; r25 += 48, i.e. '0' + (x & 1)
    st Z+,r25               ; *str++ = r25, using the post-increment store
    lsr r24                 ; x = x / 2
    brne .L2                ; loop while x != 0
                            ; (loop body: 1+1+1+2+1+2 = 8 cycles when taken)
    st Z,__zero_reg__       ; *str = 0; Z already points past the last digit
    ret

That's the code I expect from a 3rd-millennium compiler!! ;-)

Reply via email to