[4.8/4.9 Regression] [LRA,x86] Non-optimal code for simple loop with LRA

ysrumyan at gmail dot com Fri, 13 Sep 2013 06:00:55 -0700

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=55342


--- Comment #10 from Yuri Rumyantsev <ysrumyan at gmail dot com> ---
After the fix in rev. 202468 the assembly looks slightly better, but we ran into
another RA inefficiency, which can be illustrated with the attached test (t1.c)
compiled with options "-march=atom -mtune=atom -m32 -O2": the upper bound of the
loop check is kept in a register, but the base register for "write" is on the stack:

.L8:
    movzbl    3(%esp), %edx
    movl    %esi, %ecx
    cmpb    %cl, %dl
    movl    %esi, %edi
    cmovbe    %edx, %edi
.L4:
    movl    %esi, %edx
    movl    28(%esp), %esi  <-- why write is on stack
    movl    %edi, %ecx
    addl    $4, 28(%esp)  <-- perform write incrementation on stack
    subl    %ecx, %edx
    subl    %ecx, %ebx
    movzbl    3(%esp), %ecx
    movb    %dl, (%esi)
    movl    %edi, %edx
    subl    %edx, %ecx
    movb    %bl, 1(%esi)
    movb    %cl, 2(%esi)
    movl    28(%esp), %esi
    cmpl    %ebp, %eax  <-- why upper bound is in register?
    movb    %dl, -1(%esi)
    je    .L1
.L5:
    movzbl    (%eax), %esi
    leal    3(%eax), %eax
    movzbl    -2(%eax), %ebx
    notl    %esi
    notl    %ebx
    movl    %esi, %edx
    movzbl    -1(%eax), %ecx
    cmpb    %bl, %dl
    notl    %ecx
    movb    %cl, 3(%esp)
    jb    .L8
    movzbl    3(%esp), %edx
    movl    %ebx, %edi
    cmpb    %bl, %dl
    cmovbe    %edx, %edi
    jmp    .L4

Is something wrong in the ATOM cost model? In any case, I assume that keeping the
upper bound on the stack is much cheaper than loading the base from the stack and
incrementing it there.

[Bug rtl-optimization/55342] [4.8/4.9 Regression] [LRA,x86] Non-optimal code for simple loop with LRA

Reply via email to