https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94084

            Bug ID: 94084
           Summary: Optimizer produces suboptimal code related to
                    loop-invariant
           Product: gcc
           Version: 9.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: zhongyunde at huawei dot com
  Target Milestone: ---

In both case1 and case2 below, the global variable base is loop-invariant, so
the load of base can be hoisted out of the loop body.
However, the code generated for the two cases differs: in case2 the load is
hoisted, while in case1 it stays inside the loop, so case1 needs further
optimization.

Tested on x86-64 with https://gcc.godbolt.org/

== [case1] ======================================
extern int base;

/*
 * case1 reproducer: copy n ints from src to dest, with both arrays
 * indexed starting at the global offset `base`.
 *
 * NOTE(review): dest is a pointer parameter of type int*, so a store
 * through it could alias the global int `base`; that may be why the
 * compiler reloads base on every iteration -- confirm.
 */
void foo (int dest[], int src[], int n)
{
   int i;

   // #pragma no_swp
   for (i=0; i < n; i++)
      dest[base+i] = src[base+i];   /* base is read on every iteration */
}

/* Compiler output for case1: rdi = dest, rsi = src, edx = n; ecx holds i. */
foo:
        test    edx, edx                 /* skip the loop when n <= 0 */
        jle     .L1
        xor     ecx, ecx                 /* i = 0 */
.L3:
        mov     eax, DWORD PTR base[rip] /* reloaded on every iteration; expected to be hoisted out of the loop */
        add     eax, ecx                 /* eax = base + i */
        add     ecx, 1                   /* i++ */
        cdqe                             /* sign-extend the 32-bit index in eax to rax */
        mov     r8d, DWORD PTR [rsi+rax*4]   /* r8d = src[base+i] */
        mov     DWORD PTR [rdi+rax*4], r8d   /* dest[base+i] = r8d */
        cmp     edx, ecx                 /* loop until i == n */
        jne     .L3
.L1:
        ret


== [case2] ======================================
extern int base;
extern int dest[], src[];
/*
 * case2 reproducer: copy n ints from the global array src to the global
 * array dest, with both arrays indexed starting at the global offset
 * `base`. Here dest and src are extern arrays rather than pointer
 * parameters.
 */
void foo (int n)
{
   int i;

   // #pragma no_swp
   for (i=0; i < n; i++)
     dest[base+i] = src[base+i];   /* base is read on every iteration */
}

/* Compiler output for case2: edi = n; rax walks a byte offset into src/dest. */
foo:
        test    edi, edi                 /* skip the loop when n <= 0 */
        jle     .L1
        movsx   rdx, DWORD PTR base[rip] /* base is loaded once, already hoisted out of the loop */
        lea     ecx, [rdi-1]             /* ecx = n - 1 */
        lea     rcx, [rdx+1+rcx]         /* rcx = base + n */
        lea     rax, [0+rdx*4]           /* rax = base * 4, starting byte offset */
        sal     rcx, 2                   /* rcx = (base + n) * 4, ending byte offset */
.L3:
        mov     edx, DWORD PTR src[rax]  /* edx = src element at the current offset */
        add     rax, 4                   /* advance by one int */
        mov     DWORD PTR dest[rax-4], edx   /* store to dest at the pre-increment offset */
        cmp     rax, rcx                 /* loop until the ending offset is reached */
        jne     .L3
.L1:
        ret

Reply via email to