https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88490

            Bug ID: 88490
           Summary: Missed autovectorization when indices are different
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: bugzi...@poradnik-webmastera.com
  Target Milestone: ---

The code below reads and writes data using different indices, which is checked
by the "if" above the loop. This can be autovectorized, as the two memory areas
do not overlap. However, when compiled with -O3 -march=skylake-avx512, GCC emits
only scalar loads and stores (see the assembly listing below).

[code]
// Holds a table of row pointers: d points to pointers to double, and both
// pointer levels are qualified with the GCC __restrict__ extension.
struct S
{
    double* __restrict__ * __restrict__ d;
};

// Reproducer for the missed vectorization: when the parameter n is greater
// than k, copies the first two doubles of row s->d[k] into rows s->d[0] and
// s->d[1]. Does nothing otherwise.
void test(S* __restrict__ s, int n, int k)
{
    // This comparison uses the function parameter n.
    if (n > k)
    {
        // The loop counter is also named n and shadows the parameter, so the
        // row indices used in the body are 0 and 1, not the value compared
        // against k above.
        // NOTE(review): because of the shadowing, the guard does not appear to
        // rule out k being 0 or 1 inside the loop -- confirm whether the
        // shadowing is intentional in this test case.
        for (int n = 0; n < 2; ++n)
        {
            s->d[n][0] = s->d[k][0];
            s->d[n][1] = s->d[k][1];
        }
    }
}
[/code]

[asm]
# Compiler output for test(). The two loop iterations appear fully unrolled,
# and every double is moved with a scalar vmovsd load/store pair.
# NOTE(review): register roles below assume the System V x86-64 calling
# convention (rdi = s, esi = n, edx = k) -- confirm for the target used.
test(S*, int, int):
        # Skip everything when n <= k (the "if (n > k)" guard).
        cmp     esi, edx
        jle     .L3
        # rcx = s->d
        mov     rcx, QWORD PTR [rdi]
        # rax = s->d[k] (k sign-extended to 64 bits for indexing)
        movsx   rdx, edx
        mov     rax, QWORD PTR [rcx+rdx*8]
        # rdx = s->d[0]
        mov     rdx, QWORD PTR [rcx]
        # First iteration: d[0][0] = d[k][0]; d[0][1] = d[k][1]
        vmovsd  xmm0, QWORD PTR [rax]
        vmovsd  QWORD PTR [rdx], xmm0
        vmovsd  xmm0, QWORD PTR [rax+8]
        vmovsd  QWORD PTR [rdx+8], xmm0
        # Second iteration: the source values are loaded again, and
        # rdx = s->d[1] becomes the destination row.
        vmovsd  xmm0, QWORD PTR [rax]
        mov     rdx, QWORD PTR [rcx+8]
        vmovsd  QWORD PTR [rdx], xmm0
        vmovsd  xmm0, QWORD PTR [rax+8]
        vmovsd  QWORD PTR [rdx+8], xmm0
.L3:
        ret
[/asm]

Reply via email to