Bug ID: 114647
           Summary: missing DCE when looping over a VLA
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: franckbehaghel_gcc at protonmail dot com
  Target Milestone: ---

$ cat foo.cpp

#include "stdint.h"
#include "stdio.h"

// Reproducer: `a` is a variable-length array that is only ever written and
// never read, so the initialisation loop has no observable effect; the only
// visible side effect of foo() is the printf call.
void foo(int n)
{
   uint64_t a[n];
   for (uint32_t i=0;i<n;i++) a[i] = i;   // stores are never read back

   printf(" ");
}

At -O3, GCC vectorizes the loop but does not remove it:

        push    rbp
        movsx   rax, edi
        lea     rax, [15+rax*8]
        and     rax, -16
        mov     rbp, rsp
        sub     rsp, rax
        test    edi, edi
        je      .L2
        lea     edx, [rdi-1]
        cmp     edx, 2
        jbe     .L2
        shr     edi, 2
        mov     ecx, 4
        pxor    xmm2, xmm2
        mov     rax, rsp
        mov     edx, edi
        movdqa  xmm1, XMMWORD PTR .LC0[rip]
        movd    xmm4, ecx
        sal     rdx, 5
        pshufd  xmm4, xmm4, 0
        add     rdx, rsp
        movdqa  xmm0, xmm1
        add     rax, 32
        paddd   xmm1, xmm4
        movdqa  xmm3, xmm0
        punpckhdq       xmm0, xmm2
        punpckldq       xmm3, xmm2
        movups  XMMWORD PTR [rax-16], xmm0
        movups  XMMWORD PTR [rax-32], xmm3
        cmp     rdx, rax
        jne     .L4
        mov     edi, 32
        call    putchar

Clang (C or C++) does remove the loop:

foo:                                    # @foo
        mov     edi, 32
        jmp     putchar@PLT                     # TAILCALL

Reply via email to