https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61194

--- Comment #1 from vincenzo Innocente <vincenzo.innocente at cern dot ch> ---
What I find quite absurd is that
// Variant 1 of the test case: the conditional store into z[] is commented out.
// According to the report, this loop is vectorized.
// k, x, w, y and z are arrays declared outside this excerpt.
void barX() {
  for (int i=0; i<1024; ++i) {
    k[i] = x[i]>0;        // k[i] holds the result of the first comparison
    k[i] &=  w[i]<y[i];   // ... ANDed with the result of the second comparison
//    z[i] = (k[i]) ? z[i] : y[i];
 }
}
vectorizes, and
// Variant 2 of the test case: same mask computation as above, plus a
// conditional update of z[] driven by k[]. According to the report, this is
// the loop that GCC 4.9.0 fails to vectorize.
// k, x, w, y and z are arrays declared outside this excerpt.
void barX() {
  for (int i=0; i<1024; ++i) {
    k[i] = x[i]>0;        // k[i] holds the result of the first comparison
    k[i] &=  w[i]<y[i];   // ... ANDed with the result of the second comparison
    z[i] = (k[i]) ? z[i] : y[i];   // keep z[i] where k[i] is nonzero, else take y[i]
 }
}
does not vectorize with GCC 4.9.0.

This is a regression with respect to GCC 4.7.0, which, when the code was compiled as
c++ -Ofast -Wall -fno-tree-slp-vectorize -ftree-loop-if-convert-stores -S
cond.cc -msse4.2 -ftree-vectorizer-verbose=1
produced the following vectorized code:
# Compiler output quoted in the report for barX() (x86-64, AT&T/GAS syntax:
# source operand first, destination last).
# Each loop iteration handles 16 bytes (four 32-bit lanes) of every array.
# Register roles inside the loop:
#   %rax  = byte offset into x/y/w/z/k, stepping 0, 16, ..., 4080
#   %xmm4 = all-zero vector used as the 0 in "x > 0"
#   %xmm3 = all-zero vector used to test k == 0
#   %xmm5 = constant loaded from .LC1 (defined outside this excerpt;
#           presumably 1 in every lane, to turn a compare mask into 0/1 -- TODO confirm)
# NOTE(review): the Itanium C++ ABI mangled name of barX() is _Z4barXv; the
# leading underscore appears to have been lost in this transcript.
Z4barXv:
.LFB1:
        .cfi_startproc
        xorps   %xmm4, %xmm4            # xmm4 = 0
        xorl    %eax, %eax              # rax = 0 (writing eax zeroes the upper half)
        pxor    %xmm3, %xmm3            # xmm3 = 0
        movdqa  .LC1(%rip), %xmm5       # xmm5 = constant vector .LC1
        .p2align 4,,10                  # align loop head to 16 bytes if <= 10 pad bytes needed,
        .p2align 3                      # otherwise fall back to 8-byte alignment
.L9:
        movaps  y(%rax), %xmm2          # xmm2 = y[i..i+3]
        movaps  %xmm4, %xmm1            # xmm1 = 0
        movaps  w(%rax), %xmm0          # xmm0 = w[i..i+3]
        cmpltps x(%rax), %xmm1          # xmm1 = (0 < x[i..i+3]) per-lane mask
        cmpltps %xmm2, %xmm0            # xmm0 = (w < y) per-lane mask
        pand    %xmm5, %xmm0            # xmm0 &= .LC1
        pand    %xmm1, %xmm0            # xmm0 &= (x > 0) mask  -> value stored to k
        movaps  z(%rax), %xmm1          # xmm1 = z[i..i+3]
        movdqa  %xmm0, k(%rax)          # k[i..i+3] = xmm0
        pcmpeqd %xmm3, %xmm0            # xmm0 = (k == 0) per-lane mask
        blendvps        %xmm0, %xmm2, %xmm1     # xmm1 = (k == 0) ? y : z, selected per lane by xmm0's sign bit
        movaps  %xmm1, z(%rax)          # z[i..i+3] = xmm1
        addq    $16, %rax               # advance to the next 4 elements
        cmpq    $4096, %rax             # 4096 bytes = 1024 elements * 4 bytes
        jne     .L9
        rep                             # "rep; ret" two-byte return; NOTE(review): commonly a GCC
        ret                             # workaround for AMD branch predictors -- not shown by this listing
        .cfi_endproc

Reply via email to