https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67406

--- Comment #5 from vincenzo Innocente <vincenzo.innocente at cern dot ch> ---
does not work...

#pragma omp declare simd notinbranch
float __attribute__ ((__target__ ("default")))
fma(float x,float y, float z);
#pragma omp declare simd notinbranch
float __attribute__ ((__target__ ("arch=haswell")))
fma(float x,float y, float z);
void foo() {
  #pragma omp simd
  for (int i=0; i<1024; ++i)
   v0[i] = fma(v1[i],v2[i],v3[i]);
}


generates
.L11:
        vmovss  v3(%rbx), %xmm2
        addq    $4, %rbx
        vmovss  v2-4(%rbx), %xmm1
        vmovss  v1-4(%rbx), %xmm0
        call    _Z15_Z3fmafff.ifuncfff
        vmovss  %xmm0, v0-4(%rbx)
        cmpq    $4096, %rbx
        jne     .L11

The call is dispatched through the ifunc resolver, but the loop is not vectorized: each iteration still makes one scalar call.

Reply via email to