http://gcc.gnu.org/bugzilla/show_bug.cgi?id=57858

--- Comment #2 from vincenzo Innocente <vincenzo.innocente at cern dot ch> ---
Actually, the code generated for div and sqr already differs with standard SSE:
c++ -std=c++11 -Ofast -S avx2sqrt.cc -ftree-vectorizer-verbose=1 -Wall ; cat avx2sqrt.s

.L2:
    movdqa    %xmm0, %xmm1
    addl    $1, %eax
    movdqa    %xmm0, %xmm4
    cmpl    $256, %eax
    paddd    %xmm5, %xmm1
    pshufd    $238, %xmm1, %xmm0
    cvtdq2pd    %xmm1, %xmm1
    movapd    %xmm3, %xmm7
    paddd    %xmm6, %xmm4
    cvtdq2pd    %xmm0, %xmm0
    divpd    %xmm0, %xmm7
    movapd    %xmm7, %xmm0
    movapd    %xmm3, %xmm7
    divpd    %xmm1, %xmm7
    addpd    %xmm7, %xmm0
    addpd    %xmm0, %xmm2
    jne    .L3
    movapd    %xmm2, -24(%rsp)
    movsd    -16(%rsp), %xmm0
    addsd    %xmm2, %xmm0
    ret
    .cfi_endproc
.LFE3:
    .size    _Z3divv, .-_Z3divv
    .p2align 4,,15
    .globl    _Z3sqrv
    .type    _Z3sqrv, @function
# _Z3sqrv -- GCC output (AT&T syntax) for the square-root loop of the test case.
#
# Register roles, as established by the instructions below:
#   %eax  : 32-bit integer loop counter; starts at 1, loop exits when it hits 1025
#   %xmm1 : current term (low double); first the constant at .LC4, afterwards
#           sqrt((double)%eax)
#   %xmm0 : running double-precision sum; zeroed on entry and still holding the
#           sum at the return (presumably the function's return value -- the
#           C++ source is not part of this excerpt)
#
# The loop is rotated: control enters at .L6, so the term added on each pass is
# the one computed on the previous pass (or the .LC4 constant on the first
# pass).  In total the sum receives .LC4 + sqrt(2) + ... + sqrt(1024).
#
# Every floating-point instruction here is scalar (sqrtsd/addsd/cvtsi2sd); no
# packed instruction appears, in contrast to the packed divpd loop quoted
# above for _Z3divv.
_Z3sqrv:
.LFB4:
    .cfi_startproc
    movl    $1, %eax                # counter = 1
    movsd    .LC4(%rip), %xmm1      # first term: constant loaded RIP-relative from .LC4
                                    # (value not shown in this excerpt; presumably sqrt(1) = 1.0)
    xorpd    %xmm0, %xmm0           # sum = 0.0
    jmp    .L6                      # enter the rotated loop at its accumulate/test half
    .p2align 4,,10
    .p2align 3
.L7:
    cvtsi2sd    %eax, %xmm1         # term = (double)counter
    sqrtsd    %xmm1, %xmm1          # term = sqrt(term), scalar double
.L6:
    addl    $1, %eax                # advance the counter before adding the pending term
    addsd    %xmm1, %xmm0           # sum += pending term (computed for the previous counter value)
    cmpl    $1025, %eax             # loop ends once the counter reaches 1025
    jne    .L7                      # otherwise compute the term for the current counter
    rep; ret                        # return with the sum in %xmm0; "rep ret" is the two-byte
                                    # return form (commonly emitted as an AMD branch-predictor
                                    # workaround when ret is a jump target)
    .cfi_endproc

Reply via email to