https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64731

--- Comment #6 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
On trunk, at -O2, we now get:
.L2:
        leaq    (%rdi,%rax), %rcx
        leaq    (%rsi,%rax), %rdx
        movapd  (%rdx), %xmm0
        addpd   (%rcx), %xmm0
        movaps  %xmm0, -64(%rsp)
        movapd  16(%rcx), %xmm0
        addpd   16(%rdx), %xmm0
        movaps  %xmm0, -48(%rsp)
        movdqa  -64(%rsp), %xmm0
        movaps  %xmm0, (%rdi,%rax)
        movdqa  -48(%rsp), %xmm0
        movaps  %xmm0, 16(%rdi,%rax)
        addq    $32, %rax
        cmpq    $8192, %rax
        jne     .L2

This is definitely better than before.

Reply via email to