https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108724

--- Comment #10 from Richard Biener <rguenth at gcc dot gnu.org> ---
On trunk we're back to vectorizing, but as intended with DImode, which saves
half of the loads and stores, and we think that saving more than covers the
extra arithmetic required (by quite some margin).

        movabsq $9223372034707292159, %rcx
        movq    (%rdx), %rax
        movq    (%rsi), %rsi
        movq    %rcx, %rdx
        andq    %rax, %rdx
        andq    %rsi, %rcx
        xorq    %rsi, %rax
        addq    %rcx, %rdx
        movabsq $-9223372034707292160, %rcx
        andq    %rcx, %rax
        xorq    %rdx, %rax
        movq    %rax, (%rdi)

vs

        movl    (%rdx), %eax
        addl    (%rsi), %eax
        movl    %eax, (%rdi)
        movl    4(%rdx), %eax
        addl    4(%rsi), %eax
        movl    %eax, 4(%rdi)

Reply via email to