https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111657

--- Comment #5 from Uroš Bizjak <ubizjak at gmail dot com> ---
I have tried compiling with -mtune=nocona, which has the following memcpy
stringop_algs table:

static stringop_algs nocona_memcpy[2] = {
  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
  {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
             {100000, unrolled_loop, false}, {-1, libcall, false}}}};

and the compiler produces the expected code in both cases (it uses
unrolled_loop when rep movsq is unavailable):

foo:
        movq    %fs:0, %rdx
        leaq    t@tpoff(%rdx), %rsi
        movl    $30, %ecx
        rep movsq
        ret

bar:
        xorl    %edx, %edx
.L4:
        movl    %edx, %eax
        movq    %gs:s(%rax), %r9
        movq    %gs:s+8(%rax), %r8
        movq    %gs:s+16(%rax), %rsi
        movq    %gs:s+24(%rax), %rcx
        movq    %r9, (%rdi,%rax)
        movq    %r8, 8(%rdi,%rax)
        movq    %rsi, 16(%rdi,%rax)
        movq    %rcx, 24(%rdi,%rax)
        addl    $32, %edx
        cmpl    $224, %edx
        jb      .L4
        addq    %rdx, %rdi
        movq    %gs:s(%rdx), %rax
        movq    %rax, (%rdi)
        movq    %gs:s+8(%rdx), %rax
        movq    %rax, 8(%rdi)
        ret

Reply via email to