https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90773

            Bug ID: 90773
           Summary: Improve piecewise operation
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hjl.tools at gmail dot com
  Target Milestone: ---

For the following test case:

[hjl@gnu-cfl-1 pieces-6]$ cat copy.i 
extern char *dst, *src;

void
foo (unsigned int x)
{
  __builtin_memcpy (dst, src, 15);
}
[hjl@gnu-cfl-1 pieces-6]$ 

GCC currently generates four load/store pairs of decreasing size (8 + 4 + 2 + 1 = 15 bytes):

        movq    src(%rip), %rdx
        movq    dst(%rip), %rax
        movq    (%rdx), %rcx
        movq    %rcx, (%rax)
        movl    8(%rdx), %ecx
        movl    %ecx, 8(%rax)
        movzwl  12(%rdx), %ecx
        movw    %cx, 12(%rax)
        movzbl  14(%rdx), %edx
        movb    %dl, 14(%rax)
        ret

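Instead, we can generate just two 8-byte load/store pairs whose ranges overlap by one byte (bytes 0-7 and bytes 7-14), which still cover all 15 bytes: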

        movq    src(%rip), %rdx
        movq    dst(%rip), %rax
        movq    (%rdx), %rcx
        movq    %rcx, (%rax)
        movq    7(%rdx), %rcx
        movq    %rcx, 7(%rax)
        ret

Reply via email to