https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87941

            Bug ID: 87941
           Summary: by_pieces infra does not use movmisalign optab
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: amonakov at gcc dot gnu.org
  Target Milestone: ---

The by_pieces code uses only mov_optab and never checks movmisalign_optab, so
on STRICT_ALIGNMENT targets such as arm it does not use the available
misaligned load/store patterns. This results in suboptimal code, e.g. for

void f(char *c)
{
  __builtin_memcpy(c, "foo", 4);
}

where with -O2 -march=armv6t2 gcc emits

f:
        movw    r3, #:lower16:.LANCHOR0
        mov     r2, r0
        movt    r3, #:upper16:.LANCHOR0
        ldr     r0, [r3]
        str     r0, [r2]        @ unaligned
        bx      lr
        .size   f, .-f
        .section        .rodata
        .align  2
        .set    .LANCHOR0,. + 0
.LC0:
        .ascii  "foo\000"

while optimal code is emitted for the equivalent

void f(char *c)
{
  int t;
  __builtin_memcpy(&t, "foo", 4);
  __builtin_memcpy(c,     &t, 4);
}

f:
        movw    r3, #28518
        movt    r3, 111
        str     r3, [r0]        @ unaligned
        bx      lr
        .size   f, .-f

Reply via email to