Ping.
On Thu, Jul 29, 2021 at 4:33 PM Christoph Muellner <cmuell...@gcc.gnu.org> wrote: > > The RISC-V cpymemsi expansion is called whenever the by-pieces > infrastructure will not be taking care of the builtin expansion. > Currently, by-pieces handles e.g. memcpy() with n <= 24 bytes. > The by-pieces infrastructure emits code that > performs unaligned accesses if the target's > riscv_slow_unaligned_access_p is false (and n is not 1). > > If n > 24, then the RISC-V cpymemsi expansion is called, which is > implemented in riscv_expand_block_move(). The current implementation > does not check riscv_slow_unaligned_access_p and never emits unaligned > accesses. > > Since by-pieces emits unaligned accesses, it is reasonable to implement > the same behaviour in the cpymemsi expansion. And that's what this patch > is doing. > > The patch checks riscv_slow_unaligned_access_p at the entry and sets > the allowed alignment accordingly. This alignment is then propagated > down to the routines that emit the actual instructions. > > Without the patch, a memcpy() with n==25 will be expanded only > if the given pointers are aligned. With the patch, unaligned > pointers are also accepted if riscv_slow_unaligned_access_p is false. > > gcc/ChangeLog: > > * config/riscv/riscv.c (riscv_block_move_straight): Add > parameter align. > (riscv_adjust_block_mem): Replace parameter length by parameter > align. > (riscv_block_move_loop): Add parameter align. > (riscv_expand_block_move): Set alignment properly if the target > has fast unaligned access. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/builtins-strict-align.c: New test. > * gcc.target/riscv/builtins-unaligned-1.c: New test. > * gcc.target/riscv/builtins-unaligned-2.c: New test. > * gcc.target/riscv/builtins-unaligned-3.c: New test. > * gcc.target/riscv/builtins-unaligned-4.c: New test. > * gcc.target/riscv/builtins.h: New test. 
> > Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org> > --- > gcc/config/riscv/riscv.c | 53 +++++++++++-------- > .../gcc.target/riscv/builtins-strict-align.c | 13 +++++ > .../gcc.target/riscv/builtins-unaligned-1.c | 15 ++++++ > .../gcc.target/riscv/builtins-unaligned-2.c | 15 ++++++ > .../gcc.target/riscv/builtins-unaligned-3.c | 15 ++++++ > .../gcc.target/riscv/builtins-unaligned-4.c | 15 ++++++ > gcc/testsuite/gcc.target/riscv/builtins.h | 10 ++++ > 7 files changed, 115 insertions(+), 21 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c > create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h > > diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c > index 576960bb37c..0596a9ff1b6 100644 > --- a/gcc/config/riscv/riscv.c > +++ b/gcc/config/riscv/riscv.c > @@ -3173,11 +3173,13 @@ riscv_legitimize_call_address (rtx addr) > return addr; > } > > -/* Emit straight-line code to move LENGTH bytes from SRC to DEST. > +/* Emit straight-line code to move LENGTH bytes from SRC to DEST > + with accesses that are ALIGN bytes aligned. > Assume that the areas do not overlap. 
*/ > > static void > -riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length) > +riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length, > + unsigned HOST_WIDE_INT align) > { > unsigned HOST_WIDE_INT offset, delta; > unsigned HOST_WIDE_INT bits; > @@ -3185,8 +3187,7 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned > HOST_WIDE_INT length) > enum machine_mode mode; > rtx *regs; > > - bits = MAX (BITS_PER_UNIT, > - MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest)))); > + bits = MAX (BITS_PER_UNIT, MIN (BITS_PER_WORD, align)); > > mode = mode_for_size (bits, MODE_INT, 0).require (); > delta = bits / BITS_PER_UNIT; > @@ -3211,21 +3212,20 @@ riscv_block_move_straight (rtx dest, rtx src, > unsigned HOST_WIDE_INT length) > { > src = adjust_address (src, BLKmode, offset); > dest = adjust_address (dest, BLKmode, offset); > - move_by_pieces (dest, src, length - offset, > - MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN); > + move_by_pieces (dest, src, length - offset, align, RETURN_BEGIN); > } > } > > /* Helper function for doing a loop-based block operation on memory > - reference MEM. Each iteration of the loop will operate on LENGTH > - bytes of MEM. > + reference MEM. > > Create a new base register for use within the loop and point it to > the start of MEM. Create a new memory reference that uses this > - register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ > + register and has an alignment of ALIGN. Store them in *LOOP_REG > + and *LOOP_MEM respectively. */ > > static void > -riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length, > +riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT align, > rtx *loop_reg, rtx *loop_mem) > { > *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); > @@ -3233,15 +3233,17 @@ riscv_adjust_block_mem (rtx mem, unsigned > HOST_WIDE_INT length, > /* Although the new mem does not refer to a known location, > it does keep up to LENGTH bytes of alignment. 
*/ > *loop_mem = change_address (mem, BLKmode, *loop_reg); > - set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); > + set_mem_align (*loop_mem, align); > } > > /* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER > - bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that > - the memory regions do not overlap. */ > + bytes at a time. LENGTH must be at least BYTES_PER_ITER. The alignment > + of the access can be set by ALIGN. Assume that the memory regions do not > + overlap. */ > > static void > riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length, > + unsigned HOST_WIDE_INT align, > unsigned HOST_WIDE_INT bytes_per_iter) > { > rtx label, src_reg, dest_reg, final_src, test; > @@ -3251,8 +3253,8 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned > HOST_WIDE_INT length, > length -= leftover; > > /* Create registers and memory references for use within the loop. */ > - riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); > - riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); > + riscv_adjust_block_mem (src, align, &src_reg, &src); > + riscv_adjust_block_mem (dest, align, &dest_reg, &dest); > > /* Calculate the value that SRC_REG should have after the last iteration > of the loop. */ > @@ -3264,7 +3266,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned > HOST_WIDE_INT length, > emit_label (label); > > /* Emit the loop body. */ > - riscv_block_move_straight (dest, src, bytes_per_iter); > + riscv_block_move_straight (dest, src, bytes_per_iter, align); > > /* Move on to the next block. */ > riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); > @@ -3276,7 +3278,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned > HOST_WIDE_INT length, > > /* Mop up any left-over bytes. 
*/ > if (leftover) > - riscv_block_move_straight (dest, src, leftover); > + riscv_block_move_straight (dest, src, leftover, align); > else > emit_insn(gen_nop ()); > } > @@ -3292,8 +3294,17 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) > unsigned HOST_WIDE_INT hwi_length = UINTVAL (length); > unsigned HOST_WIDE_INT factor, align; > > - align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD); > - factor = BITS_PER_WORD / align; > + if (riscv_slow_unaligned_access_p) > + { > + align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), > BITS_PER_WORD); > + factor = BITS_PER_WORD / align; > + } > + else > + { > + /* Assume data to be aligned. */ > + align = hwi_length * BITS_PER_UNIT; > + factor = 1; > + } > > if (optimize_function_for_size_p (cfun) > && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false)) > @@ -3301,7 +3312,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) > > if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor)) > { > - riscv_block_move_straight (dest, src, INTVAL (length)); > + riscv_block_move_straight (dest, src, hwi_length, align); > return true; > } > else if (optimize && align >= BITS_PER_WORD) > @@ -3321,7 +3332,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length) > iter_words = i; > } > > - riscv_block_move_loop (dest, src, bytes, iter_words * > UNITS_PER_WORD); > + riscv_block_move_loop (dest, src, bytes, align, iter_words * > UNITS_PER_WORD); > return true; > } > } > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c > b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c > new file mode 100644 > index 00000000000..6f7b1f324de > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c > @@ -0,0 +1,13 @@ > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64 > -mstrict-align" } */ > +/* { dg-do compile } */ > + > +#include "builtins.h" > + > +DO_MEMCPY_N(12) > + > +/* { dg-final { scan-assembler-times "lbu" 12 } } */ > +/* { dg-final { 
scan-assembler-times "sb" 12 } } */ > +/* { dg-final { scan-assembler-not "lw" } } */ > +/* { dg-final { scan-assembler-not "sw" } } */ > +/* { dg-final { scan-assembler-not "ld" } } */ > +/* { dg-final { scan-assembler-not "sd" } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c > b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c > new file mode 100644 > index 00000000000..f97d60a35d4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c > @@ -0,0 +1,15 @@ > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ > +/* { dg-do compile } */ > + > +#include "builtins.h" > + > +DO_MEMCPY_N(7) > + > +/* { dg-final { scan-assembler-not "ld" } } */ > +/* { dg-final { scan-assembler-not "sd" } } */ > +/* { dg-final { scan-assembler-times "lw" 1 } } */ > +/* { dg-final { scan-assembler-times "sw" 1 } } */ > +/* { dg-final { scan-assembler-times "lh" 1 } } */ > +/* { dg-final { scan-assembler-times "sh" 1 } } */ > +/* { dg-final { scan-assembler-times "lbu" 1 } } */ > +/* { dg-final { scan-assembler-times "sb" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c > b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c > new file mode 100644 > index 00000000000..b373651d241 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c > @@ -0,0 +1,15 @@ > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ > +/* { dg-do compile } */ > + > +#include "builtins.h" > + > +DO_MEMCPY_N(8) > + > +/* { dg-final { scan-assembler-times "ld" 1 } } */ > +/* { dg-final { scan-assembler-times "sd" 1 } } */ > +/* { dg-final { scan-assembler-not "lw" } } */ > +/* { dg-final { scan-assembler-not "sw" } } */ > +/* { dg-final { scan-assembler-not "lh" } } */ > +/* { dg-final { scan-assembler-not "sh" } } */ > +/* { dg-final { scan-assembler-not "lbu" } } */ > +/* { dg-final { scan-assembler-not "sb" } } */ > diff --git 
a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c > b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c > new file mode 100644 > index 00000000000..3f4a6b9630b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c > @@ -0,0 +1,15 @@ > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ > +/* { dg-do compile } */ > + > +#include "builtins.h" > + > +DO_MEMCPY_N(31) > + > +/* { dg-final { scan-assembler-times "ld" 3 } } */ > +/* { dg-final { scan-assembler-times "sd" 3 } } */ > +/* { dg-final { scan-assembler-times "lw" 1 } } */ > +/* { dg-final { scan-assembler-times "sw" 1 } } */ > +/* { dg-final { scan-assembler-times "lh" 1 } } */ > +/* { dg-final { scan-assembler-times "sh" 1 } } */ > +/* { dg-final { scan-assembler-times "lbu" 1 } } */ > +/* { dg-final { scan-assembler-times "sb" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c > b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c > new file mode 100644 > index 00000000000..26fcb7a71a7 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c > @@ -0,0 +1,15 @@ > +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */ > +/* { dg-do compile } */ > + > +#include "builtins.h" > + > +DO_MEMCPY_N(63) > + > +/* { dg-final { scan-assembler-times "ld" 7 } } */ > +/* { dg-final { scan-assembler-times "sd" 7 } } */ > +/* { dg-final { scan-assembler-times "lw" 1 } } */ > +/* { dg-final { scan-assembler-times "sw" 1 } } */ > +/* { dg-final { scan-assembler-times "lh" 1 } } */ > +/* { dg-final { scan-assembler-times "sh" 1 } } */ > +/* { dg-final { scan-assembler-times "lbu" 1 } } */ > +/* { dg-final { scan-assembler-times "sb" 1 } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/builtins.h > b/gcc/testsuite/gcc.target/riscv/builtins.h > new file mode 100644 > index 00000000000..5cad5fe194b > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/builtins.h > @@ -0,0 +1,10 @@ > +#ifndef BUILTINS_H > 
+#define BUILTINS_H > + > +#define DO_MEMCPY_N(N) \ > +void do_memcpy_##N (void *d, const void *s) \ > +{ \ > + __builtin_memcpy (d, s, N); \ > +} > + > +#endif /* BUILTINS_H */ > -- > 2.31.1 >