Ping.

On Thu, Jul 29, 2021 at 4:33 PM Christoph Muellner
<cmuell...@gcc.gnu.org> wrote:
>
> The RISC-V cpymemsi expansion is called whenever the by-pieces
> infrastructure will not be taking care of the builtin expansion.
> Currently, by-pieces handles e.g. memcpy() with n <= 24 bytes.
> The code emitted by the by-pieces infrastructure performs
> unaligned accesses if the target's riscv_slow_unaligned_access_p
> is false (and n is not 1).
>
> If n > 24, then the RISC-V cpymemsi expansion is called, which is
> implemented in riscv_expand_block_move(). The current implementation
> does not check riscv_slow_unaligned_access_p and never emits unaligned
> accesses.
>
> Since by-pieces emits unaligned accesses, it is reasonable to implement
> the same behaviour in the cpymemsi expansion. And that's what this patch
> is doing.
>
> The patch checks riscv_slow_unaligned_access_p at the entry and sets
> the allowed alignment accordingly. This alignment is then propagated
> down to the routines that emit the actual instructions.
>
> Without the patch, a memcpy() with n==25 will be expanded only
> if the given pointers are aligned. With the patch, unaligned
> pointers are also accepted if riscv_slow_unaligned_access_p is false.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.c (riscv_block_move_straight): Add
>         parameter align.
>         (riscv_adjust_block_mem): Replace parameter length by parameter
>         align.
>         (riscv_block_move_loop): Add parameter align.
>         (riscv_expand_block_move): Set alignment properly if the target
>         has fast unaligned access.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/builtins-strict-align.c: New test.
>         * gcc.target/riscv/builtins-unaligned-1.c: New test.
>         * gcc.target/riscv/builtins-unaligned-2.c: New test.
>         * gcc.target/riscv/builtins-unaligned-3.c: New test.
>         * gcc.target/riscv/builtins-unaligned-4.c: New test.
>         * gcc.target/riscv/builtins.h: New test.
>
> Signed-off-by: Christoph Muellner <cmuell...@gcc.gnu.org>
> ---
>  gcc/config/riscv/riscv.c                      | 53 +++++++++++--------
>  .../gcc.target/riscv/builtins-strict-align.c  | 13 +++++
>  .../gcc.target/riscv/builtins-unaligned-1.c   | 15 ++++++
>  .../gcc.target/riscv/builtins-unaligned-2.c   | 15 ++++++
>  .../gcc.target/riscv/builtins-unaligned-3.c   | 15 ++++++
>  .../gcc.target/riscv/builtins-unaligned-4.c   | 15 ++++++
>  gcc/testsuite/gcc.target/riscv/builtins.h     | 10 ++++
>  7 files changed, 115 insertions(+), 21 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/builtins.h
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index 576960bb37c..0596a9ff1b6 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -3173,11 +3173,13 @@ riscv_legitimize_call_address (rtx addr)
>    return addr;
>  }
>
> -/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
> +/* Emit straight-line code to move LENGTH bytes from SRC to DEST
> +   with accesses that are ALIGN bytes aligned.
>     Assume that the areas do not overlap.  */
>
>  static void
> -riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length)
> +riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
> +                          unsigned HOST_WIDE_INT align)
>  {
>    unsigned HOST_WIDE_INT offset, delta;
>    unsigned HOST_WIDE_INT bits;
> @@ -3185,8 +3187,7 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned 
> HOST_WIDE_INT length)
>    enum machine_mode mode;
>    rtx *regs;
>
> -  bits = MAX (BITS_PER_UNIT,
> -             MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))));
> +  bits = MAX (BITS_PER_UNIT, MIN (BITS_PER_WORD, align));
>
>    mode = mode_for_size (bits, MODE_INT, 0).require ();
>    delta = bits / BITS_PER_UNIT;
> @@ -3211,21 +3212,20 @@ riscv_block_move_straight (rtx dest, rtx src, 
> unsigned HOST_WIDE_INT length)
>      {
>        src = adjust_address (src, BLKmode, offset);
>        dest = adjust_address (dest, BLKmode, offset);
> -      move_by_pieces (dest, src, length - offset,
> -                     MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), RETURN_BEGIN);
> +      move_by_pieces (dest, src, length - offset, align, RETURN_BEGIN);
>      }
>  }
>
>  /* Helper function for doing a loop-based block operation on memory
> -   reference MEM.  Each iteration of the loop will operate on LENGTH
> -   bytes of MEM.
> +   reference MEM.
>
>     Create a new base register for use within the loop and point it to
>     the start of MEM.  Create a new memory reference that uses this
> -   register.  Store them in *LOOP_REG and *LOOP_MEM respectively.  */
> +   register and has an alignment of ALIGN.  Store them in *LOOP_REG
> +   and *LOOP_MEM respectively.  */
>
>  static void
> -riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT length,
> +riscv_adjust_block_mem (rtx mem, unsigned HOST_WIDE_INT align,
>                         rtx *loop_reg, rtx *loop_mem)
>  {
>    *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
> @@ -3233,15 +3233,17 @@ riscv_adjust_block_mem (rtx mem, unsigned 
> HOST_WIDE_INT length,
>    /* Although the new mem does not refer to a known location,
>       it does keep up to LENGTH bytes of alignment.  */
>    *loop_mem = change_address (mem, BLKmode, *loop_reg);
> -  set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
> +  set_mem_align (*loop_mem, align);
>  }
>
>  /* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
> -   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
> -   the memory regions do not overlap.  */
> +   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  The alignment
> +   of the access can be set by ALIGN.  Assume that the memory regions do not
> +   overlap.  */
>
>  static void
>  riscv_block_move_loop (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
> +                      unsigned HOST_WIDE_INT align,
>                        unsigned HOST_WIDE_INT bytes_per_iter)
>  {
>    rtx label, src_reg, dest_reg, final_src, test;
> @@ -3251,8 +3253,8 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned 
> HOST_WIDE_INT length,
>    length -= leftover;
>
>    /* Create registers and memory references for use within the loop.  */
> -  riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
> -  riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
> +  riscv_adjust_block_mem (src, align, &src_reg, &src);
> +  riscv_adjust_block_mem (dest, align, &dest_reg, &dest);
>
>    /* Calculate the value that SRC_REG should have after the last iteration
>       of the loop.  */
> @@ -3264,7 +3266,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned 
> HOST_WIDE_INT length,
>    emit_label (label);
>
>    /* Emit the loop body.  */
> -  riscv_block_move_straight (dest, src, bytes_per_iter);
> +  riscv_block_move_straight (dest, src, bytes_per_iter, align);
>
>    /* Move on to the next block.  */
>    riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
> @@ -3276,7 +3278,7 @@ riscv_block_move_loop (rtx dest, rtx src, unsigned 
> HOST_WIDE_INT length,
>
>    /* Mop up any left-over bytes.  */
>    if (leftover)
> -    riscv_block_move_straight (dest, src, leftover);
> +    riscv_block_move_straight (dest, src, leftover, align);
>    else
>      emit_insn(gen_nop ());
>  }
> @@ -3292,8 +3294,17 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
>        unsigned HOST_WIDE_INT hwi_length = UINTVAL (length);
>        unsigned HOST_WIDE_INT factor, align;
>
> -      align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
> -      factor = BITS_PER_WORD / align;
> +      if (riscv_slow_unaligned_access_p)
> +       {
> +         align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), 
> BITS_PER_WORD);
> +         factor = BITS_PER_WORD / align;
> +       }
> +      else
> +       {
> +         /* Assume data to be aligned.  */
> +         align = hwi_length * BITS_PER_UNIT;
> +         factor = 1;
> +       }
>
>        if (optimize_function_for_size_p (cfun)
>           && hwi_length * factor * UNITS_PER_WORD > MOVE_RATIO (false))
> @@ -3301,7 +3312,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
>
>        if (hwi_length <= (RISCV_MAX_MOVE_BYTES_STRAIGHT / factor))
>         {
> -         riscv_block_move_straight (dest, src, INTVAL (length));
> +         riscv_block_move_straight (dest, src, hwi_length, align);
>           return true;
>         }
>        else if (optimize && align >= BITS_PER_WORD)
> @@ -3321,7 +3332,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
>                 iter_words = i;
>             }
>
> -         riscv_block_move_loop (dest, src, bytes, iter_words * 
> UNITS_PER_WORD);
> +         riscv_block_move_loop (dest, src, bytes, align, iter_words * 
> UNITS_PER_WORD);
>           return true;
>         }
>      }
> diff --git a/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c 
> b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
> new file mode 100644
> index 00000000000..6f7b1f324de
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/builtins-strict-align.c
> @@ -0,0 +1,13 @@
> +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64 
> -mstrict-align" } */
> +/* { dg-do compile } */
> +
> +#include "builtins.h"
> +
> +DO_MEMCPY_N(12)
> +
> +/* { dg-final { scan-assembler-times "lbu" 12 } } */
> +/* { dg-final { scan-assembler-times "sb"  12 } } */
> +/* { dg-final { scan-assembler-not   "lw" } } */
> +/* { dg-final { scan-assembler-not   "sw" } } */
> +/* { dg-final { scan-assembler-not   "ld" } } */
> +/* { dg-final { scan-assembler-not   "sd" } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c 
> b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c
> new file mode 100644
> index 00000000000..f97d60a35d4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
> +/* { dg-do compile } */
> +
> +#include "builtins.h"
> +
> +DO_MEMCPY_N(7)
> +
> +/* { dg-final { scan-assembler-not   "ld"  } } */
> +/* { dg-final { scan-assembler-not   "sd"  } } */
> +/* { dg-final { scan-assembler-times "lw"  1 } } */
> +/* { dg-final { scan-assembler-times "sw"  1 } } */
> +/* { dg-final { scan-assembler-times "lh"  1 } } */
> +/* { dg-final { scan-assembler-times "sh"  1 } } */
> +/* { dg-final { scan-assembler-times "lbu" 1 } } */
> +/* { dg-final { scan-assembler-times "sb"  1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c 
> b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c
> new file mode 100644
> index 00000000000..b373651d241
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-2.c
> @@ -0,0 +1,15 @@
> +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
> +/* { dg-do compile } */
> +
> +#include "builtins.h"
> +
> +DO_MEMCPY_N(8)
> +
> +/* { dg-final { scan-assembler-times "ld"  1 } } */
> +/* { dg-final { scan-assembler-times "sd"  1 } } */
> +/* { dg-final { scan-assembler-not   "lw"  } } */
> +/* { dg-final { scan-assembler-not   "sw"  } } */
> +/* { dg-final { scan-assembler-not   "lh"  } } */
> +/* { dg-final { scan-assembler-not   "sh"  } } */
> +/* { dg-final { scan-assembler-not   "lbu" } } */
> +/* { dg-final { scan-assembler-not   "sb"  } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c 
> b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c
> new file mode 100644
> index 00000000000..3f4a6b9630b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-3.c
> @@ -0,0 +1,15 @@
> +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
> +/* { dg-do compile } */
> +
> +#include "builtins.h"
> +
> +DO_MEMCPY_N(31)
> +
> +/* { dg-final { scan-assembler-times "ld"  3 } } */
> +/* { dg-final { scan-assembler-times "sd"  3 } } */
> +/* { dg-final { scan-assembler-times "lw"  1 } } */
> +/* { dg-final { scan-assembler-times "sw"  1 } } */
> +/* { dg-final { scan-assembler-times "lh"  1 } } */
> +/* { dg-final { scan-assembler-times "sh"  1 } } */
> +/* { dg-final { scan-assembler-times "lbu" 1 } } */
> +/* { dg-final { scan-assembler-times "sb"  1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c 
> b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c
> new file mode 100644
> index 00000000000..26fcb7a71a7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/builtins-unaligned-4.c
> @@ -0,0 +1,15 @@
> +/* { dg-options "-O2 -mtune=thead-c906 -march=rv64gc -mabi=lp64" } */
> +/* { dg-do compile } */
> +
> +#include "builtins.h"
> +
> +DO_MEMCPY_N(63)
> +
> +/* { dg-final { scan-assembler-times "ld"  7 } } */
> +/* { dg-final { scan-assembler-times "sd"  7 } } */
> +/* { dg-final { scan-assembler-times "lw"  1 } } */
> +/* { dg-final { scan-assembler-times "sw"  1 } } */
> +/* { dg-final { scan-assembler-times "lh"  1 } } */
> +/* { dg-final { scan-assembler-times "sh"  1 } } */
> +/* { dg-final { scan-assembler-times "lbu" 1 } } */
> +/* { dg-final { scan-assembler-times "sb"  1 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/builtins.h 
> b/gcc/testsuite/gcc.target/riscv/builtins.h
> new file mode 100644
> index 00000000000..5cad5fe194b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/builtins.h
> @@ -0,0 +1,10 @@
> +#ifndef BUILTINS_H
> +#define BUILTINS_H
> +
> +#define DO_MEMCPY_N(N)                         \
> +void do_memcpy_##N (void *d, const void *s)    \
> +{                                              \
> +  __builtin_memcpy (d, s, N);                  \
> +}
> +
> +#endif /* BUILTINS_H */
> --
> 2.31.1
>

Reply via email to