This patch adds the field overlap_op_by_pieces to struct
riscv_tune_param and implements the TARGET_OVERLAP_OP_BY_PIECES_P()
hook, which returns that field. The by-pieces infrastructure uses
this hook to decide whether overlapping memory accesses should be
emitted.

The new property is set to false in all tune structs except for
generic-ooo.

The changes in the expansion can be seen in the adjustments of the
cpymem test cases. These tests also reveal a limitation: the
RISC-V cpymem expansion itself does not benefit from this
optimization, because only by-pieces cpymem expansions emit
overlapping memory accesses.

gcc/ChangeLog:

        * config/riscv/riscv.cc (struct riscv_tune_param): New field
        overlap_op_by_pieces.
        (riscv_overlap_op_by_pieces): New function.
        (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to
        riscv_overlap_op_by_pieces.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/cpymem-32-ooo.c: Adjust for overlapping
        access.
        * gcc.target/riscv/cpymem-64-ooo.c: Likewise.

Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu>
---
 gcc/config/riscv/riscv.cc                     | 20 +++++++++++
 .../gcc.target/riscv/cpymem-32-ooo.c          | 20 +++++------
 .../gcc.target/riscv/cpymem-64-ooo.c          | 33 +++++++------------
 3 files changed, 40 insertions(+), 33 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 44945d47fd6..793ec3155b9 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -286,6 +286,7 @@ struct riscv_tune_param
   unsigned short memory_cost;
   unsigned short fmv_cost;
   bool slow_unaligned_access;
+  bool overlap_op_by_pieces;
   bool use_divmod_expansion;
   unsigned int fusible_ops;
   const struct cpu_vector_cost *vec_costs;
@@ -425,6 +426,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   5,                                           /* memory_cost */
   8,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* overlap_op_by_pieces */
   false,                                       /* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,                                                /* vector cost */
@@ -442,6 +444,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   3,                                           /* memory_cost */
   8,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* overlap_op_by_pieces */
   false,                                       /* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,                                                /* vector cost */
@@ -459,6 +462,7 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
   3,                                           /* memory_cost */
   4,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* overlap_op_by_pieces */
   false,                                       /* use_divmod_expansion */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,                                /* vector cost */
@@ -476,6 +480,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
   3,                                           /* memory_cost */
   4,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* overlap_op_by_pieces */
   false,                                       /* use_divmod_expansion */
   RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI,  /* fusible_ops */
   &generic_vector_cost,                                /* vector cost */
@@ -493,6 +498,7 @@ static const struct riscv_tune_param thead_c906_tune_info = {
   5,            /* memory_cost */
   8,           /* fmv_cost */
   false,            /* slow_unaligned_access */
+  false,                                       /* overlap_op_by_pieces */
   false,       /* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,                                                /* vector cost */
@@ -510,6 +516,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
   3,                                           /* memory_cost */
   3,                                           /* fmv_cost */
   true,                                                /* slow_unaligned_access */
+  false,                                       /* overlap_op_by_pieces */
   false,                                       /* use_divmod_expansion */
   RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH,          /* fusible_ops */
   NULL,                                                /* vector cost */
@@ -527,6 +534,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
   4,                                           /* memory_cost */
   4,                                           /* fmv_cost */
   false,                                       /* slow_unaligned_access */
+  true,                                                /* overlap_op_by_pieces */
   false,                                       /* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   &generic_vector_cost,                                /* vector cost */
@@ -544,6 +552,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   2,                                           /* memory_cost */
   8,                                           /* fmv_cost */
   false,                                       /* slow_unaligned_access */
+  false,                                       /* overlap_op_by_pieces */
   false,                                       /* use_divmod_expansion */
   RISCV_FUSE_NOTHING,                           /* fusible_ops */
   NULL,                                                /* vector cost */
@@ -9923,6 +9932,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int)
   return riscv_slow_unaligned_access_p;
 }
 
+/* Implement TARGET_OVERLAP_OP_BY_PIECES_P.  */
+
+static bool
+riscv_overlap_op_by_pieces (void)
+{
+  return tune_param->overlap_op_by_pieces;
+}
+
 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
 
 static bool
@@ -11340,6 +11357,9 @@ riscv_get_raw_result_mode (int regno)
 #undef TARGET_SLOW_UNALIGNED_ACCESS
 #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access
 
+#undef TARGET_OVERLAP_OP_BY_PIECES_P
+#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces
+
 #undef TARGET_SECONDARY_MEMORY_NEEDED
 #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed
 
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
index 946a773f77a..947d58c30fa 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
@@ -24,9 +24,8 @@ void copy_aligned_##N (void *to, void *from)          \
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
 **    sw\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    sb\t[at][0-9],6\([at][0-9]\)
+**    lw\t[at][0-9],3\([at][0-9]\)
+**    sw\t[at][0-9],3\([at][0-9]\)
 **    ...
 */
 COPY_N(7)
@@ -36,9 +35,8 @@ COPY_N(7)
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
 **    sw\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    sb\t[at][0-9],6\([at][0-9]\)
+**    lw\t[at][0-9],3\([at][0-9]\)
+**    sw\t[at][0-9],3\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(7)
@@ -66,11 +64,10 @@ COPY_ALIGNED_N(8)
 **    ...
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sw\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    sb\t[at][0-9],10\([at][0-9]\)
+**    lw\t[at][0-9],7\([at][0-9]\)
+**    sw\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_N(11)
@@ -79,11 +76,10 @@ COPY_N(11)
 **copy_aligned_11:
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sw\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    sb\t[at][0-9],10\([at][0-9]\)
+**    lw\t[at][0-9],7\([at][0-9]\)
+**    sw\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(11)
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
index 08a927b9483..108748690cd 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
@@ -24,9 +24,8 @@ void copy_aligned_##N (void *to, void *from)          \
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
 **    sw\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    sb\t[at][0-9],6\([at][0-9]\)
+**    lw\t[at][0-9],3\([at][0-9]\)
+**    sw\t[at][0-9],3\([at][0-9]\)
 **    ...
 */
 COPY_N(7)
@@ -36,9 +35,8 @@ COPY_N(7)
 **    ...
 **    lw\t[at][0-9],0\([at][0-9]\)
 **    sw\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    sb\t[at][0-9],6\([at][0-9]\)
+**    lw\t[at][0-9],3\([at][0-9]\)
+**    sw\t[at][0-9],3\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(7)
@@ -66,9 +64,8 @@ COPY_ALIGNED_N(8)
 **    ...
 **    ld\t[at][0-9],0\([at][0-9]\)
 **    sd\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    sb\t[at][0-9],10\([at][0-9]\)
+**    lw\t[at][0-9],7\([at][0-9]\)
+**    sw\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_N(11)
@@ -77,11 +74,9 @@ COPY_N(11)
 **copy_aligned_11:
 **    ...
 **    ld\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sd\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    sb\t[at][0-9],10\([at][0-9]\)
+**    lw\t[at][0-9],7\([at][0-9]\)
+**    sw\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(11)
@@ -90,11 +85,9 @@ COPY_ALIGNED_N(11)
 **copy_15:
 **    ...
 **    ld\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sd\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],14\([at][0-9]\)
-**    sb\t[at][0-9],14\([at][0-9]\)
+**    ld\t[at][0-9],7\([at][0-9]\)
+**    sd\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_N(15)
@@ -103,11 +96,9 @@ COPY_N(15)
 **copy_aligned_15:
 **    ...
 **    ld\t[at][0-9],0\([at][0-9]\)
-**    ...
 **    sd\t[at][0-9],0\([at][0-9]\)
-**    ...
-**    lbu\t[at][0-9],14\([at][0-9]\)
-**    sb\t[at][0-9],14\([at][0-9]\)
+**    ld\t[at][0-9],7\([at][0-9]\)
+**    sd\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(15)
-- 
2.44.0

Reply via email to