This patch adds the field overlap_op_by_pieces to struct riscv_tune_param. The value of this field is returned by the TARGET_OVERLAP_OP_BY_PIECES_P() hook, which the by-pieces infrastructure uses to decide whether overlapping memory accesses should be emitted.
The new property is set to false in all tune structs except for generic-ooo. The changes in the expansion can be seen in the adjustments of the cpymem test cases. These tests also reveal a limitation in the RISC-V cpymem expansion that prevents this optimization as only by-pieces cpymem expansions emit overlapping memory accesses. gcc/ChangeLog: * config/riscv/riscv.cc (struct riscv_tune_param): New field overlap_op_by_pieces. (riscv_overlap_op_by_pieces): New function. (TARGET_OVERLAP_OP_BY_PIECES_P): Connect to riscv_overlap_op_by_pieces. gcc/testsuite/ChangeLog: * gcc.target/riscv/cpymem-32-ooo.c: Adjust for overlapping access. * gcc.target/riscv/cpymem-64-ooo.c: Likewise. Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu> --- gcc/config/riscv/riscv.cc | 20 +++++++++++ .../gcc.target/riscv/cpymem-32-ooo.c | 20 +++++------ .../gcc.target/riscv/cpymem-64-ooo.c | 33 +++++++------------ 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 44945d47fd6..793ec3155b9 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -286,6 +286,7 @@ struct riscv_tune_param unsigned short memory_cost; unsigned short fmv_cost; bool slow_unaligned_access; + bool overlap_op_by_pieces; bool use_divmod_expansion; unsigned int fusible_ops; const struct cpu_vector_cost *vec_costs; @@ -425,6 +426,7 @@ static const struct riscv_tune_param rocket_tune_info = { 5, /* memory_cost */ 8, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* overlap_op_by_pieces */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ @@ -442,6 +444,7 @@ static const struct riscv_tune_param sifive_7_tune_info = { 3, /* memory_cost */ 8, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* overlap_op_by_pieces */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ @@ -459,6 +462,7 @@ static const struct 
riscv_tune_param sifive_p400_tune_info = { 3, /* memory_cost */ 4, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* overlap_op_by_pieces */ false, /* use_divmod_expansion */ RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ &generic_vector_cost, /* vector cost */ @@ -476,6 +480,7 @@ static const struct riscv_tune_param sifive_p600_tune_info = { 3, /* memory_cost */ 4, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* overlap_op_by_pieces */ false, /* use_divmod_expansion */ RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ &generic_vector_cost, /* vector cost */ @@ -493,6 +498,7 @@ static const struct riscv_tune_param thead_c906_tune_info = { 5, /* memory_cost */ 8, /* fmv_cost */ false, /* slow_unaligned_access */ + false, /* overlap_op_by_pieces */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ @@ -510,6 +516,7 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = { 3, /* memory_cost */ 3, /* fmv_cost */ true, /* slow_unaligned_access */ + false, /* overlap_op_by_pieces */ false, /* use_divmod_expansion */ RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */ NULL, /* vector cost */ @@ -527,6 +534,7 @@ static const struct riscv_tune_param generic_ooo_tune_info = { 4, /* memory_cost */ 4, /* fmv_cost */ false, /* slow_unaligned_access */ + true, /* overlap_op_by_pieces */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ &generic_vector_cost, /* vector cost */ @@ -544,6 +552,7 @@ static const struct riscv_tune_param optimize_size_tune_info = { 2, /* memory_cost */ 8, /* fmv_cost */ false, /* slow_unaligned_access */ + false, /* overlap_op_by_pieces */ false, /* use_divmod_expansion */ RISCV_FUSE_NOTHING, /* fusible_ops */ NULL, /* vector cost */ @@ -9923,6 +9932,14 @@ riscv_slow_unaligned_access (machine_mode, unsigned int) return riscv_slow_unaligned_access_p; } +/* Implement TARGET_OVERLAP_OP_BY_PIECES_P. 
*/ + +static bool +riscv_overlap_op_by_pieces (void) +{ + return tune_param->overlap_op_by_pieces; +} + /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ static bool @@ -11340,6 +11357,9 @@ riscv_get_raw_result_mode (int regno) #undef TARGET_SLOW_UNALIGNED_ACCESS #define TARGET_SLOW_UNALIGNED_ACCESS riscv_slow_unaligned_access +#undef TARGET_OVERLAP_OP_BY_PIECES_P +#define TARGET_OVERLAP_OP_BY_PIECES_P riscv_overlap_op_by_pieces + #undef TARGET_SECONDARY_MEMORY_NEEDED #define TARGET_SECONDARY_MEMORY_NEEDED riscv_secondary_memory_needed diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c index 946a773f77a..947d58c30fa 100644 --- a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c +++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c @@ -24,9 +24,8 @@ void copy_aligned_##N (void *to, void *from) \ ** ... ** lw\t[at][0-9],0\([at][0-9]\) ** sw\t[at][0-9],0\([at][0-9]\) -** ... -** lbu\t[at][0-9],6\([at][0-9]\) -** sb\t[at][0-9],6\([at][0-9]\) +** lw\t[at][0-9],3\([at][0-9]\) +** sw\t[at][0-9],3\([at][0-9]\) ** ... */ COPY_N(7) @@ -36,9 +35,8 @@ COPY_N(7) ** ... ** lw\t[at][0-9],0\([at][0-9]\) ** sw\t[at][0-9],0\([at][0-9]\) -** ... -** lbu\t[at][0-9],6\([at][0-9]\) -** sb\t[at][0-9],6\([at][0-9]\) +** lw\t[at][0-9],3\([at][0-9]\) +** sw\t[at][0-9],3\([at][0-9]\) ** ... */ COPY_ALIGNED_N(7) @@ -66,11 +64,10 @@ COPY_ALIGNED_N(8) ** ... ** ... ** lw\t[at][0-9],0\([at][0-9]\) -** ... ** sw\t[at][0-9],0\([at][0-9]\) ** ... -** lbu\t[at][0-9],10\([at][0-9]\) -** sb\t[at][0-9],10\([at][0-9]\) +** lw\t[at][0-9],7\([at][0-9]\) +** sw\t[at][0-9],7\([at][0-9]\) ** ... */ COPY_N(11) @@ -79,11 +76,10 @@ COPY_N(11) **copy_aligned_11: ** ... ** lw\t[at][0-9],0\([at][0-9]\) -** ... ** sw\t[at][0-9],0\([at][0-9]\) ** ... -** lbu\t[at][0-9],10\([at][0-9]\) -** sb\t[at][0-9],10\([at][0-9]\) +** lw\t[at][0-9],7\([at][0-9]\) +** sw\t[at][0-9],7\([at][0-9]\) ** ... 
*/ COPY_ALIGNED_N(11) diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c index 08a927b9483..108748690cd 100644 --- a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c +++ b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c @@ -24,9 +24,8 @@ void copy_aligned_##N (void *to, void *from) \ ** ... ** lw\t[at][0-9],0\([at][0-9]\) ** sw\t[at][0-9],0\([at][0-9]\) -** ... -** lbu\t[at][0-9],6\([at][0-9]\) -** sb\t[at][0-9],6\([at][0-9]\) +** lw\t[at][0-9],3\([at][0-9]\) +** sw\t[at][0-9],3\([at][0-9]\) ** ... */ COPY_N(7) @@ -36,9 +35,8 @@ COPY_N(7) ** ... ** lw\t[at][0-9],0\([at][0-9]\) ** sw\t[at][0-9],0\([at][0-9]\) -** ... -** lbu\t[at][0-9],6\([at][0-9]\) -** sb\t[at][0-9],6\([at][0-9]\) +** lw\t[at][0-9],3\([at][0-9]\) +** sw\t[at][0-9],3\([at][0-9]\) ** ... */ COPY_ALIGNED_N(7) @@ -66,9 +64,8 @@ COPY_ALIGNED_N(8) ** ... ** ld\t[at][0-9],0\([at][0-9]\) ** sd\t[at][0-9],0\([at][0-9]\) -** ... -** lbu\t[at][0-9],10\([at][0-9]\) -** sb\t[at][0-9],10\([at][0-9]\) +** lw\t[at][0-9],7\([at][0-9]\) +** sw\t[at][0-9],7\([at][0-9]\) ** ... */ COPY_N(11) @@ -77,11 +74,9 @@ COPY_N(11) **copy_aligned_11: ** ... ** ld\t[at][0-9],0\([at][0-9]\) -** ... ** sd\t[at][0-9],0\([at][0-9]\) -** ... -** lbu\t[at][0-9],10\([at][0-9]\) -** sb\t[at][0-9],10\([at][0-9]\) +** lw\t[at][0-9],7\([at][0-9]\) +** sw\t[at][0-9],7\([at][0-9]\) ** ... */ COPY_ALIGNED_N(11) @@ -90,11 +85,9 @@ COPY_ALIGNED_N(11) **copy_15: ** ... ** ld\t[at][0-9],0\([at][0-9]\) -** ... ** sd\t[at][0-9],0\([at][0-9]\) -** ... -** lbu\t[at][0-9],14\([at][0-9]\) -** sb\t[at][0-9],14\([at][0-9]\) +** ld\t[at][0-9],7\([at][0-9]\) +** sd\t[at][0-9],7\([at][0-9]\) ** ... */ COPY_N(15) @@ -103,11 +96,9 @@ COPY_N(15) **copy_aligned_15: ** ... ** ld\t[at][0-9],0\([at][0-9]\) -** ... ** sd\t[at][0-9],0\([at][0-9]\) -** ... -** lbu\t[at][0-9],14\([at][0-9]\) -** sb\t[at][0-9],14\([at][0-9]\) +** ld\t[at][0-9],7\([at][0-9]\) +** sd\t[at][0-9],7\([at][0-9]\) ** ... 
*/ COPY_ALIGNED_N(15) -- 2.44.0