https://gcc.gnu.org/g:59e6343f99eb53da07bbd6198f083ce1bbdf20d8

commit 59e6343f99eb53da07bbd6198f083ce1bbdf20d8
Author: Christoph Müllner <christoph.muell...@vrull.eu>
Date:   Mon Apr 29 02:53:20 2024 +0200

    RISC-V: Allow by-pieces to do overlapping accesses in block_move_straight
    
    The current implementation of riscv_block_move_straight() emits a couple
    of loads/stores with maximum width (e.g. 8-byte for RV64).
    The remainder is handed over to move_by_pieces().
    The by-pieces framework utilizes target hooks to decide about the emitted
    instructions (e.g. unaligned accesses or overlapping accesses).
    
    Since the current implementation will always request less than XLEN bytes
    to be handled by the by-pieces infrastructure, it is impossible that
    overlapping memory accesses can ever be emitted (the by-pieces code does
    not know of any previous instructions that were emitted by the backend).
    
    This patch changes the implementation of riscv_block_move_straight()
    such that it utilizes the by-pieces framework if the remaining data
    is less than 2*XLEN bytes, which is sufficient to enable overlapping
    memory accesses (if the requirements for them are met).
    
    The changes in the expansion can be seen in the adjustments of the
    cpymem-NN-ooo test cases. The changes in the cpymem-NN tests are
    caused by the different instruction ordering of the code emitted
    by the by-pieces infrastructure, which emits alternating load/store
    sequences.
    
    gcc/ChangeLog:
    
            * config/riscv/riscv-string.cc (riscv_block_move_straight):
            Hand over up to 2xXLEN bytes to move_by_pieces().
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/cpymem-32-ooo.c: Adjustments for overlapping
            access.
            * gcc.target/riscv/cpymem-32.c: Adjustments for code emitted by
            by-pieces.
            * gcc.target/riscv/cpymem-64-ooo.c: Adjustments for overlapping
            access.
            * gcc.target/riscv/cpymem-64.c: Adjustments for code emitted by
            by-pieces.
    
    Signed-off-by: Christoph Müllner <christoph.muell...@vrull.eu>
    (cherry picked from commit ad22c607f3e17f2c6ca45699c1d88adaa618c23c)

Diff:
---
 gcc/config/riscv/riscv-string.cc               |  6 +++---
 gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c | 16 ++++++++--------
 gcc/testsuite/gcc.target/riscv/cpymem-32.c     | 10 ++++------
 gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c |  8 ++++----
 gcc/testsuite/gcc.target/riscv/cpymem-64.c     |  9 +++------
 5 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index b6cd70323563..96394844bbb6 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -637,18 +637,18 @@ riscv_block_move_straight (rtx dest, rtx src, unsigned HOST_WIDE_INT length,
   delta = bits / BITS_PER_UNIT;
 
   /* Allocate a buffer for the temporary registers.  */
-  regs = XALLOCAVEC (rtx, length / delta);
+  regs = XALLOCAVEC (rtx, length / delta - 1);
 
   /* Load as many BITS-sized chunks as possible.  Use a normal load if
      the source has enough alignment, otherwise use left/right pairs.  */
-  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+  for (offset = 0, i = 0; offset + 2 * delta <= length; offset += delta, i++)
     {
       regs[i] = gen_reg_rtx (mode);
       riscv_emit_move (regs[i], adjust_address (src, mode, offset));
     }
 
   /* Copy the chunks to the destination.  */
-  for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+  for (offset = 0, i = 0; offset + 2 * delta <= length; offset += delta, i++)
     riscv_emit_move (adjust_address (dest, mode, offset), regs[i]);
 
   /* Mop up any left-over bytes.  */
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
index 947d58c30fa3..2a48567353a6 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32-ooo.c
@@ -91,8 +91,8 @@ COPY_ALIGNED_N(11)
 **    ...
 **    sw\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],14\([at][0-9]\)
-**    sb\t[at][0-9],14\([at][0-9]\)
+**    lw\t[at][0-9],11\([at][0-9]\)
+**    sw\t[at][0-9],11\([at][0-9]\)
 **    ...
 */
 COPY_N(15)
@@ -104,8 +104,8 @@ COPY_N(15)
 **    ...
 **    sw\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],14\([at][0-9]\)
-**    sb\t[at][0-9],14\([at][0-9]\)
+**    lw\t[at][0-9],11\([at][0-9]\)
+**    sw\t[at][0-9],11\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(15)
@@ -117,8 +117,8 @@ COPY_ALIGNED_N(15)
 **    ...
 **    sw\t[at][0-9],20\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],26\([at][0-9]\)
-**    sb\t[at][0-9],26\([at][0-9]\)
+**    lw\t[at][0-9],23\([at][0-9]\)
+**    sw\t[at][0-9],23\([at][0-9]\)
 **    ...
 */
 COPY_N(27)
@@ -130,8 +130,8 @@ COPY_N(27)
 **    ...
 **    sw\t[at][0-9],20\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],26\([at][0-9]\)
-**    sb\t[at][0-9],26\([at][0-9]\)
+**    lw\t[at][0-9],23\([at][0-9]\)
+**    sw\t[at][0-9],23\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(27)
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-32.c b/gcc/testsuite/gcc.target/riscv/cpymem-32.c
index 44ba14a1d51f..2030a39ca970 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-32.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-32.c
@@ -24,10 +24,10 @@ void copy_aligned_##N (void *to, void *from)                
\
 **    ...
 **    lbu\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    ...
 **    sb\t[at][0-9],0\([at][0-9]\)
 **    ...
+**    lbu\t[at][0-9],6\([at][0-9]\)
+**    ...
 **    sb\t[at][0-9],6\([at][0-9]\)
 **    ...
 */
@@ -50,10 +50,9 @@ COPY_ALIGNED_N(7)
 **    ...
 **    lbu\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],7\([at][0-9]\)
-**    ...
 **    sb\t[at][0-9],0\([at][0-9]\)
 **    ...
+**    lbu\t[at][0-9],7\([at][0-9]\)
 **    sb\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
@@ -73,10 +72,9 @@ COPY_ALIGNED_N(8)
 **    ...
 **    lbu\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    ...
 **    sb\t[at][0-9],0\([at][0-9]\)
 **    ...
+**    lbu\t[at][0-9],10\([at][0-9]\)
 **    sb\t[at][0-9],10\([at][0-9]\)
 **    ...
 */
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
index 108748690cd3..147324093cb1 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-64-ooo.c
@@ -110,8 +110,8 @@ COPY_ALIGNED_N(15)
 **    ...
 **    sd\t[at][0-9],16\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],26\([at][0-9]\)
-**    sb\t[at][0-9],26\([at][0-9]\)
+**    lw\t[at][0-9],23\([at][0-9]\)
+**    sw\t[at][0-9],23\([at][0-9]\)
 **    ...
 */
 COPY_N(27)
@@ -123,8 +123,8 @@ COPY_N(27)
 **    ...
 **    sd\t[at][0-9],16\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],26\([at][0-9]\)
-**    sb\t[at][0-9],26\([at][0-9]\)
+**    lw\t[at][0-9],23\([at][0-9]\)
+**    sw\t[at][0-9],23\([at][0-9]\)
 **    ...
 */
 COPY_ALIGNED_N(27)
diff --git a/gcc/testsuite/gcc.target/riscv/cpymem-64.c b/gcc/testsuite/gcc.target/riscv/cpymem-64.c
index bdfaca0d46a8..37b8ef0e0200 100644
--- a/gcc/testsuite/gcc.target/riscv/cpymem-64.c
+++ b/gcc/testsuite/gcc.target/riscv/cpymem-64.c
@@ -24,10 +24,9 @@ void copy_aligned_##N (void *to, void *from)         \
 **    ...
 **    lbu\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],6\([at][0-9]\)
-**    ...
 **    sb\t[at][0-9],0\([at][0-9]\)
 **    ...
+**    lbu\t[at][0-9],6\([at][0-9]\)
 **    sb\t[at][0-9],6\([at][0-9]\)
 **    ...
 */
@@ -50,10 +49,9 @@ COPY_ALIGNED_N(7)
 **    ...
 **    lbu\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],7\([at][0-9]\)
-**    ...
 **    sb\t[at][0-9],0\([at][0-9]\)
 **    ...
+**    lbu\t[at][0-9],7\([at][0-9]\)
 **    sb\t[at][0-9],7\([at][0-9]\)
 **    ...
 */
@@ -73,10 +71,9 @@ COPY_ALIGNED_N(8)
 **    ...
 **    lbu\t[at][0-9],0\([at][0-9]\)
 **    ...
-**    lbu\t[at][0-9],10\([at][0-9]\)
-**    ...
 **    sb\t[at][0-9],0\([at][0-9]\)
 **    ...
+**    lbu\t[at][0-9],10\([at][0-9]\)
 **    sb\t[at][0-9],10\([at][0-9]\)
 **    ...
 */

Reply via email to