The 64-bit register-to-register moves on PRU are implemented with two
instructions moving 32-bit registers.  Defining a split for the 64-bit
moves allows this to be described in RTL, and thus allows one of the
32-bit moves to be eliminated if its destination register is dead.

Also, split the loading of non-trivial 64-bit integer constants.  The
resulting 32-bit integer constants have a better chance of being loaded
with something more efficient than an "ldi32".

For now do the splits only after register allocation, because LRA does
not yet efficiently handle subregs.  See
https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651366.html

This patch shows a slight code size improvement for the wikisort
benchmark from embench-iot:

Benchmark          size-before  size-after  difference
---------          -----------  ----------  ----------
aha-mont64          1,648       1,648       0
crc32                 104       104         0
depthconv           1,172       1,172       0
edn                 3,040       3,040       0
huffbench           1,616       1,616       0
matmult-int           748       748         0
md5sum                700       700         0
nettle-aes          2,664       2,664       0
nettle-sha256       5,732       5,732       0
nsichneu           21,372       21,372      0
picojpeg            9,716       9,716       0
qrduino             8,556       8,556       0
sglib-combined      3,724       3,724       0
slre                3,488       3,488       0
statemate           1,132       1,132       0
tarfind               652       652         0
ud                  1,004       1,004       0
wikisort           18,120       18,092      -28
xgboost               300       300         0

gcc/ChangeLog:

        * config/pru/pru.md (reg move splitter): New splitter for 64-bit
        register moves into two 32-bit moves.
        (const_int move splitter): New splitter for 64-bit constant
        integer moves into two 32-bit moves.

gcc/testsuite/ChangeLog:

        * gcc.target/pru/mov64-subreg-1.c: New test.
        * gcc.target/pru/mov64-subreg-2.c: New test.

Signed-off-by: Dimitar Dimitrov <dimi...@dinux.eu>
---
 gcc/config/pru/pru.md                         | 77 +++++++++++++++++++
 gcc/testsuite/gcc.target/pru/mov64-subreg-1.c |  9 +++
 gcc/testsuite/gcc.target/pru/mov64-subreg-2.c |  8 ++
 3 files changed, 94 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/pru/mov64-subreg-1.c
 create mode 100644 gcc/testsuite/gcc.target/pru/mov64-subreg-2.c

diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index fcd310613f5..3504e42e900 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -283,6 +283,83 @@ (define_insn "mov<mode>"
   [(set_attr "type" "st,ld,alu,alu,alu,alu,alu,alu")
    (set_attr "length" "4,4,4,4,8,8,8,16")])
 
+; Break 64-bit register-to-register moves into 32-bit moves.
+; If only one 32-bit subreg of the destination is used, this split
+; allows the move into the other 32-bit subreg to be eliminated.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (match_operand:DI 1 "register_operand"))]
+  "
+   /* TODO - LRA does not yet handle subregs efficiently.
+      So it is profitable to split only after register allocation is
+      complete.
+      Once https://gcc.gnu.org/pipermail/gcc-patches/2024-May/651366.html
+      is merged, this condition should be removed to allow splitting
+      before LRA.  */
+   reload_completed
+   /* A paradoxical source is split only if promoted unsigned: the code below
+      can only zero-extend; sign extension needs expansion of its own pattern.  */
+   && (SUBREG_P (operands[1]) && paradoxical_subreg_p (operands[1])
+       ? SUBREG_PROMOTED_VAR_P (operands[1])
+         && SUBREG_PROMOTED_UNSIGNED_P (operands[1]) > 0
+       : true)"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+  "
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+  /* Operands 0/1 form the first emitted set, operands 2/3 the second.  */
+  if (SUBREG_P (operands[1]) && paradoxical_subreg_p (operands[1]))
+    {
+      gcc_assert (SUBREG_PROMOTED_VAR_P (operands[1]));
+      gcc_assert (SUBREG_PROMOTED_UNSIGNED_P (operands[1]) > 0);
+      /* Zero extension: copy the offset-0 word, clear the offset-4 word.  */
+      operands[0] = dst_lo;
+      operands[1] = src_lo;
+      operands[2] = dst_hi;
+      operands[3] = const0_rtx;
+    }
+  else if (!reg_overlap_mentioned_p (dst_lo, src_hi))
+    {
+      operands[0] = dst_lo;  /* dst_lo does not overlap src_hi: lo first.  */
+      operands[1] = src_lo;
+      operands[2] = dst_hi;
+      operands[3] = src_hi;
+    }
+  else
+    {
+      operands[0] = dst_hi;  /* dst_lo overlaps src_hi: move hi first.  */
+      operands[1] = src_hi;
+      operands[2] = dst_lo;
+      operands[3] = src_lo;
+    }
+  "
+)
+
+; Break loading of non-trivial 64-bit constant integers.  The split
+; by itself will not generate a better code sequence, but it at least
+; allows dropping a non-live 32-bit part of the destination, or better
+; constant propagation.
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+       (match_operand:DI 1 "const_int_operand"))]
+  "reload_completed
+   && !satisfies_constraint_Z (operands[1])
+   && !satisfies_constraint_Um (operands[1])
+   && !satisfies_constraint_T (operands[1])"
+  ; Operands 2/3 (byte offset 4) are derived before 0/1 are narrowed to SImode.
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2) (match_dup 3))]
+  "
+  operands[2] = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  operands[3] = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+  operands[0] = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  operands[1] = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  "
+)
+
 ;
 ; load_multiple pattern(s).
 ;
diff --git a/gcc/testsuite/gcc.target/pru/mov64-subreg-1.c b/gcc/testsuite/gcc.target/pru/mov64-subreg-1.c
new file mode 100644
index 00000000000..9b60aa033f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/mov64-subreg-1.c
@@ -0,0 +1,9 @@
+/* { dg-do assemble } */
+/* { dg-options "-Os" } */
+/* { dg-final { object-size text == 8 } } */
+
+/* B is truncated on return; presumably the split lets the dead high move go.  */
+unsigned test(char a, unsigned long long b)
+{
+        return b;
+}
diff --git a/gcc/testsuite/gcc.target/pru/mov64-subreg-2.c b/gcc/testsuite/gcc.target/pru/mov64-subreg-2.c
new file mode 100644
index 00000000000..146cf945608
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/mov64-subreg-2.c
@@ -0,0 +1,8 @@
+/* { dg-do assemble } */
+/* { dg-options "-Os" } */
+/* { dg-final { object-size text == 12 } } */
+/* Presumably a 64-bit constant that the new splitter loads as two SImode sets.  */
+unsigned long long test(void)
+{
+       return 0xffffffff00000000UL;
+}
-- 
2.49.0

Reply via email to