Unfortunately, this doesn't cause a performance improvement for coremark,
but it happens a few times in newlib, just enough to affect coremark by
0.01% in size (or 4 bytes, and three cycles; __fwalk_sglue and
__vfiprintf_r, two bytes each).

gcc:
        * config/cris/cris.md (splitop): Add PLUS.
        * config/cris/cris.cc (cris_split_constant): Also handle
        PLUS when a split into two insns may be useful.

gcc/testsuite:
        * gcc.target/cris/peep2-addsplit1.c: New test.
---
 gcc/config/cris/cris.cc                       | 25 +++++++-
 gcc/config/cris/cris.md                       |  6 +-
 .../gcc.target/cris/peep2-addsplit1.c         | 59 +++++++++++++++++++
 3 files changed, 88 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/cris/peep2-addsplit1.c

diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 331f5908a538..561ca1b3fa92 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -2642,7 +2642,30 @@ cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code,
   int32_t ival = (int32_t) wval;
   uint32_t uval = (uint32_t) wval;
 
-  if (code != AND || IN_RANGE(ival, -32, 31)
+  /* Can we do with two addq or two subq, improving chances of filling a
+     delay-slot?  At worst, we break even, both performance and
+     size-wise.  */
+  if (code == PLUS
+      && (IN_RANGE (ival, -63 * 2, -63 - 1)
+         || IN_RANGE (ival, 63 + 1, 63 * 2)))
+    {
+      if (generate)
+       {
+         int sign = ival < 0 ? -1 : 1;
+         int aval = abs (ival);
+
+         if (mode != SImode)
+           {
+             dest = gen_rtx_REG (SImode, REGNO (dest));
+             op = gen_rtx_REG (SImode, REGNO (op));
+           }
+         emit_insn (gen_addsi3 (dest, op, GEN_INT (63 * sign)));
+         emit_insn (gen_addsi3 (dest, op, GEN_INT ((aval - 63) * sign)));
+       }
+      return 2;
+    }
+
+  if (code != AND || IN_RANGE (ival, -32, 31)
       /* Implemented using movu.[bw] elsewhere.  */
       || ival == 255 || ival == 65535
       /* Implemented using clear.[bw] elsewhere.  */
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 53fc2f2de4af..243d47748b78 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -209,7 +209,7 @@ (define_code_iterator plusminusumin [plus minus umin])
 (define_code_iterator plusumin [plus umin])
 
 ;; For opsplit1.
-(define_code_iterator splitop [and])
+(define_code_iterator splitop [and plus])
 
 ;; The addsubbo and nd code-attributes form a hack.  We need to output
 ;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd
@@ -2984,6 +2984,10 @@ (define_peephole2 ; movandsplit1
 
 ;; Large (read: non-quick) numbers can sometimes be AND:ed by other means.
 ;; Testcase: gcc.target/cris/peep2-andsplit1.c
+;; 
+;; Another case is add<ext> N,rx with -126..-64,64..126: it has the same
+;; size and execution time as two addq or subq, but addq and subq can fill
+;; a delay-slot.
 (define_peephole2 ; opsplit1
   [(parallel
     [(set (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c b/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c
new file mode 100644
index 000000000000..7dff1d8c77c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/peep2-addsplit1.c
@@ -0,0 +1,52 @@
+/* Check that "opsplit1" with PLUS does its job.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-leading-underscore" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+int addsi (int x)
+{
+  return x + 64;
+}
+
+char addqi (char x)
+{
+  return x + 126;
+}
+
+short addhi (short x)
+{
+  return x - 64;
+}
+
+unsigned short addhi2 (short x)
+{
+  return x - 126;
+}
+
+/*
+** addsi:
+**     addq 63,.r10
+**     ret
+**     addq 1,.r10
+*/
+
+/*
+** addqi:
+**     addq 63,.r10
+**     ret
+**     addq 63,.r10
+*/
+
+/*
+** addhi:
+**     subq 63,.r10
+**     ret
+**     subq 1,.r10
+*/
+
+/*
+** addhi2:
+**     subq 63,.r10
+**     ret
+**     subq 63,.r10
+*/
-- 
2.30.2

Reply via email to