While moves of constants into registers are separately
optimizable, a combination of a move with a subsequent "and"
is slightly preferable even if the move can be generated
with the same number (and timing) of insns, as moves of
"just" registers are eliminated now and then in different
passes, loosely speaking.  This movandsplit1 pattern feeds
into the opsplit1/AND peephole2, with matching occurrences
observed in the floating point functions in libgcc.  Also
add a test-case to fit.  Coremark improvements are unimpressive:
less than 0.0003% speed, 0.1% size.

But that was pre-LRA; after the switch to LRA this peephole2
doesn't match anymore (for any of coremark, local tests,
libgcc and newlib libc) and the test-case passes with and
without the patch.  Still, there's no apparent reason why
LRA prefers "move R1,R2" "and I,R2" to "move I,R1" "and
R1,R2", or why that wouldn't "randomly" change (also seen
with operations other than "and").  Thus committed.

gcc:
        * config/cris/cris.md (movandsplit1): New define_peephole2.

gcc/testsuite:
        * gcc.target/cris/peep2-movandsplit1.c: New test.
---
 gcc/config/cris/cris.md                       | 38 +++++++++++++++++++
 .../gcc.target/cris/peep2-movandsplit1.c      | 17 +++++++++
 2 files changed, 55 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/cris/peep2-movandsplit1.c

diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index d5aadf752e86..53fc2f2de4af 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -2944,6 +2944,44 @@ (define_peephole2 ; andqu
   operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode));
 })
 
+;; Somewhat similar to andqu, but a different range and expansion,
+;; intended to feed the output into opsplit1 with AND:
+;;  move.d 0x7ffff,$r10
+;;  and.d $r11,$r10
+;; into:
+;;  move.d $r11,$r10
+;;  and.d 0x7ffff,$r10
+;; which opsplit1/AND will change into:
+;;  move.d $r11,$r10 (unaffected by opsplit1/AND; shown only for context)
+;;  lslq 13,$r10
+;;  lsrq 13,$r10
+;; thereby winning in space, but in time only if the 0x7ffff happened to
+;; be unaligned in the code.
+(define_peephole2 ; movandsplit1
+  [(parallel ; matched insn 1
+    [(set (match_operand 0 "register_operand") ; insn 1: op0 = constant op1
+         (match_operand 1 "const_int_operand"))
+     (clobber (reg:CC CRIS_CC0_REGNUM))])
+   (parallel ; matched insn 2; condition needs ops 0, 2, 3 in one register
+    [(set (match_operand 2 "register_operand") ; insn 2: op2 = op3 & op4
+         (and (match_operand 3 "register_operand")
+              (match_operand 4 "register_operand")))
+     (clobber (reg:CC CRIS_CC0_REGNUM))])] ; NOTE(review): op4 == op0 unchecked
+  "REGNO (operands[0]) == REGNO (operands[2])
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && cris_splittable_constant_p (INTVAL (operands[1]), AND,
+                                 GET_MODE (operands[2]),
+                                 optimize_function_for_speed_p (cfun))"
+  [(parallel ; replacement insn 1: op2 = op4
+    [(set (match_dup 2) (match_dup 4))
+     (clobber (reg:CC CRIS_CC0_REGNUM))])
+   (parallel ; replacement insn 2: op2 = op5, i.e. op2 & op1
+    [(set (match_dup 2) (match_dup 5))
+     (clobber (reg:CC CRIS_CC0_REGNUM))])]
+{ /* Build operands[5]: the AND of op2 and the constant, in op2's mode.  */
+  operands[5] = gen_rtx_AND (GET_MODE (operands[2]), operands[2], operands[1]);
+})
+
 ;; Large (read: non-quick) numbers can sometimes be AND:ed by other means.
 ;; Testcase: gcc.target/cris/peep2-andsplit1.c
 (define_peephole2 ; opsplit1
diff --git a/gcc/testsuite/gcc.target/cris/peep2-movandsplit1.c b/gcc/testsuite/gcc.target/cris/peep2-movandsplit1.c
new file mode 100644
index 000000000000..e4a860d966e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/cris/peep2-movandsplit1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-final { scan-assembler-times "lsrq " 2 } } */
+/* { dg-final { scan-assembler-times "lslq " 2 } } */
+/* { dg-final { scan-assembler-times "move.d \\\$r11,\\\$r10" 2 } } */
+/* { dg-final { scan-assembler-times "\tmov" 2 } } */
+/* { dg-final { scan-assembler-not "\tand" } } */
+/* { dg-options "-O2" } */
+
+unsigned int xmovandr (unsigned int y, unsigned int x)
+{ /* y is unused; presumably it only steers x into $r11 -- confirm.  */
+  return x & 0x7ffff; /* Mask keeps the low 19 bits of x.  */
+}
+
+unsigned int xmovandl (unsigned int y, unsigned int x)
+{ /* y is unused; presumably it only steers x into $r11 -- confirm.  */
+  return x & 0xfffe0000; /* Mask keeps the high 15 bits of x.  */
+}
-- 
2.30.2

Reply via email to