As mentioned in PR65768, ARM gcc generates suboptimal code for constant
Uses in loop. Part of the reason is that ARM back-end is splitting
constants during expansion of RTL, making it hard for the RTL
optimization passes to optimize it. Zhenqiang posted a patch at
https://gcc.gnu.org/ml/gcc-patches/2014-08/msg00325.html to fix this

As mentioned in PR65768, I tried with few more test-cases and enhanced
it. Regression tested on arm-none-linux-gnu and no new regressions. Is
this OK for trunk?

Thanks,
Kugan


gcc/ChangeLog:

2015-04-15  Kugan Vivekanandarajah  <kug...@linaro.org>
            Zhenqiang Chen  <zhenqiang.c...@linaro.org>

        PR target/65768
        * config/arm/arm-protos.h (const_ok_for_split): New definition.
        * config/arm/arm.c (const_ok_for_split): New function.
        * config/arm/arm.md (subsi3, andsi3, iorsi3, xorsi3, movsi): Keep some
         large constants in register instead of splitting them.

gcc/testsuite/ChangeLog:

2015-04-15  Kugan Vivekanandarajah  <kug...@linaro.org>
            Zhenqiang Chen  <zhenqiang.c...@linaro.org>

        PR target/65768
        * gcc.target/arm/maskdata.c: New test.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 16eb854..1b131a9 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -58,6 +58,7 @@ extern bool arm_modes_tieable_p (machine_mode, machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
+extern int const_ok_for_split (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
                               HOST_WIDE_INT, rtx, rtx, int);
 extern int legitimate_pic_operand_p (rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8fd1388..0c13666 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3745,6 +3745,41 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code 
code)
     }
 }
 
+/* Return true if I is a valid constant for split with the operation CODE.
+   The condition should align with the constrain of the corresponding
+   define_insn_and_split pattern to make sure later pass can optimize
+   the constants.  */
+int
+const_ok_for_split (HOST_WIDE_INT i, enum rtx_code code)
+{
+  if (optimize < 2
+      || !can_create_pseudo_p ()
+      || const_ok_for_arm (i)
+        /* Since expand pass always uses "sign-extend" to get the value
+           (trunc_int_for_mode called from immed_wide_int_const) for rtl,
+           and logs show most negative values are UNSIGNED when they are
+           TREE node. And combine pass is smart enough to recover the
+           negative value to positive value.  */
+      || ((i < 0) && const_ok_for_arm (-i)))
+    return 1;
+
+  switch (code)
+    {
+    case AND:
+      /* zero_extendhi instruction is efficient.  */
+      return const_ok_for_arm (~i) || (i == 0xffff);
+
+    case IOR:
+      return TARGET_THUMB2 && const_ok_for_arm (~i);
+
+    case SET:
+      return const_ok_for_arm (i) || const_ok_for_arm (~i);
+
+    default:
+      return 1;
+    }
+}
+
 /* Emit a sequence of insns to handle a large constant.
    CODE is the code of the operation required, it can be any of SET, PLUS,
    IOR, AND, XOR, MINUS;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 164ac13..a169775 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1164,10 +1164,16 @@
     {
       if (TARGET_32BIT)
         {
-          arm_split_constant (MINUS, SImode, NULL_RTX,
-                             INTVAL (operands[1]), operands[0],
-                             operands[2], optimize && can_create_pseudo_p ());
-          DONE;
+         if (!const_ok_for_split (INTVAL (operands[1]), MINUS))
+           operands[1] = force_reg (SImode, operands[1]);
+         else
+           {
+             arm_split_constant (MINUS, SImode, NULL_RTX,
+                                 INTVAL (operands[1]), operands[0],
+                                 operands[2],
+                                 optimize && can_create_pseudo_p ());
+             DONE;
+           }
        }
       else /* TARGET_THUMB1 */
         operands[1] = force_reg (SImode, operands[1]);
@@ -2078,14 +2084,19 @@
              operands[1] = convert_to_mode (QImode, operands[1], 1);
              emit_insn (gen_thumb2_zero_extendqisi2_v6 (operands[0],
                                                         operands[1]));
+             DONE;
            }
+         else if (!const_ok_for_split (INTVAL (operands[2]), AND))
+           operands[2] = force_reg (SImode, operands[2]);
          else
-           arm_split_constant (AND, SImode, NULL_RTX,
-                               INTVAL (operands[2]), operands[0],
-                               operands[1],
-                               optimize && can_create_pseudo_p ());
+           {
+             arm_split_constant (AND, SImode, NULL_RTX,
+                                 INTVAL (operands[2]), operands[0],
+                                 operands[1],
+                                 optimize && can_create_pseudo_p ());
 
-          DONE;
+             DONE;
+           }
         }
     }
   else /* TARGET_THUMB1 */
@@ -2884,10 +2895,16 @@
     {
       if (TARGET_32BIT)
         {
-          arm_split_constant (IOR, SImode, NULL_RTX,
-                             INTVAL (operands[2]), operands[0], operands[1],
-                             optimize && can_create_pseudo_p ());
-          DONE;
+         if (!const_ok_for_split (INTVAL (operands[2]), IOR))
+           operands[2] = force_reg (SImode, operands[2]);
+         else
+           {
+             arm_split_constant (IOR, SImode, NULL_RTX,
+                                 INTVAL (operands[2]), operands[0],
+                                 operands[1],
+                                 optimize && can_create_pseudo_p ());
+             DONE;
+           }
        }
       else /* TARGET_THUMB1 */
         {
@@ -3054,10 +3071,16 @@
     {
       if (TARGET_32BIT)
         {
-          arm_split_constant (XOR, SImode, NULL_RTX,
-                             INTVAL (operands[2]), operands[0], operands[1],
-                             optimize && can_create_pseudo_p ());
-          DONE;
+         if (!const_ok_for_split (INTVAL (operands[2]), XOR))
+           operands[2] = force_reg (SImode, operands[2]);
+         else
+           {
+             arm_split_constant (XOR, SImode, NULL_RTX,
+                                 INTVAL (operands[2]), operands[0],
+                                 operands[1],
+                                 optimize && can_create_pseudo_p ());
+             DONE;
+           }
        }
       else /* TARGET_THUMB1 */
         {
@@ -5554,10 +5577,18 @@
           && !(const_ok_for_arm (INTVAL (operands[1]))
                || const_ok_for_arm (~INTVAL (operands[1]))))
         {
-           arm_split_constant (SET, SImode, NULL_RTX,
-                              INTVAL (operands[1]), operands[0], NULL_RTX,
-                              optimize && can_create_pseudo_p ());
-          DONE;
+          if (!const_ok_for_split (INTVAL (operands[1]), SET))
+            {
+               emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+               DONE;
+            }
+         else
+            {
+               arm_split_constant (SET, SImode, NULL_RTX,
+                                   INTVAL (operands[1]), operands[0], NULL_RTX,
+                                   optimize && can_create_pseudo_p ());
+               DONE;
+            }
         }
     }
   else /* TARGET_THUMB1...  */
diff --git a/gcc/testsuite/gcc.target/arm/maskdata.c 
b/gcc/testsuite/gcc.target/arm/maskdata.c
index e69de29..6d6bb39 100644
--- a/gcc/testsuite/gcc.target/arm/maskdata.c
+++ b/gcc/testsuite/gcc.target/arm/maskdata.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -fno-gcse " }  */
+/* { dg-require-effective-target arm_thumb2_ok } */
+
+#define MASK 0xff00ff
+void maskdata (int * data, int len)
+{
+   int i = len;
+   for (; i > 0; i -= 2)
+    {
+      data[i] &= MASK;
+      data[i + 1] &= MASK;
+    }
+}
+/* { dg-final { scan-assembler-not "65280" } } */

Reply via email to