New algorithmic optimisations:
((X inner_op C0) outer_op C1)
With X being a tree for which value_range analysis has determined that certain bits are always zero throughout its computed value range (we call the mask of these known-zero bits the zero_mask),
   and with inner_op = {|,^}, outer_op = {|,^} and inner_op != outer_op:
   if (inner_op == '^') C0 &= ~C1;
   if ((C0 & ~zero_mask) == 0) then emit (X outer_op (C0 outer_op C1))
   if ((C1 & ~zero_mask) == 0) then emit (X inner_op (C0 outer_op C1))
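
As a worked illustration (not part of the patch; the function name and constants are invented for this example), take inner_op = '|' and outer_op = '^'. After a logical shift right, bit 15 of an unsigned short is known to be zero, so C0 = 0x8000 lies entirely within the zero_mask and the first rule applies:

/* Illustrative sketch only: names and constants do not appear in the patch.  */
unsigned short
example (unsigned short x)
{
  x >>= 1;        /* value_range knows bit 15 of x is now 0, so
                     zero_mask includes 0x8000.                          */
  x |= 0x8000;    /* inner_op = '|', C0 = 0x8000 (subset of zero_mask)   */
  x ^= 0x0001;    /* outer_op = '^', C1 = 0x0001                         */
  /* (C0 & ~zero_mask) == 0, so the pattern emits
     X outer_op (C0 outer_op C1), i.e. (x >> 1) ^ 0x8001.                */
  return x;
}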

(X {&,^,|} C2) << C1 -> (X << C1) {&,^,|} (C2 << C1)
(X {&,^,|} C2) >> C1 -> (X >> C1) {&,^,|} (C2 >> C1)
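
A minimal before/after sketch of the third transformation (the constants are arbitrary and chosen only for illustration):

/* Illustrative only.  Distributing the shift over the AND turns the
   mask into the compile-time constant 0x00f0 >> 4 == 0x000f.  */
unsigned int
before (unsigned int x)
{
  return (x & 0x00f0) >> 4;   /* (X & C2) >> C1           */
}

unsigned int
after (unsigned int x)
{
  return (x >> 4) & 0x000f;   /* (X >> C1) & (C2 >> C1)   */
}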

The second and third transformations enable more applications of the first. Some targets may also benefit from delaying shift operations. I am aware that such an optimization, in combination with one or more optimizations that perform the reverse transformation, could lead to an infinite loop; however, no such behavior was detected during regression testing and bootstrapping on aarch64.
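
For instance, test1 in the new testcase below shows the two interacting: the shift is first distributed over the XOR, and the resulting known-zero high bit then lets the new pattern fold the OR into the XOR (annotations added here for exposition):

unsigned short
test1 (unsigned short a)
{
  a ^= 0x4002;   /* (a ^ 0x4002)                                        */
  a >>= 1;       /* shift distributed: (a >> 1) ^ 0x2001                */
  a |= 0x8000;   /* bit 15 of (a >> 1) is known zero, so the new
                    pattern folds the OR in: (a >> 1) ^ 0xa001          */
  return a;
}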

This patch backports the optimization from trunk to the embedded-5-branch.

The original patches are at:
https://gcc.gnu.org/ml/gcc-patches/2015-08/msg01493.html (for the first optimization and the changes to the second and third)
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg00517.html (the addition of the original second and third optimizations)

gcc/ChangeLog:

2015-11-27  Andre Vieira  <andre.simoesdiasvie...@arm.com>
Backport from mainline:

  2015-10-09  Andre Vieira  <andre.simoesdiasvie...@arm.com>

        * match.pd: ((X inner_op C0) outer_op C1) New pattern.
        ((X & C2) << C1): Expand to...
        (X {&,^,|} C2 << C1): ...This.
        ((X & C2) >> C1): Expand to...
        (X {&,^,|} C2 >> C1): ...This.
  2015-07-07  Richard Biener  <rguent...@suse.de>

        * fold-const.c (fold_binary_loc): Move
        (X & C2) << C1 -> (X << C1) & (C2 << C1) simplification ...
        * match.pd: ... here.


gcc/testsuite/ChangeLog:

2015-11-27  Andre Vieira  <andre.simoesdiasvie...@arm.com>
Backport from mainline:

  2015-10-09  Andre Vieira  <andre.simoesdiasvie...@arm.com>
            Hale Wang  <hale.w...@arm.com>

        * gcc.dg/tree-ssa/forwprop-33.c: New.
From ac76659583ab0d9eb96552a540a22e66fdd430f7 Mon Sep 17 00:00:00 2001
From: Andre Simoes Dias Vieira <andsi...@arm.com>
Date: Fri, 23 Oct 2015 15:35:27 +0100
Subject: [PATCH 1/2] backport algorithmic optimization

---
 gcc/fold-const.c                            | 21 ---------
 gcc/match.pd                                | 54 ++++++++++++++++++++++
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c | 71 +++++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c

diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 6d085b185895f715d99a53a9e706295aa983d52a..5e0477f974173627317a25e018c9a6e098b099fb 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -12080,27 +12080,6 @@ fold_binary_loc (location_t loc,
 			     prec) == 0)
 	return TREE_OPERAND (arg0, 0);
 
-      /* Fold (X & C2) << C1 into (X << C1) & (C2 << C1)
-	      (X & C2) >> C1 into (X >> C1) & (C2 >> C1)
-	 if the latter can be further optimized.  */
-      if ((code == LSHIFT_EXPR || code == RSHIFT_EXPR)
-	  && TREE_CODE (arg0) == BIT_AND_EXPR
-	  && TREE_CODE (arg1) == INTEGER_CST
-	  && TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST)
-	{
-	  tree mask = fold_build2_loc (loc, code, type,
-				   fold_convert_loc (loc, type,
-						     TREE_OPERAND (arg0, 1)),
-				   arg1);
-	  tree shift = fold_build2_loc (loc, code, type,
-				    fold_convert_loc (loc, type,
-						      TREE_OPERAND (arg0, 0)),
-				    arg1);
-	  tem = fold_binary_loc (loc, BIT_AND_EXPR, type, shift, mask);
-	  if (tem)
-	    return tem;
-	}
-
       return NULL_TREE;
 
     case MIN_EXPR:
diff --git a/gcc/match.pd b/gcc/match.pd
index e40720e130fe0302466969e39ad5803960b942da..bea0237ae3b335c73bbc71f6d3997b1d84a58986 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -389,6 +389,49 @@ along with GCC; see the file COPYING3.  If not see
 	&& (TREE_CODE (@4) != SSA_NAME || has_single_use (@4)))
    (bit_xor (bit_and (bit_xor @0 @1) @2) @0)))
 
+/* ((X inner_op C0) outer_op C1)
+   With X being a tree where value_range has reasoned certain bits to always be
+   zero throughout its computed value range,
+   inner_op = {|,^}, outer_op = {|,^} and inner_op != outer_op
+   where zero_mask has 1's for all bits that are sure to be 0 in
+   and 0's otherwise.
+   if (inner_op == '^') C0 &= ~C1;
+   if ((C0 & ~zero_mask) == 0) then emit (X outer_op (C0 outer_op C1)
+   if ((C1 & ~zero_mask) == 0) then emit (X inner_op (C0 outer_op C1)
+*/
+(for inner_op (bit_ior bit_xor)
+     outer_op (bit_xor bit_ior)
+(simplify
+ (outer_op
+  (inner_op@3 @2 INTEGER_CST@0) INTEGER_CST@1)
+ (with
+  {
+    bool fail = false;
+    wide_int zero_mask_not;
+    wide_int C0;
+    wide_int cst_emit;
+
+    if (TREE_CODE (@2) == SSA_NAME &&
+	(TREE_CODE (@3) != SSA_NAME || has_single_use (@3)))
+      zero_mask_not = get_nonzero_bits (@2);
+    else
+      fail = true;
+
+    if (inner_op == BIT_XOR_EXPR)
+      {
+	C0 = wi::bit_and_not (@0, @1);
+	cst_emit = wi::bit_or (C0, @1);
+      }
+    else
+      {
+	C0 = @0;
+	cst_emit = wi::bit_xor (@0, @1);
+      }
+  }
+  (if (!fail && wi::bit_and (C0, zero_mask_not) == 0)
+   (outer_op @2 { wide_int_to_tree (type, cst_emit); }))
+  (if (!fail && wi::bit_and (@1, zero_mask_not) == 0)
+    (inner_op @2 { wide_int_to_tree (type, cst_emit); })))))
 
 /* Associate (p +p off1) +p off2 as (p +p (off1 + off2)).  */
 (simplify
@@ -614,6 +657,17 @@ along with GCC; see the file COPYING3.  If not see
   (cmp (bit_and (lshift integer_onep @0) integer_onep) integer_zerop)
   (icmp @0 { build_zero_cst (TREE_TYPE (@0)); })))
 
+/* Fold (X {&,^,|} C2) << C1 into (X << C1) {&,^,|} (C2 << C1)
+   (X {&,^,|} C2) >> C1 into (X >> C1) & (C2 >> C1).  */
+ (for shift (lshift rshift)
+ (for bit_op (bit_and bit_xor bit_ior)
+  (simplify
+   (shift (convert? (bit_op@4 @0 INTEGER_CST@2)) INTEGER_CST@1)
+   (with { bool fail = !(TREE_CODE (@4) != SSA_NAME || has_single_use (@4)); }
+    (if (!fail && tree_nop_conversion_p (type, TREE_TYPE (@0)))
+     (with { tree mask = int_const_binop (shift, fold_convert (type, @2), @1); }
+      (bit_op (shift (convert @0) @1) { mask; })))))))
+
 /* Simplifications of conversions.  */
 
 /* Basic strip-useless-type-conversions / strip_nops.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c
new file mode 100644
index 0000000000000000000000000000000000000000..c7124deee11f3655d7c50bd9605bb7caddba6470
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-33.c
@@ -0,0 +1,71 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop3" } */
+
+unsigned short
+test1 (unsigned short a)
+{
+  a ^= 0x4002;
+  a >>= 1;
+  a |= 0x8000; /* Simplify to ((a >> 1) ^ 0xa001).  */
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\^ 40961" "forwprop3" } } */
+
+unsigned short
+test2 (unsigned short a)
+{
+  a |= 0x4002;
+  a <<= 1;
+  a ^= 0x0001; /* Simplify to ((a << 1) | 0x8005).  */
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\| 32773" "forwprop3" } } */
+
+unsigned short
+test3 (unsigned short a)
+{
+  a &= 0xd123;
+  a ^= 0x6040;
+  a |= 0xc031; /* Simplify to ((a & 0xd123) | 0xe071).  */
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\| 57457" "forwprop3" } } */
+
+unsigned short
+test4 (unsigned short a)
+{
+  a ^= 0x8002;
+  a >>= 1;
+  a |= 0x8000;
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\^ 49153" "forwprop3" } } */
+
+unsigned short
+test5 (unsigned short a)
+{
+  a ^= 0x8002;
+  a >>= 1;
+  a |= 0x8001; /* Only move shift inward: (((a >> 1) ^ 0x4001) | 0x8001).  */
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\^ 16385" "forwprop3" } } */
+/* { dg-final { scan-tree-dump "\\| 32769" "forwprop3" } } */
+
+short
+test6 (short a)
+{
+  a &= 0x7fff;
+  a >>= 2;
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\& 8191" "forwprop3" } } */
+
+short
+test7 (short a)
+{
+  a &= 0x8fff;
+  a >>= 2;
+  return a;
+}
+/* { dg-final { scan-tree-dump "\\& -7169" "forwprop3" } } */
-- 
1.9.1
