On Thu, May 28, 2015 at 09:48:10PM +0200, Marc Glisse wrote:
> Side note: if we are looking for extra patterns to simplify, llvm has an
> almost unlimited supply. Here are a few we don't seem to have (there are
> more where those came from), of course several need constraining /
> generalizing, it is just a list of hints I wrote for myself.
> 
> (A|B) & ~(A&B) -> A^B
> (A | B) & ((~A) ^ B) -> (A & B)
> (A & (~B)) | (A ^ B) -> (A ^ B)
> ((B | C) & A) | B -> B | (A & C)
> A | ( A ^ B) -> A |  B
> A | (~A ^ B) -> A | ~B
> (A ^ B) & ((B ^ C) ^ A) -> (A ^ B) & ~C
> (A ^ B) | ((B ^ C) ^ A) -> (A ^ B) | C
> (A & B) | (A ^ B) -> (A | B)
> A | ~(A ^ B) -> A | ~B
> (A & B) | ((~A) ^ B) -> (~A ^ B)
> ~(~X & Y) -> (X | ~Y)
> ~(~X >>s Y) -> (X >>s Y)
> (A & B)^(A | B) -> A ^ B
> (A | ~B) ^ (~A | B) -> A ^ B
> (A & ~B) ^ (~A & B) -> A ^ B
> (A ^ C)^(A | B) -> ((~A) & B) ^ C
> (A & B) ^ (A ^ B) -> (A | B)
> (A & ~B) ^ (~A) -> ~(A & B)
> (A&B)+(A^B) -> A|B
> (A&B)+(A|B) -> A+B
> (A|B)-(A^B) -> A&B
> ((X | Y) - X) -> (~X & Y)
> fmax(x,NaN) -> x
> fmax(a,fmax(a,b)) -> fmax(a,b)
> (X+2) >u X -> x <u 256-2
> (1 << X) <  30 -> X <= 4
> ((X & ~7) == 0) -> X < 8
> 2 * X < 5 -> X <= 2
> ((1 << x)&8) == 0 -> x != 3
> ((1 << x)&7) == 0 -> x > 2
> Y - Z < X - Z -> Y < X
> 3 * X == 3 * Y -> X == Y
> A >> 3 == B >> 3 -> (A ^ B) < 8
> (float)int <= 4.4 -> int <= 4
> x unle x -> x ord x

Thanks for this list.  I'll look at implementing (some of) them.
 
> On Thu, 28 May 2015, Jakub Jelinek wrote:
> 
> >Is CST2 a multiple of CST1 the best test though?

Apparently not ;).

> >I mean say in
> >(0x8001U << x) == 0x20000U
> >0x20000U isn't a multiple of 0x8001U, yet there is only one
> >valid value of x for which it holds (17), so we could very well
> >optimize that to x == 17.

Yeah.

> >If popcount of the CST1 is 1, then multiple_of_p is supposedly sufficient
> >(have you checked if CST1 is negative that it still works?), for others
> >supposedly we could have a helper function that would just try
> >in a loop all shift counts from 0 to precision - 1, and note when
> >(CST1 << b) == CST2 - if for no b, then it should fold regardless of
> >has_single_use to false or true, if for exactly one shift count, then
> >use a comparison against that shift count, otherwise give up?
> 
> ctz(CST2)-ctz(CST1) should provide a single candidate without looping.
> ctz(CST1) is also relevant when CST2==0.

That seems to work well so the following patch is an attempt to do it
so.
If CST2 is non-zero, we compute a candidate and verify whether this
candidate works.  If so, we know there's exactly one so we should be
able to fold the shift into comparison.
I've tried even negative numbers and it seems to DTRT, but I'd certainly
appreciate if y'all could take a look at this.  Thanks.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-06-08  Marek Polacek  <pola...@redhat.com>

        PR tree-optimization/66299
        * match.pd ((CST1 << A) == CST2 -> A == ctz (CST2) - ctz (CST1)
        ((CST1 << A) != CST2 -> A != ctz (CST2) - ctz (CST1)): New
        patterns.

        * gcc.dg/pr66299-1.c: New test.
        * gcc.dg/pr66299-2.c: New test.

diff --git gcc/match.pd gcc/match.pd
index abd7851..32e913c 100644
--- gcc/match.pd
+++ gcc/match.pd
@@ -676,6 +676,18 @@ along with GCC; see the file COPYING3.  If not see
   (cmp (bit_and (lshift integer_onep @0) integer_onep) integer_zerop)
   (icmp @0 { build_zero_cst (TREE_TYPE (@0)); })))
 
+/* (CST1 << A) == CST2 -> A == ctz (CST2) - ctz (CST1)
+   (CST1 << A) != CST2 -> A != ctz (CST2) - ctz (CST1)
+   if CST2 != 0.  */
+(for cmp (ne eq)
+ (simplify
+  (cmp (lshift INTEGER_CST@0 @1) INTEGER_CST@2)
+  (with {
+   unsigned int cand = wi::ctz (@2) - wi::ctz (@0); }
+  (if (!integer_zerop (@2)
+       && wi::eq_p (wi::lshift (@0, cand), @2))
+   (cmp @1 { build_int_cst (TREE_TYPE (@1), cand); })))))
+
 /* Simplifications of conversions.  */
 
 /* Basic strip-useless-type-conversions / strip_nops.  */
diff --git gcc/testsuite/gcc.dg/pr66299-1.c gcc/testsuite/gcc.dg/pr66299-1.c
index e69de29..e7b978d 100644
--- gcc/testsuite/gcc.dg/pr66299-1.c
+++ gcc/testsuite/gcc.dg/pr66299-1.c
@@ -0,0 +1,92 @@
+/* PR tree-optimization/66299 */
+/* { dg-do run } */
+/* { dg-options "-fdump-tree-original" } */
+
+void
+test1 (int x)
+{
+  if ((0 << x) != 0
+      || (1 << x) != 2
+      || (2 << x) != 4
+      || (3 << x) != 6
+      || (4 << x) != 8
+      || (5 << x) != 10
+      || (6 << x) != 12
+      || (7 << x) != 14
+      || (8 << x) != 16
+      || (9 << x) != 18
+      || (10 << x) != 20)
+    __builtin_abort ();
+}
+
+void
+test2 (int x)
+{
+  if (!((0 << x) == 0
+        && (1 << x) == 4
+        && (2 << x) == 8
+        && (3 << x) == 12
+        && (4 << x) == 16
+        && (5 << x) == 20
+        && (6 << x) == 24
+        && (7 << x) == 28
+        && (8 << x) == 32
+        && (9 << x) == 36
+       && (10 << x) == 40))
+    __builtin_abort ();
+}
+
+void
+test3 (unsigned int x)
+{
+  if ((0U << x) != 0U
+      || (1U << x) != 16U
+      || (2U << x) != 32U
+      || (3U << x) != 48U
+      || (4U << x) != 64U
+      || (5U << x) != 80U
+      || (6U << x) != 96U
+      || (7U << x) != 112U
+      || (8U << x) != 128U
+      || (9U << x) != 144U
+      || (10U << x) != 160U)
+    __builtin_abort ();
+}
+
+void
+test4 (unsigned int x)
+{
+  if (!((0U << x) == 0U
+       || (1U << x) == 8U
+       || (2U << x) == 16U
+       || (3U << x) == 24U
+       || (4U << x) == 32U
+       || (5U << x) == 40U
+       || (6U << x) == 48U
+       || (7U << x) == 56U
+       || (8U << x) == 64U
+       || (9U << x) == 72U
+       || (10U << x) == 80U))
+    __builtin_abort ();
+}
+
+void
+test5 (int x)
+{
+  if ((0 << x) == 1
+      || (0 << x) != 0
+      || (0x8001U << x) != 0x20000U)
+    __builtin_abort ();
+}
+
+int
+main (void)
+{
+  test1 (1);
+  test2 (2);
+  test3 (4U);
+  test4 (3U);
+  test5 (17);
+}
+
+/* { dg-final { scan-tree-dump-not "<<" "original" } } */
diff --git gcc/testsuite/gcc.dg/pr66299-2.c gcc/testsuite/gcc.dg/pr66299-2.c
index e69de29..45e9218 100644
--- gcc/testsuite/gcc.dg/pr66299-2.c
+++ gcc/testsuite/gcc.dg/pr66299-2.c
@@ -0,0 +1,33 @@
+/* PR tree-optimization/66299 */
+/* { dg-do run } */
+/* { dg-options "-fdump-tree-optimized -O" } */
+
+void
+test1 (int x, unsigned u)
+{
+  if ((1U << x) != 64
+      || (2 << x) != u
+      || (x << x) != 384
+      || (3 << x) == 9
+      || (x << 14) != 98304U
+      || (1 << x) == 14
+      || (3 << 2) != 12)
+    __builtin_abort ();
+}
+
+void
+test2 (int x)
+{
+  unsigned int t = ((unsigned int) 1U << x);
+  if (t != 2U)
+    __builtin_abort ();
+}
+
+int
+main (void)
+{
+  test1 (6, 128U);
+  test2 (1);
+}
+
+/* { dg-final { scan-tree-dump-not "<<" "optimized" } } */

        Marek

Reply via email to