Re: [PATCH] Reassociate X == CST1 || X == CST2 if popcount (CST2 - CST1) == 1 into ((X - CST1) & ~(CST2 - CST1)) == 0

Jakub Jelinek Sat, 12 Oct 2013 00:41:10 -0700

On Sat, Oct 12, 2013 at 10:08:12AM +0800, Zhenqiang Chen wrote:
> As you had mentioned, the transition in this patch does not reduce
> instructions. But the preexisting optimization does. So we prefer to do the
> preexisting optimization first. E.g.
> 
> X == 10 || X == 12 || X == 26


Ok, that makes sense.

@@ -2279,6 +2275,71 @@ optimize_range_tests (enum tree_code opcode,
        }
     }
 
+  /* Optimize X == CST1 || X == CST2
+     if popcount (CST2 - CST1) == 1 into
+     ((X - CST1) & ~(CST2 - CST1)) == 0.  */

Mention here also similarly to the other comment that it works also
with ranges.

+  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
+    for (i = first; i < length; i++)
+      {
+       tree lowi, highi, lowj, highj, type, tem1, tem2, mask;
+
+       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
+         continue;
+       type = TREE_TYPE (ranges[i].exp);
+       if (!INTEGRAL_TYPE_P (type))
+         continue;
+       lowi = ranges[i].low;
+       if (lowi == NULL_TREE)
+         continue;

The other loop has here:
      if (lowi == NULL_TREE)
        lowi = TYPE_MIN_VALUE (type);
instead, which is better, can you please change it?

+       highi = ranges[i].high;
+       if (highi == NULL_TREE)
+         continue;
+       for (j = i + 1; j < length && j < i + 64; j++)
+         {
+           lowj = ranges[j].low;
+           if (lowj == NULL_TREE)
+             continue;
+           highj = ranges[j].high;
+           if (highj == NULL_TREE)
+             continue;

The other loop has
        if (highj == NULL_TREE)
          highj = TYPE_MAX_VALUE (type);
here instead.

+           if (ranges[j].exp == NULL_TREE || ranges[j].in_p
+               || (ranges[i].exp != ranges[j].exp))
+             continue;

Can you please move this test the lowj = assignment, and
remove the ranges[j].exp == NULL_TREE test (both here and
in the earlier loop)?  I mean, we've checked at the
beginning of for (i = first; i < length; i++) loop that
ranges[i].exp is not NULL_TREE, and if ranges[j].exp is NULL_TREE,
it will be different than ranges[i].exp.
So
        if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
          continue;
(and please also collapse the two checks in the first loop,
so that both look similar).

+           /* Check lowj > highi.  */
+           tem1 = fold_binary (GT_EXPR, boolean_type_node,
+                              lowj, highi);
+           if (tem1 == NULL_TREE || !integer_onep (tem1))
+             continue;
+           /* Check highi - lowi == highj - lowj.  */
+           tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
+           if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
+             continue;
+           tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
+           if (tem2 == NULL_TREE || TREE_CODE (tem2) != INTEGER_CST)
+             continue;
+           if (!tree_int_cst_equal (tem1, tem2))
+             continue;
+           /* Check popcount (lowj - lowi) == 1.  */
+           tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
+           if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
+             continue;
+           if (tree_log2 (tem1) < 0)
+             continue;
+           mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
+           tem1 = fold_binary (MINUS_EXPR, type, ranges[i].exp, lowi);
+           tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
+           lowi = build_int_cst (type, 0);

Please use lowj instead of lowi here.  Because, if update_range_test
fails, we continue looking for further matches in following ranges,
and while lowj will be computed again, lowi will be assumed to contain
the low bound of the first range.

+           if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, tem1,
+                                  ranges[i].in_p, lowi, tem2,

And here too.

Or, alternatively to avoid duplication, you could put the whole
for (i = first; i < length; i++)
loop from the first optimization into another
int i;
  for (pass = 0; pass < (BRANCH_COST (optimize_function_for_speed_p (cfun),
                                      false) >= 2) + 1; pass++)
  {
  }
loop, use whatever is common in the loop unconditionally, and just
conditionalize the rest on pass == 0 vs. pass == 1.
Or maybe even better just move this whole loop into a new function,
with ranges, opcode, first, length arguments plus another (bool?) argument
which of the two optimizations you want to perform, and return the bool
any_changes.  Then you wouldn't run into line length issues that you could
run into with the extra loop.

--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/reassoc-33.c
@@ -0,0 +1,38 @@
+/* { dg-do run { target { ! "m68k*-*-* mmix*-*-* mep*-*-* bfin*-*-* v850*-*-* 
picochip*-*-* moxie*-*-* cris*-*-* m32c*-*-* fr30*-*-* mcore*-*-* powerpc*-*-* 
xtensa*-*-*"} } } */
+
+/* { dg-options "-O2 -fno-inline -fdump-tree-reassoc1-details" } */
+/* { dg-additional-options "-mbranch-cost=2" { target avr-*-* } } */
+
+int test (int a, int b, int c)
+{
+  if (a == 43 || a == 75 || a == 44 || a == 78 || a == 77 || a == 46 || a == 
76 || a == 45)
+    return b;
+  else
+    return c;
+}
+
+int main ()
+{
+  if (test (43, 20, 30) != 20)
+    __builtin_abort ();
+  if (test (44, 20, 30) != 20)
+    __builtin_abort ();
+  if (test (45, 20, 30) != 20)
+    __builtin_abort ();
+  if (test (46, 20, 30) != 20)
+    __builtin_abort ();
+  if (test (75, 20, 30) != 20)
+    __builtin_abort ();
+  if (test (76, 20, 30) != 20)
+    __builtin_abort ();
+  if (test (77, 20, 30) != 20)
+    __builtin_abort ();
+  if (test (78, 20, 30) != 20)
+    __builtin_abort ();
+  if (test (30, 20, 30) != 30)
+    __builtin_abort ();

Perhaps it would be better to test more than just one
value outside of the range.  Say everything from -10 to 100 might be better,
but you'd want to make sure the optimization can't be applied to the
What about

int
main ()
{
  volatile int n43, n47, n75, n79;
  n43 = 43; n47 = n43 + 4; n75 = 75; n79 = n75 + 4;
  int i;
  for (i = -10; i <= 100; i++)
    if (test (i, 20, 30) != 30 - ((i >= n43 && i < n47)
                                  || (i >= n75 && i < n79)) * 10)
      __builtin_abort ();
  return 0;
}

?

        Jakub

Re: [PATCH] Reassociate X == CST1 || X == CST2 if popcount (CST2 - CST1) == 1 into ((X - CST1) & ~(CST2 - CST1)) == 0

Reply via email to