[PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309, take 2)

Jakub Jelinek Fri, 30 Sep 2011 08:04:48 -0700

On Fri, Sep 30, 2011 at 03:14:12PM +0200, Richard Guenther wrote:
> Ah, indeed.  I'll have a look at the updated patch.


Here is what I've committed after bootstrapping/regtesting it on
x86_64-linux and i686-linux and Richard's approval on IRC.

2011-09-30  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/46309
        * fold-const.c (make_range, merge_ranges): Remove prototypes.
        (make_range_step): New function.
        (make_range): Use it.
        * tree.h (make_range_step): New prototypes.
        * Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H).
        * tree-ssa-reassoc.c: Include diagnostic-core.h.
        (struct range_entry): New type.
        (init_range_entry, range_entry_cmp, update_range_test,
        optimize_range_tests): New functions.
        (reassociate_bb): Call optimize_range_tests.

        * gcc.dg/pr46309.c: New test.

--- gcc/fold-const.c.jj 2011-09-30 13:17:22.000000000 +0200
+++ gcc/fold-const.c    2011-09-30 14:08:30.000000000 +0200
@@ -115,9 +115,6 @@ static int simple_operand_p (const_tree)
 static tree range_binop (enum tree_code, tree, tree, int, tree, int);
 static tree range_predecessor (tree);
 static tree range_successor (tree);
-extern tree make_range (tree, int *, tree *, tree *, bool *);
-extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
-                         tree, tree);
 static tree fold_range_test (location_t, enum tree_code, tree, tree, tree);
 static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, 
tree);
 static tree unextend (tree, int, int, tree);
@@ -3790,6 +3787,255 @@ range_binop (enum tree_code code, tree t
   return constant_boolean_node (result, type);
 }
 
+/* Helper routine for make_range.  Perform one step for it, return
+   new expression if the loop should continue or NULL_TREE if it should
+   stop.  */
+
+tree
+make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1,
+                tree exp_type, tree *p_low, tree *p_high, int *p_in_p,
+                bool *strict_overflow_p)
+{
+  tree arg0_type = TREE_TYPE (arg0);
+  tree n_low, n_high, low = *p_low, high = *p_high;
+  int in_p = *p_in_p, n_in_p;
+
+  switch (code)
+    {
+    case TRUTH_NOT_EXPR:
+      *p_in_p = ! in_p;
+      return arg0;
+
+    case EQ_EXPR: case NE_EXPR:
+    case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
+      /* We can only do something if the range is testing for zero
+        and if the second operand is an integer constant.  Note that
+        saying something is "in" the range we make is done by
+        complementing IN_P since it will set in the initial case of
+        being not equal to zero; "out" is leaving it alone.  */
+      if (low == NULL_TREE || high == NULL_TREE
+         || ! integer_zerop (low) || ! integer_zerop (high)
+         || TREE_CODE (arg1) != INTEGER_CST)
+       return NULL_TREE;
+
+      switch (code)
+       {
+       case NE_EXPR:  /* - [c, c]  */
+         low = high = arg1;
+         break;
+       case EQ_EXPR:  /* + [c, c]  */
+         in_p = ! in_p, low = high = arg1;
+         break;
+       case GT_EXPR:  /* - [-, c] */
+         low = 0, high = arg1;
+         break;
+       case GE_EXPR:  /* + [c, -] */
+         in_p = ! in_p, low = arg1, high = 0;
+         break;
+       case LT_EXPR:  /* - [c, -] */
+         low = arg1, high = 0;
+         break;
+       case LE_EXPR:  /* + [-, c] */
+         in_p = ! in_p, low = 0, high = arg1;
+         break;
+       default:
+         gcc_unreachable ();
+       }
+
+      /* If this is an unsigned comparison, we also know that EXP is
+        greater than or equal to zero.  We base the range tests we make
+        on that fact, so we record it here so we can parse existing
+        range tests.  We test arg0_type since often the return type
+        of, e.g. EQ_EXPR, is boolean.  */
+      if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
+       {
+         if (! merge_ranges (&n_in_p, &n_low, &n_high,
+                             in_p, low, high, 1,
+                             build_int_cst (arg0_type, 0),
+                             NULL_TREE))
+           return NULL_TREE;
+
+         in_p = n_in_p, low = n_low, high = n_high;
+
+         /* If the high bound is missing, but we have a nonzero low
+            bound, reverse the range so it goes from zero to the low bound
+            minus 1.  */
+         if (high == 0 && low && ! integer_zerop (low))
+           {
+             in_p = ! in_p;
+             high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
+                                 integer_one_node, 0);
+             low = build_int_cst (arg0_type, 0);
+           }
+       }
+
+      *p_low = low;
+      *p_high = high;
+      *p_in_p = in_p;
+      return arg0;
+
+    case NEGATE_EXPR:
+      /* (-x) IN [a,b] -> x in [-b, -a]  */
+      n_low = range_binop (MINUS_EXPR, exp_type,
+                          build_int_cst (exp_type, 0),
+                          0, high, 1);
+      n_high = range_binop (MINUS_EXPR, exp_type,
+                           build_int_cst (exp_type, 0),
+                           0, low, 0);
+      if (n_high != 0 && TREE_OVERFLOW (n_high))
+       return NULL_TREE;
+      goto normalize;
+
+    case BIT_NOT_EXPR:
+      /* ~ X -> -X - 1  */
+      return build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
+                        build_int_cst (exp_type, 1));
+
+    case PLUS_EXPR:
+    case MINUS_EXPR:
+      if (TREE_CODE (arg1) != INTEGER_CST)
+       return NULL_TREE;
+
+      /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
+        move a constant to the other side.  */
+      if (!TYPE_UNSIGNED (arg0_type)
+         && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
+       return NULL_TREE;
+
+      /* If EXP is signed, any overflow in the computation is undefined,
+        so we don't worry about it so long as our computations on
+        the bounds don't overflow.  For unsigned, overflow is defined
+        and this is exactly the right thing.  */
+      n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
+                          arg0_type, low, 0, arg1, 0);
+      n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
+                           arg0_type, high, 1, arg1, 0);
+      if ((n_low != 0 && TREE_OVERFLOW (n_low))
+         || (n_high != 0 && TREE_OVERFLOW (n_high)))
+       return NULL_TREE;
+
+      if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
+       *strict_overflow_p = true;
+
+      normalize:
+       /* Check for an unsigned range which has wrapped around the maximum
+          value thus making n_high < n_low, and normalize it.  */
+       if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
+         {
+           low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
+                              integer_one_node, 0);
+           high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
+                               integer_one_node, 0);
+
+           /* If the range is of the form +/- [ x+1, x ], we won't
+              be able to normalize it.  But then, it represents the
+              whole range or the empty set, so make it
+              +/- [ -, - ].  */
+           if (tree_int_cst_equal (n_low, low)
+               && tree_int_cst_equal (n_high, high))
+             low = high = 0;
+           else
+             in_p = ! in_p;
+         }
+       else
+         low = n_low, high = n_high;
+
+       *p_low = low;
+       *p_high = high;
+       *p_in_p = in_p;
+       return arg0;
+
+    CASE_CONVERT:
+    case NON_LVALUE_EXPR:
+      if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
+       return NULL_TREE;
+
+      if (! INTEGRAL_TYPE_P (arg0_type)
+         || (low != 0 && ! int_fits_type_p (low, arg0_type))
+         || (high != 0 && ! int_fits_type_p (high, arg0_type)))
+       return NULL_TREE;
+
+      n_low = low, n_high = high;
+
+      if (n_low != 0)
+       n_low = fold_convert_loc (loc, arg0_type, n_low);
+
+      if (n_high != 0)
+       n_high = fold_convert_loc (loc, arg0_type, n_high);
+
+      /* If we're converting arg0 from an unsigned type, to exp,
+        a signed type,  we will be doing the comparison as unsigned.
+        The tests above have already verified that LOW and HIGH
+        are both positive.
+
+        So we have to ensure that we will handle large unsigned
+        values the same way that the current signed bounds treat
+        negative values.  */
+
+      if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
+       {
+         tree high_positive;
+         tree equiv_type;
+         /* For fixed-point modes, we need to pass the saturating flag
+            as the 2nd parameter.  */
+         if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
+           equiv_type
+             = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type),
+                                               TYPE_SATURATING (arg0_type));
+         else
+           equiv_type
+             = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), 1);
+
+         /* A range without an upper bound is, naturally, unbounded.
+            Since convert would have cropped a very large value, use
+            the max value for the destination type.  */
+         high_positive
+           = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
+             : TYPE_MAX_VALUE (arg0_type);
+
+         if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
+           high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
+                                            fold_convert_loc (loc, arg0_type,
+                                                              high_positive),
+                                            build_int_cst (arg0_type, 1));
+
+         /* If the low bound is specified, "and" the range with the
+            range for which the original unsigned value will be
+            positive.  */
+         if (low != 0)
+           {
+             if (! merge_ranges (&n_in_p, &n_low, &n_high, 1, n_low, n_high,
+                                 1, fold_convert_loc (loc, arg0_type,
+                                                      integer_zero_node),
+                                 high_positive))
+               return NULL_TREE;
+
+             in_p = (n_in_p == in_p);
+           }
+         else
+           {
+             /* Otherwise, "or" the range with the range of the input
+                that will be interpreted as negative.  */
+             if (! merge_ranges (&n_in_p, &n_low, &n_high, 0, n_low, n_high,
+                                 1, fold_convert_loc (loc, arg0_type,
+                                                      integer_zero_node),
+                                 high_positive))
+               return NULL_TREE;
+
+             in_p = (in_p != n_in_p);
+           }
+       }
+
+      *p_low = n_low;
+      *p_high = n_high;
+      *p_in_p = in_p;
+      return arg0;
+
+    default:
+      return NULL_TREE;
+    }
+}
+
 /* Given EXP, a logical expression, set the range it is testing into
    variables denoted by PIN_P, PLOW, and PHIGH.  Return the expression
    actually being tested.  *PLOW and *PHIGH will be made of the same
@@ -3804,10 +4050,10 @@ make_range (tree exp, int *pin_p, tree *
            bool *strict_overflow_p)
 {
   enum tree_code code;
-  tree arg0 = NULL_TREE, arg1 = NULL_TREE;
-  tree exp_type = NULL_TREE, arg0_type = NULL_TREE;
-  int in_p, n_in_p;
-  tree low, high, n_low, n_high;
+  tree arg0, arg1 = NULL_TREE;
+  tree exp_type, nexp;
+  int in_p;
+  tree low, high;
   location_t loc = EXPR_LOCATION (exp);
 
   /* Start with simply saying "EXP != 0" and then look at the code of EXP
@@ -3823,255 +4069,26 @@ make_range (tree exp, int *pin_p, tree *
     {
       code = TREE_CODE (exp);
       exp_type = TREE_TYPE (exp);
+      arg0 = NULL_TREE;
 
       if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
        {
          if (TREE_OPERAND_LENGTH (exp) > 0)
            arg0 = TREE_OPERAND (exp, 0);
-         if (TREE_CODE_CLASS (code) == tcc_comparison
-             || TREE_CODE_CLASS (code) == tcc_unary
-             || TREE_CODE_CLASS (code) == tcc_binary)
-           arg0_type = TREE_TYPE (arg0);
          if (TREE_CODE_CLASS (code) == tcc_binary
              || TREE_CODE_CLASS (code) == tcc_comparison
              || (TREE_CODE_CLASS (code) == tcc_expression
                  && TREE_OPERAND_LENGTH (exp) > 1))
            arg1 = TREE_OPERAND (exp, 1);
        }
+      if (arg0 == NULL_TREE)
+       break;
 
-      switch (code)
-       {
-       case TRUTH_NOT_EXPR:
-         in_p = ! in_p, exp = arg0;
-         continue;
-
-       case EQ_EXPR: case NE_EXPR:
-       case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR:
-         /* We can only do something if the range is testing for zero
-            and if the second operand is an integer constant.  Note that
-            saying something is "in" the range we make is done by
-            complementing IN_P since it will set in the initial case of
-            being not equal to zero; "out" is leaving it alone.  */
-         if (low == 0 || high == 0
-             || ! integer_zerop (low) || ! integer_zerop (high)
-             || TREE_CODE (arg1) != INTEGER_CST)
-           break;
-
-         switch (code)
-           {
-           case NE_EXPR:  /* - [c, c]  */
-             low = high = arg1;
-             break;
-           case EQ_EXPR:  /* + [c, c]  */
-             in_p = ! in_p, low = high = arg1;
-             break;
-           case GT_EXPR:  /* - [-, c] */
-             low = 0, high = arg1;
-             break;
-           case GE_EXPR:  /* + [c, -] */
-             in_p = ! in_p, low = arg1, high = 0;
-             break;
-           case LT_EXPR:  /* - [c, -] */
-             low = arg1, high = 0;
-             break;
-           case LE_EXPR:  /* + [-, c] */
-             in_p = ! in_p, low = 0, high = arg1;
-             break;
-           default:
-             gcc_unreachable ();
-           }
-
-         /* If this is an unsigned comparison, we also know that EXP is
-            greater than or equal to zero.  We base the range tests we make
-            on that fact, so we record it here so we can parse existing
-            range tests.  We test arg0_type since often the return type
-            of, e.g. EQ_EXPR, is boolean.  */
-         if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0))
-           {
-             if (! merge_ranges (&n_in_p, &n_low, &n_high,
-                                 in_p, low, high, 1,
-                                 build_int_cst (arg0_type, 0),
-                                 NULL_TREE))
-               break;
-
-             in_p = n_in_p, low = n_low, high = n_high;
-
-             /* If the high bound is missing, but we have a nonzero low
-                bound, reverse the range so it goes from zero to the low bound
-                minus 1.  */
-             if (high == 0 && low && ! integer_zerop (low))
-               {
-                 in_p = ! in_p;
-                 high = range_binop (MINUS_EXPR, NULL_TREE, low, 0,
-                                     integer_one_node, 0);
-                 low = build_int_cst (arg0_type, 0);
-               }
-           }
-
-         exp = arg0;
-         continue;
-
-       case NEGATE_EXPR:
-         /* (-x) IN [a,b] -> x in [-b, -a]  */
-         n_low = range_binop (MINUS_EXPR, exp_type,
-                              build_int_cst (exp_type, 0),
-                              0, high, 1);
-         n_high = range_binop (MINUS_EXPR, exp_type,
-                               build_int_cst (exp_type, 0),
-                               0, low, 0);
-         if (n_high != 0 && TREE_OVERFLOW (n_high))
-           break;
-         goto normalize;
-
-       case BIT_NOT_EXPR:
-         /* ~ X -> -X - 1  */
-         exp = build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0),
-                           build_int_cst (exp_type, 1));
-         continue;
-
-       case PLUS_EXPR:  case MINUS_EXPR:
-         if (TREE_CODE (arg1) != INTEGER_CST)
-           break;
-
-         /* If flag_wrapv and ARG0_TYPE is signed, then we cannot
-            move a constant to the other side.  */
-         if (!TYPE_UNSIGNED (arg0_type)
-             && !TYPE_OVERFLOW_UNDEFINED (arg0_type))
-           break;
-
-         /* If EXP is signed, any overflow in the computation is undefined,
-            so we don't worry about it so long as our computations on
-            the bounds don't overflow.  For unsigned, overflow is defined
-            and this is exactly the right thing.  */
-         n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
-                              arg0_type, low, 0, arg1, 0);
-         n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR,
-                               arg0_type, high, 1, arg1, 0);
-         if ((n_low != 0 && TREE_OVERFLOW (n_low))
-             || (n_high != 0 && TREE_OVERFLOW (n_high)))
-           break;
-
-         if (TYPE_OVERFLOW_UNDEFINED (arg0_type))
-           *strict_overflow_p = true;
-
-       normalize:
-         /* Check for an unsigned range which has wrapped around the maximum
-            value thus making n_high < n_low, and normalize it.  */
-         if (n_low && n_high && tree_int_cst_lt (n_high, n_low))
-           {
-             low = range_binop (PLUS_EXPR, arg0_type, n_high, 0,
-                                integer_one_node, 0);
-             high = range_binop (MINUS_EXPR, arg0_type, n_low, 0,
-                                 integer_one_node, 0);
-
-             /* If the range is of the form +/- [ x+1, x ], we won't
-                be able to normalize it.  But then, it represents the
-                whole range or the empty set, so make it
-                +/- [ -, - ].  */
-             if (tree_int_cst_equal (n_low, low)
-                 && tree_int_cst_equal (n_high, high))
-               low = high = 0;
-             else
-               in_p = ! in_p;
-           }
-         else
-           low = n_low, high = n_high;
-
-         exp = arg0;
-         continue;
-
-       CASE_CONVERT: case NON_LVALUE_EXPR:
-         if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type))
-           break;
-
-         if (! INTEGRAL_TYPE_P (arg0_type)
-             || (low != 0 && ! int_fits_type_p (low, arg0_type))
-             || (high != 0 && ! int_fits_type_p (high, arg0_type)))
-           break;
-
-         n_low = low, n_high = high;
-
-         if (n_low != 0)
-           n_low = fold_convert_loc (loc, arg0_type, n_low);
-
-         if (n_high != 0)
-           n_high = fold_convert_loc (loc, arg0_type, n_high);
-
-
-         /* If we're converting arg0 from an unsigned type, to exp,
-            a signed type,  we will be doing the comparison as unsigned.
-            The tests above have already verified that LOW and HIGH
-            are both positive.
-
-            So we have to ensure that we will handle large unsigned
-            values the same way that the current signed bounds treat
-            negative values.  */
-
-         if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type))
-           {
-             tree high_positive;
-             tree equiv_type;
-             /* For fixed-point modes, we need to pass the saturating flag
-                as the 2nd parameter.  */
-             if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type)))
-               equiv_type = lang_hooks.types.type_for_mode
-                            (TYPE_MODE (arg0_type),
-                             TYPE_SATURATING (arg0_type));
-             else
-               equiv_type = lang_hooks.types.type_for_mode
-                            (TYPE_MODE (arg0_type), 1);
-
-             /* A range without an upper bound is, naturally, unbounded.
-                Since convert would have cropped a very large value, use
-                the max value for the destination type.  */
-             high_positive
-               = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type)
-               : TYPE_MAX_VALUE (arg0_type);
-
-             if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type))
-               high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type,
-                                            fold_convert_loc (loc, arg0_type,
-                                                              high_positive),
-                                            build_int_cst (arg0_type, 1));
-
-             /* If the low bound is specified, "and" the range with the
-                range for which the original unsigned value will be
-                positive.  */
-             if (low != 0)
-               {
-                 if (! merge_ranges (&n_in_p, &n_low, &n_high,
-                                     1, n_low, n_high, 1,
-                                     fold_convert_loc (loc, arg0_type,
-                                                       integer_zero_node),
-                                     high_positive))
-                   break;
-
-                 in_p = (n_in_p == in_p);
-               }
-             else
-               {
-                 /* Otherwise, "or" the range with the range of the input
-                    that will be interpreted as negative.  */
-                 if (! merge_ranges (&n_in_p, &n_low, &n_high,
-                                     0, n_low, n_high, 1,
-                                     fold_convert_loc (loc, arg0_type,
-                                                       integer_zero_node),
-                                     high_positive))
-                   break;
-
-                 in_p = (in_p != n_in_p);
-               }
-           }
-
-         exp = arg0;
-         low = n_low, high = n_high;
-         continue;
-
-       default:
-         break;
-       }
-
-      break;
+      nexp = make_range_step (loc, code, arg0, arg1, exp_type, &low,
+                             &high, &in_p, strict_overflow_p);
+      if (nexp == NULL_TREE)
+       break;
+      exp = nexp;
     }
 
   /* If EXP is a constant, we can evaluate whether this is true or false.  */
--- gcc/tree.h.jj       2011-09-29 14:25:29.000000000 +0200
+++ gcc/tree.h  2011-09-30 14:08:38.000000000 +0200
@@ -5384,6 +5384,8 @@ extern unsigned int get_pointer_alignmen
 extern tree fold_call_stmt (gimple, bool);
 extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum 
built_in_function);
 extern tree make_range (tree, int *, tree *, tree *, bool *);
+extern tree make_range_step (location_t, enum tree_code, tree, tree, tree,
+                            tree *, tree *, int *, bool *);
 extern tree build_range_check (location_t, tree, tree, int, tree, tree);
 extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int,
                          tree, tree);
--- gcc/Makefile.in.jj  2011-09-29 14:25:46.000000000 +0200
+++ gcc/Makefile.in     2011-09-30 13:40:52.000000000 +0200
@@ -2650,7 +2650,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c 
    $(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) \
    tree-iterator.h $(BASIC_BLOCK_H) $(GIMPLE_H) $(TREE_INLINE_H) \
    $(VEC_H) langhooks.h alloc-pool.h pointer-set.h $(CFGLOOP_H) \
-   tree-pretty-print.h gimple-pretty-print.h
+   tree-pretty-print.h gimple-pretty-print.h $(DIAGNOSTIC_CORE_H)
 tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
    $(TREE_H) $(TM_P_H) $(GGC_H) output.h \
    $(DIAGNOSTIC_H) $(BASIC_BLOCK_H) $(FLAGS_H) $(TIMEVAR_H) $(TM_H) \
--- gcc/tree-ssa-reassoc.c.jj   2011-09-29 14:25:29.000000000 +0200
+++ gcc/tree-ssa-reassoc.c      2011-09-30 14:08:00.000000000 +0200
@@ -42,6 +42,7 @@ along with GCC; see the file COPYING3.  
 #include "flags.h"
 #include "target.h"
 #include "params.h"
+#include "diagnostic-core.h"
 
 /*  This is a simple global reassociation pass.  It is, in part, based
     on the LLVM pass of the same name (They do some things more/less
@@ -1568,6 +1569,457 @@ optimize_ops_list (enum tree_code opcode
     optimize_ops_list (opcode, ops);
 }
 
+/* The following functions are subroutines to optimize_range_tests and allow
+   it to try to change a logical combination of comparisons into a range
+   test.
+
+   For example, both
+       X == 2 || X == 5 || X == 3 || X == 4
+   and
+       X >= 2 && X <= 5
+   are converted to
+       (unsigned) (X - 2) <= 3
+
+   For more information see comments above fold_test_range in fold-const.c,
+   this implementation is for GIMPLE.  */
+
+struct range_entry
+{
+  tree exp;
+  tree low;
+  tree high;
+  bool in_p;
+  bool strict_overflow_p;
+  unsigned int idx, next;
+};
+
+/* This is similar to make_range in fold-const.c, but on top of
+   GIMPLE instead of trees.  */
+
+static void
+init_range_entry (struct range_entry *r, tree exp)
+{
+  int in_p;
+  tree low, high;
+  bool is_bool, strict_overflow_p;
+
+  r->exp = NULL_TREE;
+  r->in_p = false;
+  r->strict_overflow_p = false;
+  r->low = NULL_TREE;
+  r->high = NULL_TREE;
+  if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
+    return;
+
+  /* Start with simply saying "EXP != 0" and then look at the code of EXP
+     and see if we can refine the range.  Some of the cases below may not
+     happen, but it doesn't seem worth worrying about this.  We "continue"
+     the outer loop when we've changed something; otherwise we "break"
+     the switch, which will "break" the while.  */
+  low = build_int_cst (TREE_TYPE (exp), 0);
+  high = low;
+  in_p = 0;
+  strict_overflow_p = false;
+  is_bool = false;
+  if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
+    {
+      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
+       is_bool = true;
+      else
+       return;
+    }
+  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
+    is_bool = true;
+
+  while (1)
+    {
+      gimple stmt;
+      enum tree_code code;
+      tree arg0, arg1, exp_type;
+      tree nexp;
+      location_t loc;
+
+      if (TREE_CODE (exp) != SSA_NAME)
+       break;
+
+      stmt = SSA_NAME_DEF_STMT (exp);
+      if (!is_gimple_assign (stmt))
+       break;
+
+      code = gimple_assign_rhs_code (stmt);
+      arg0 = gimple_assign_rhs1 (stmt);
+      arg1 = gimple_assign_rhs2 (stmt);
+      exp_type = TREE_TYPE (exp);
+      loc = gimple_location (stmt);
+      switch (code)
+       {
+       case BIT_NOT_EXPR:
+         if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
+           {
+             in_p = !in_p;
+             exp = arg0;
+             continue;
+           }
+         break;
+       case SSA_NAME:
+         exp = arg0;
+         continue;
+       CASE_CONVERT:
+         if (is_bool)
+           goto do_default;
+         if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
+           {
+             if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
+               is_bool = true;
+             else
+               return;
+           }
+         else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
+           is_bool = true;
+         goto do_default;
+       case EQ_EXPR:
+       case NE_EXPR:
+       case LT_EXPR:
+       case LE_EXPR:
+       case GE_EXPR:
+       case GT_EXPR:
+         is_bool = true;
+         /* FALLTHRU */
+       default:
+         if (!is_bool)
+           return;
+       do_default:
+         nexp = make_range_step (loc, code, arg0, arg1, exp_type,
+                                 &low, &high, &in_p,
+                                 &strict_overflow_p);
+         if (nexp != NULL_TREE)
+           {
+             exp = nexp;
+             gcc_assert (TREE_CODE (exp) == SSA_NAME);
+             continue;
+           }
+         break;
+       }
+      break;
+    }
+  if (is_bool)
+    {
+      r->exp = exp;
+      r->in_p = in_p;
+      r->low = low;
+      r->high = high;
+      r->strict_overflow_p = strict_overflow_p;
+    }
+}
+
+/* Comparison function for qsort.  Sort entries
+   without SSA_NAME exp first, then with SSA_NAMEs sorted
+   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
+   by increasing ->low and if ->low is the same, by increasing
+   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
+   maximum.  */
+
+static int
+range_entry_cmp (const void *a, const void *b)
+{
+  const struct range_entry *p = (const struct range_entry *) a;
+  const struct range_entry *q = (const struct range_entry *) b;
+
+  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
+    {
+      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
+       {
+         /* Group range_entries for the same SSA_NAME together.  */
+         if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
+           return -1;
+         else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
+           return 1;
+         /* If ->low is different, NULL low goes first, then by
+            ascending low.  */
+         if (p->low != NULL_TREE)
+           {
+             if (q->low != NULL_TREE)
+               {
+                 tree tem = fold_binary (LT_EXPR, boolean_type_node,
+                                         p->low, q->low);
+                 if (tem && integer_onep (tem))
+                   return -1;
+                 tem = fold_binary (GT_EXPR, boolean_type_node,
+                                    p->low, q->low);
+                 if (tem && integer_onep (tem))
+                   return 1;
+               }
+             else
+               return 1;
+           }
+         else if (q->low != NULL_TREE)
+           return -1;
+         /* If ->high is different, NULL high goes last, before that by
+            ascending high.  */
+         if (p->high != NULL_TREE)
+           {
+             if (q->high != NULL_TREE)
+               {
+                 tree tem = fold_binary (LT_EXPR, boolean_type_node,
+                                         p->high, q->high);
+                 if (tem && integer_onep (tem))
+                   return -1;
+                 tem = fold_binary (GT_EXPR, boolean_type_node,
+                                    p->high, q->high);
+                 if (tem && integer_onep (tem))
+                   return 1;
+               }
+             else
+               return -1;
+           }
+         else if (p->high != NULL_TREE)
+           return 1;
+         /* If both ranges are the same, sort below by ascending idx.  */
+       }
+      else
+       return 1;
+    }
+  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
+    return -1;
+
+  if (p->idx < q->idx)
+    return -1;
+  else
+    {
+      gcc_checking_assert (p->idx > q->idx);
+      return 1;
+    }
+}
+
+/* Helper routine of optimize_range_test.
+   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
+   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
+   OPCODE and OPS are arguments of optimize_range_tests.  Return
+   true if the range merge has been successful.  */
+
+static bool
+update_range_test (struct range_entry *range, struct range_entry *otherrange,
+                  unsigned int count, enum tree_code opcode,
+                  VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
+                  tree low, tree high, bool strict_overflow_p)
+{
+  tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
+  location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
+  tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
+  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
+  gimple_stmt_iterator gsi;
+
+  if (tem == NULL_TREE)
+    return false;
+
+  if (strict_overflow_p && issue_strict_overflow_warning (wc))
+    warning_at (loc, OPT_Wstrict_overflow,
+               "assuming signed overflow does not occur "
+               "when simplifying range test");
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      struct range_entry *r;
+      fprintf (dump_file, "Optimizing range tests ");
+      print_generic_expr (dump_file, range->exp, 0);
+      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
+      print_generic_expr (dump_file, range->low, 0);
+      fprintf (dump_file, ", ");
+      print_generic_expr (dump_file, range->high, 0);
+      fprintf (dump_file, "]");
+      for (r = otherrange; r < otherrange + count; r++)
+       {
+         fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
+         print_generic_expr (dump_file, r->low, 0);
+         fprintf (dump_file, ", ");
+         print_generic_expr (dump_file, r->high, 0);
+         fprintf (dump_file, "]");
+       }
+      fprintf (dump_file, "\n into ");
+      print_generic_expr (dump_file, tem, 0);
+      fprintf (dump_file, "\n");
+    }
+
+  if (opcode == BIT_IOR_EXPR)
+    tem = invert_truthvalue_loc (loc, tem);
+
+  tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
+  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
+  tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
+                                 GSI_SAME_STMT);
+
+  VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
+  range->exp = exp;
+  range->low = low;
+  range->high = high;
+  range->in_p = in_p;
+  range->strict_overflow_p = false;
+
+  for (range = otherrange; range < otherrange + count; range++)
+    {
+      VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
+      range->exp = NULL_TREE;
+    }
+  return true;
+}
+
+/* Optimize range tests, similarly how fold_range_test optimizes
+   it on trees.  The tree code for the binary
+   operation between all the operands is OPCODE.  */
+
+static void
+optimize_range_tests (enum tree_code opcode,
+                     VEC (operand_entry_t, heap) **ops)
+{
+  unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
+  operand_entry_t oe;
+  struct range_entry *ranges;
+  bool any_changes = false;
+
+  if (length == 1)
+    return;
+
+  ranges = XNEWVEC (struct range_entry, length);
+  for (i = 0; i < length; i++)
+    {
+      ranges[i].idx = i;
+      init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
+      /* For | invert it now, we will invert it again before emitting
+        the optimized expression.  */
+      if (opcode == BIT_IOR_EXPR)
+       ranges[i].in_p = !ranges[i].in_p;
+    }
+
+  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
+  for (i = 0; i < length; i++)
+    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
+      break;
+
+  /* Try to merge ranges.  */
+  for (first = i; i < length; i++)
+    {
+      tree low = ranges[i].low;
+      tree high = ranges[i].high;
+      int in_p = ranges[i].in_p;
+      bool strict_overflow_p = ranges[i].strict_overflow_p;
+      int update_fail_count = 0;
+
+      for (j = i + 1; j < length; j++)
+       {
+         if (ranges[i].exp != ranges[j].exp)
+           break;
+         if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
+                            ranges[j].in_p, ranges[j].low, ranges[j].high))
+           break;
+         strict_overflow_p |= ranges[j].strict_overflow_p;
+       }
+
+      if (j == i + 1)
+       continue;
+
+      if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
+                            ops, ranges[i].exp, in_p, low, high,
+                            strict_overflow_p))
+       {
+         i = j - 1;
+         any_changes = true;
+       }
+      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
+        while update_range_test would fail.  */
+      else if (update_fail_count == 64)
+       i = j - 1;
+      else
+       ++update_fail_count;
+    }
+
+  /* Optimize X == CST1 || X == CST2
+     if popcount (CST1 ^ CST2) == 1 into
+     (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
+     Similarly for ranges.  E.g.
+     X != 2 && X != 3 && X != 10 && X != 11
+     will be transformed by the above loop into
+     (X - 2U) <= 1U && (X - 10U) <= 1U
+     and this loop can transform that into
+     ((X & ~8) - 2U) <= 1U.  */
+  for (i = first; i < length; i++)
+    {
+      tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
+
+      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
+       continue;
+      type = TREE_TYPE (ranges[i].exp);
+      if (!INTEGRAL_TYPE_P (type))
+       continue;
+      lowi = ranges[i].low;
+      if (lowi == NULL_TREE)
+       lowi = TYPE_MIN_VALUE (type);
+      highi = ranges[i].high;
+      if (highi == NULL_TREE)
+       continue;
+      for (j = i + 1; j < length && j < i + 64; j++)
+       {
+         if (ranges[j].exp == NULL_TREE)
+           continue;
+         if (ranges[i].exp != ranges[j].exp)
+           break;
+         if (ranges[j].in_p)
+           continue;
+         lowj = ranges[j].low;
+         if (lowj == NULL_TREE)
+           continue;
+         highj = ranges[j].high;
+         if (highj == NULL_TREE)
+           highj = TYPE_MAX_VALUE (type);
+         tem = fold_binary (GT_EXPR, boolean_type_node,
+                            lowj, highi);
+         if (tem == NULL_TREE || !integer_onep (tem))
+           continue;
+         lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
+         if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
+           continue;
+         gcc_checking_assert (!integer_zerop (lowxor));
+         tem = fold_binary (MINUS_EXPR, type, lowxor,
+                            build_int_cst (type, 1));
+         if (tem == NULL_TREE)
+           continue;
+         tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
+         if (tem == NULL_TREE || !integer_zerop (tem))
+           continue;
+         highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
+         if (!tree_int_cst_equal (lowxor, highxor))
+           continue;
+         tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
+         exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
+         lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
+         highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
+         if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
+                                ranges[i].in_p, lowj, highj,
+                                ranges[i].strict_overflow_p
+                                || ranges[j].strict_overflow_p))
+           {
+             any_changes = true;
+             break;
+           }
+       }
+    }
+
+  if (any_changes)
+    {
+      j = 0;
+      FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
+       {
+         if (oe->op == error_mark_node)
+           continue;
+         else if (i != j)
+           VEC_replace (operand_entry_t, *ops, j, oe);
+         j++;
+       }
+      VEC_truncate (operand_entry_t, *ops, j);
+    }
+
+  XDELETEVEC (ranges);
+}
+
 /* Return true if OPERAND is defined by a PHI node which uses the LHS
    of STMT in it's operands.  This is also known as a "destructive
    update" operation.  */
@@ -2447,6 +2899,9 @@ reassociate_bb (basic_block bb)
                  optimize_ops_list (rhs_code, &ops);
                }
 
+             if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
+               optimize_range_tests (rhs_code, &ops);
+
              if (VEC_length (operand_entry_t, ops) == 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
--- gcc/testsuite/gcc.dg/pr46309.c.jj   2011-09-30 13:40:52.000000000 +0200
+++ gcc/testsuite/gcc.dg/pr46309.c      2011-09-30 13:40:52.000000000 +0200
@@ -0,0 +1,66 @@
+/* PR tree-optimization/46309 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-reassoc-details" } */
+
+int
+f1 (int a)
+{
+  int v1 = (a == 3);
+  int v2 = (a == 1);
+  int v3 = (a == 4);
+  int v4 = (a == 2);
+  return v1 || v2 || v3 || v4;
+}
+
+int
+f2 (int a)
+{
+  int v1 = (a == 1);
+  int v2 = (a == 2);
+  int v3 = (a == 3);
+  int v4 = (a == 4);
+  return v1 || v2 || v3 || v4;
+}
+
+int
+f3 (int a)
+{
+  int v1 = (a == 3);
+  int v2 = (a == 1);
+  return v1 || v2;
+}
+
+int
+f4 (int a)
+{
+  int v1 = (a == 1);
+  int v2 = (a == 2);
+  return v1 || v2;
+}
+
+int
+f5 (unsigned int a)
+{
+  int v1 = (a <= 31);
+  int v2 = (a >= 64 && a <= 95);
+  return v1 || v2;
+}
+
+int
+f6 (unsigned int a)
+{
+  int v1 = (a <= 31);
+  int v2 = (a >= 64 && a <= 95);
+  int v3 = (a >= 128 && a <= 159);
+  int v4 = (a >= 192 && a <= 223);
+  return v1 || v2 || v3 || v4;
+}
+
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. 
-.1, 1. and -.2, 2. and -.3, 3. and -.4, 4.\[\n\r\]* into" 2 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. 
-.1, 1. and -.3, 3.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. 
-.1, 1. and -.2, 2.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. 
-.0, 31. and -.64, 95.\[\n\r\]* into" 2 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. 
-.128, 159. and -.192, 223.\[\n\r\]* into" 1 "reassoc1" } } */
+/* { dg-final { scan-tree-dump-times "Optimizing range tests 
D.\[0-9\]*_\[0-9\]* -.0, 31. and -.128, 159.\[\n\r\]* into" 1 "reassoc2" } } */
+/* { dg-final { cleanup-tree-dump "reassoc1" } } */
+/* { dg-final { cleanup-tree-dump "reassoc2" } } */


        Jakub

[PATCH] fold_range_test like optimization on GIMPLE (PR tree-optimization/46309, take 2)

Reply via email to