On Thu, Sep 29, 2011 at 11:15 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > This patch implements a fold_range_test like optimization on GIMPLE, inside > tree-ssa-reassoc and tweaks fold-const.c so that most of the code can be > shared in between the two. > The advantage of the reassoc optimization is that it doesn't attempt to > merge just 2 ranges at a time, instead it sorts the ranges for the same > SSA_NAME and thus can optimize even cases where source code doesn't have > the numbers in a range test in increasing or decreasing order (and also > can optimize things that were in multiple statements in the source). > Additionally, it optimizes cases like > x == 1 || x == 3 > into (x & ~2) == 1. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2011-09-27 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/46309 > * fold-const.c (make_range, merge_ranges): Remove prototypes. > (range_binop): Likewise, no longer static. > (make_range_step): New function. > (make_range): Use it. > * tree.h (make_range_step, range_binop): New prototypes. > * Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H). > * tree-ssa-reassoc.c: Include diagnostic-core.h. > (struct range_entry): New type. > (init_range_entry, range_entry_cmp, update_range_test, > optimize_range_tests): New functions. > (reassociate_bb): Call optimize_range_tests. > > * gcc.dg/pr46309.c: New test. 
> > --- gcc/fold-const.c.jj 2011-09-05 12:28:53.000000000 +0200 > +++ gcc/fold-const.c 2011-09-27 16:39:09.000000000 +0200 > @@ -112,12 +112,8 @@ static tree decode_field_reference (loca > static int all_ones_mask_p (const_tree, int); > static tree sign_bit_p (tree, const_tree); > static int simple_operand_p (const_tree); > -static tree range_binop (enum tree_code, tree, tree, int, tree, int); > static tree range_predecessor (tree); > static tree range_successor (tree); > -extern tree make_range (tree, int *, tree *, tree *, bool *); > -extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, > - tree, tree); > static tree fold_range_test (location_t, enum tree_code, tree, tree, tree); > static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, > tree); > static tree unextend (tree, int, int, tree); > @@ -3731,7 +3727,7 @@ simple_operand_p (const_tree exp) > must be specified for a comparison. ARG1 will be converted to ARG0's > type if both are specified. */ > > -static tree > +tree > range_binop (enum tree_code code, tree type, tree arg0, int upper0_p, > tree arg1, int upper1_p) > { > @@ -3790,6 +3786,255 @@ range_binop (enum tree_code code, tree t > return constant_boolean_node (result, type); > } > > +/* Helper routine for make_range. Perform one step for it, return > + new expression if the loop should continue or NULL_TREE if it should > + stop. */ > + > +tree > +make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1, > + tree exp_type, tree *p_low, tree *p_high, int *p_in_p, > + bool *strict_overflow_p) > +{ > + tree arg0_type = TREE_TYPE (arg0); > + tree n_low, n_high, low = *p_low, high = *p_high; > + int in_p = *p_in_p, n_in_p; > + > + switch (code) > + { > + case TRUTH_NOT_EXPR: > + *p_in_p = ! 
in_p; > + return arg0; > + > + case EQ_EXPR: case NE_EXPR: > + case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR: > + /* We can only do something if the range is testing for zero > + and if the second operand is an integer constant. Note that > + saying something is "in" the range we make is done by > + complementing IN_P since it will set in the initial case of > + being not equal to zero; "out" is leaving it alone. */ > + if (low == NULL_TREE || high == NULL_TREE > + || ! integer_zerop (low) || ! integer_zerop (high) > + || TREE_CODE (arg1) != INTEGER_CST) > + return NULL_TREE; > + > + switch (code) > + { > + case NE_EXPR: /* - [c, c] */ > + low = high = arg1; > + break; > + case EQ_EXPR: /* + [c, c] */ > + in_p = ! in_p, low = high = arg1; > + break; > + case GT_EXPR: /* - [-, c] */ > + low = 0, high = arg1; > + break; > + case GE_EXPR: /* + [c, -] */ > + in_p = ! in_p, low = arg1, high = 0; > + break; > + case LT_EXPR: /* - [c, -] */ > + low = arg1, high = 0; > + break; > + case LE_EXPR: /* + [-, c] */ > + in_p = ! in_p, low = 0, high = arg1; > + break; > + default: > + gcc_unreachable (); > + } > + > + /* If this is an unsigned comparison, we also know that EXP is > + greater than or equal to zero. We base the range tests we make > + on that fact, so we record it here so we can parse existing > + range tests. We test arg0_type since often the return type > + of, e.g. EQ_EXPR, is boolean. */ > + if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0)) > + { > + if (! merge_ranges (&n_in_p, &n_low, &n_high, > + in_p, low, high, 1, > + build_int_cst (arg0_type, 0), > + NULL_TREE)) > + return NULL_TREE; > + > + in_p = n_in_p, low = n_low, high = n_high; > + > + /* If the high bound is missing, but we have a nonzero low > + bound, reverse the range so it goes from zero to the low bound > + minus 1. */ > + if (high == 0 && low && ! integer_zerop (low)) > + { > + in_p = ! 
in_p; > + high = range_binop (MINUS_EXPR, NULL_TREE, low, 0, > + integer_one_node, 0); > + low = build_int_cst (arg0_type, 0); > + } > + } > + > + *p_low = low; > + *p_high = high; > + *p_in_p = in_p; > + return arg0; > + > + case NEGATE_EXPR: > + /* (-x) IN [a,b] -> x in [-b, -a] */ > + n_low = range_binop (MINUS_EXPR, exp_type, > + build_int_cst (exp_type, 0), > + 0, high, 1); > + n_high = range_binop (MINUS_EXPR, exp_type, > + build_int_cst (exp_type, 0), > + 0, low, 0); > + if (n_high != 0 && TREE_OVERFLOW (n_high)) > + return NULL_TREE; > + goto normalize; > + > + case BIT_NOT_EXPR: > + /* ~ X -> -X - 1 */ > + return build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0), > + build_int_cst (exp_type, 1)); > + > + case PLUS_EXPR: > + case MINUS_EXPR: > + if (TREE_CODE (arg1) != INTEGER_CST) > + return NULL_TREE; > + > + /* If flag_wrapv and ARG0_TYPE is signed, then we cannot > + move a constant to the other side. */ > + if (!TYPE_UNSIGNED (arg0_type) > + && !TYPE_OVERFLOW_UNDEFINED (arg0_type)) > + return NULL_TREE; > + > + /* If EXP is signed, any overflow in the computation is undefined, > + so we don't worry about it so long as our computations on > + the bounds don't overflow. For unsigned, overflow is defined > + and this is exactly the right thing. */ > + n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, > + arg0_type, low, 0, arg1, 0); > + n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, > + arg0_type, high, 1, arg1, 0); > + if ((n_low != 0 && TREE_OVERFLOW (n_low)) > + || (n_high != 0 && TREE_OVERFLOW (n_high))) > + return NULL_TREE; > + > + if (TYPE_OVERFLOW_UNDEFINED (arg0_type)) > + *strict_overflow_p = true; > + > + normalize: > + /* Check for an unsigned range which has wrapped around the maximum > + value thus making n_high < n_low, and normalize it. 
*/ > + if (n_low && n_high && tree_int_cst_lt (n_high, n_low)) > + { > + low = range_binop (PLUS_EXPR, arg0_type, n_high, 0, > + integer_one_node, 0); > + high = range_binop (MINUS_EXPR, arg0_type, n_low, 0, > + integer_one_node, 0); > + > + /* If the range is of the form +/- [ x+1, x ], we won't > + be able to normalize it. But then, it represents the > + whole range or the empty set, so make it > + +/- [ -, - ]. */ > + if (tree_int_cst_equal (n_low, low) > + && tree_int_cst_equal (n_high, high)) > + low = high = 0; > + else > + in_p = ! in_p; > + } > + else > + low = n_low, high = n_high; > + > + *p_low = low; > + *p_high = high; > + *p_in_p = in_p; > + return arg0; > + > + CASE_CONVERT: > + case NON_LVALUE_EXPR: > + if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type)) > + return NULL_TREE; > + > + if (! INTEGRAL_TYPE_P (arg0_type) > + || (low != 0 && ! int_fits_type_p (low, arg0_type)) > + || (high != 0 && ! int_fits_type_p (high, arg0_type))) > + return NULL_TREE; > + > + n_low = low, n_high = high; > + > + if (n_low != 0) > + n_low = fold_convert_loc (loc, arg0_type, n_low); > + > + if (n_high != 0) > + n_high = fold_convert_loc (loc, arg0_type, n_high); > + > + /* If we're converting arg0 from an unsigned type, to exp, > + a signed type, we will be doing the comparison as unsigned. > + The tests above have already verified that LOW and HIGH > + are both positive. > + > + So we have to ensure that we will handle large unsigned > + values the same way that the current signed bounds treat > + negative values. */ > + > + if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type)) > + { > + tree high_positive; > + tree equiv_type; > + /* For fixed-point modes, we need to pass the saturating flag > + as the 2nd parameter. 
*/ > + if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type))) > + equiv_type > + = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), > + TYPE_SATURATING (arg0_type)); > + else > + equiv_type > + = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), 1); > + > + /* A range without an upper bound is, naturally, unbounded. > + Since convert would have cropped a very large value, use > + the max value for the destination type. */ > + high_positive > + = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type) > + : TYPE_MAX_VALUE (arg0_type); > + > + if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type)) > + high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type, > + fold_convert_loc (loc, arg0_type, > + high_positive), > + build_int_cst (arg0_type, 1)); > + > + /* If the low bound is specified, "and" the range with the > + range for which the original unsigned value will be > + positive. */ > + if (low != 0) > + { > + if (! merge_ranges (&n_in_p, &n_low, &n_high, 1, n_low, n_high, > + 1, fold_convert_loc (loc, arg0_type, > + integer_zero_node), > + high_positive)) > + return NULL_TREE; > + > + in_p = (n_in_p == in_p); > + } > + else > + { > + /* Otherwise, "or" the range with the range of the input > + that will be interpreted as negative. */ > + if (! merge_ranges (&n_in_p, &n_low, &n_high, 0, n_low, n_high, > + 1, fold_convert_loc (loc, arg0_type, > + integer_zero_node), > + high_positive)) > + return NULL_TREE; > + > + in_p = (in_p != n_in_p); > + } > + } > + > + *p_low = n_low; > + *p_high = n_high; > + *p_in_p = in_p; > + return arg0; > + > + default: > + return NULL_TREE; > + } > +} > + > /* Given EXP, a logical expression, set the range it is testing into > variables denoted by PIN_P, PLOW, and PHIGH. Return the expression > actually being tested. 
*PLOW and *PHIGH will be made of the same > @@ -3804,10 +4049,10 @@ make_range (tree exp, int *pin_p, tree * > bool *strict_overflow_p) > { > enum tree_code code; > - tree arg0 = NULL_TREE, arg1 = NULL_TREE; > - tree exp_type = NULL_TREE, arg0_type = NULL_TREE; > - int in_p, n_in_p; > - tree low, high, n_low, n_high; > + tree arg0, arg1 = NULL_TREE; > + tree exp_type, nexp; > + int in_p; > + tree low, high; > location_t loc = EXPR_LOCATION (exp); > > /* Start with simply saying "EXP != 0" and then look at the code of EXP > @@ -3823,255 +4068,26 @@ make_range (tree exp, int *pin_p, tree * > { > code = TREE_CODE (exp); > exp_type = TREE_TYPE (exp); > + arg0 = NULL_TREE; > > if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) > { > if (TREE_OPERAND_LENGTH (exp) > 0) > arg0 = TREE_OPERAND (exp, 0); > - if (TREE_CODE_CLASS (code) == tcc_comparison > - || TREE_CODE_CLASS (code) == tcc_unary > - || TREE_CODE_CLASS (code) == tcc_binary) > - arg0_type = TREE_TYPE (arg0); > if (TREE_CODE_CLASS (code) == tcc_binary > || TREE_CODE_CLASS (code) == tcc_comparison > || (TREE_CODE_CLASS (code) == tcc_expression > && TREE_OPERAND_LENGTH (exp) > 1)) > arg1 = TREE_OPERAND (exp, 1); > } > + if (arg0 == NULL_TREE) > + break; > > - switch (code) > - { > - case TRUTH_NOT_EXPR: > - in_p = ! in_p, exp = arg0; > - continue; > - > - case EQ_EXPR: case NE_EXPR: > - case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR: > - /* We can only do something if the range is testing for zero > - and if the second operand is an integer constant. Note that > - saying something is "in" the range we make is done by > - complementing IN_P since it will set in the initial case of > - being not equal to zero; "out" is leaving it alone. */ > - if (low == 0 || high == 0 > - || ! integer_zerop (low) || ! integer_zerop (high) > - || TREE_CODE (arg1) != INTEGER_CST) > - break; > - > - switch (code) > - { > - case NE_EXPR: /* - [c, c] */ > - low = high = arg1; > - break; > - case EQ_EXPR: /* + [c, c] */ > - in_p = ! 
in_p, low = high = arg1; > - break; > - case GT_EXPR: /* - [-, c] */ > - low = 0, high = arg1; > - break; > - case GE_EXPR: /* + [c, -] */ > - in_p = ! in_p, low = arg1, high = 0; > - break; > - case LT_EXPR: /* - [c, -] */ > - low = arg1, high = 0; > - break; > - case LE_EXPR: /* + [-, c] */ > - in_p = ! in_p, low = 0, high = arg1; > - break; > - default: > - gcc_unreachable (); > - } > - > - /* If this is an unsigned comparison, we also know that EXP is > - greater than or equal to zero. We base the range tests we make > - on that fact, so we record it here so we can parse existing > - range tests. We test arg0_type since often the return type > - of, e.g. EQ_EXPR, is boolean. */ > - if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0)) > - { > - if (! merge_ranges (&n_in_p, &n_low, &n_high, > - in_p, low, high, 1, > - build_int_cst (arg0_type, 0), > - NULL_TREE)) > - break; > - > - in_p = n_in_p, low = n_low, high = n_high; > - > - /* If the high bound is missing, but we have a nonzero low > - bound, reverse the range so it goes from zero to the low > bound > - minus 1. */ > - if (high == 0 && low && ! integer_zerop (low)) > - { > - in_p = ! 
in_p; > - high = range_binop (MINUS_EXPR, NULL_TREE, low, 0, > - integer_one_node, 0); > - low = build_int_cst (arg0_type, 0); > - } > - } > - > - exp = arg0; > - continue; > - > - case NEGATE_EXPR: > - /* (-x) IN [a,b] -> x in [-b, -a] */ > - n_low = range_binop (MINUS_EXPR, exp_type, > - build_int_cst (exp_type, 0), > - 0, high, 1); > - n_high = range_binop (MINUS_EXPR, exp_type, > - build_int_cst (exp_type, 0), > - 0, low, 0); > - if (n_high != 0 && TREE_OVERFLOW (n_high)) > - break; > - goto normalize; > - > - case BIT_NOT_EXPR: > - /* ~ X -> -X - 1 */ > - exp = build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0), > - build_int_cst (exp_type, 1)); > - continue; > - > - case PLUS_EXPR: case MINUS_EXPR: > - if (TREE_CODE (arg1) != INTEGER_CST) > - break; > - > - /* If flag_wrapv and ARG0_TYPE is signed, then we cannot > - move a constant to the other side. */ > - if (!TYPE_UNSIGNED (arg0_type) > - && !TYPE_OVERFLOW_UNDEFINED (arg0_type)) > - break; > - > - /* If EXP is signed, any overflow in the computation is undefined, > - so we don't worry about it so long as our computations on > - the bounds don't overflow. For unsigned, overflow is defined > - and this is exactly the right thing. */ > - n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, > - arg0_type, low, 0, arg1, 0); > - n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, > - arg0_type, high, 1, arg1, 0); > - if ((n_low != 0 && TREE_OVERFLOW (n_low)) > - || (n_high != 0 && TREE_OVERFLOW (n_high))) > - break; > - > - if (TYPE_OVERFLOW_UNDEFINED (arg0_type)) > - *strict_overflow_p = true; > - > - normalize: > - /* Check for an unsigned range which has wrapped around the maximum > - value thus making n_high < n_low, and normalize it. 
*/ > - if (n_low && n_high && tree_int_cst_lt (n_high, n_low)) > - { > - low = range_binop (PLUS_EXPR, arg0_type, n_high, 0, > - integer_one_node, 0); > - high = range_binop (MINUS_EXPR, arg0_type, n_low, 0, > - integer_one_node, 0); > - > - /* If the range is of the form +/- [ x+1, x ], we won't > - be able to normalize it. But then, it represents the > - whole range or the empty set, so make it > - +/- [ -, - ]. */ > - if (tree_int_cst_equal (n_low, low) > - && tree_int_cst_equal (n_high, high)) > - low = high = 0; > - else > - in_p = ! in_p; > - } > - else > - low = n_low, high = n_high; > - > - exp = arg0; > - continue; > - > - CASE_CONVERT: case NON_LVALUE_EXPR: > - if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type)) > - break; > - > - if (! INTEGRAL_TYPE_P (arg0_type) > - || (low != 0 && ! int_fits_type_p (low, arg0_type)) > - || (high != 0 && ! int_fits_type_p (high, arg0_type))) > - break; > - > - n_low = low, n_high = high; > - > - if (n_low != 0) > - n_low = fold_convert_loc (loc, arg0_type, n_low); > - > - if (n_high != 0) > - n_high = fold_convert_loc (loc, arg0_type, n_high); > - > - > - /* If we're converting arg0 from an unsigned type, to exp, > - a signed type, we will be doing the comparison as unsigned. > - The tests above have already verified that LOW and HIGH > - are both positive. > - > - So we have to ensure that we will handle large unsigned > - values the same way that the current signed bounds treat > - negative values. */ > - > - if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type)) > - { > - tree high_positive; > - tree equiv_type; > - /* For fixed-point modes, we need to pass the saturating flag > - as the 2nd parameter. 
*/ > - if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type))) > - equiv_type = lang_hooks.types.type_for_mode > - (TYPE_MODE (arg0_type), > - TYPE_SATURATING (arg0_type)); > - else > - equiv_type = lang_hooks.types.type_for_mode > - (TYPE_MODE (arg0_type), 1); > - > - /* A range without an upper bound is, naturally, unbounded. > - Since convert would have cropped a very large value, use > - the max value for the destination type. */ > - high_positive > - = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type) > - : TYPE_MAX_VALUE (arg0_type); > - > - if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type)) > - high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type, > - fold_convert_loc (loc, arg0_type, > - high_positive), > - build_int_cst (arg0_type, 1)); > - > - /* If the low bound is specified, "and" the range with the > - range for which the original unsigned value will be > - positive. */ > - if (low != 0) > - { > - if (! merge_ranges (&n_in_p, &n_low, &n_high, > - 1, n_low, n_high, 1, > - fold_convert_loc (loc, arg0_type, > - integer_zero_node), > - high_positive)) > - break; > - > - in_p = (n_in_p == in_p); > - } > - else > - { > - /* Otherwise, "or" the range with the range of the input > - that will be interpreted as negative. */ > - if (! merge_ranges (&n_in_p, &n_low, &n_high, > - 0, n_low, n_high, 1, > - fold_convert_loc (loc, arg0_type, > - integer_zero_node), > - high_positive)) > - break; > - > - in_p = (in_p != n_in_p); > - } > - } > - > - exp = arg0; > - low = n_low, high = n_high; > - continue; > - > - default: > - break; > - } > - > - break; > + nexp = make_range_step (loc, code, arg0, arg1, exp_type, &low, > + &high, &in_p, strict_overflow_p); > + if (nexp == NULL_TREE) > + break; > + exp = nexp; > } > > /* If EXP is a constant, we can evaluate whether this is true or false. 
*/ > --- gcc/tree.h.jj 2011-09-20 21:43:07.000000000 +0200 > +++ gcc/tree.h 2011-09-27 16:38:58.000000000 +0200 > @@ -5384,7 +5384,10 @@ extern unsigned int get_pointer_alignmen > extern tree fold_call_stmt (gimple, bool); > extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum > built_in_function); > extern tree make_range (tree, int *, tree *, tree *, bool *); > +extern tree make_range_step (location_t, enum tree_code, tree, tree, tree, > + tree *, tree *, int *, bool *); > extern tree build_range_check (location_t, tree, tree, int, tree, tree); > +extern tree range_binop (enum tree_code, tree, tree, int, tree, int); > extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, > tree, tree); > extern void set_builtin_user_assembler_name (tree decl, const char *asmspec); > --- gcc/Makefile.in.jj 2011-09-18 21:20:04.000000000 +0200 > +++ gcc/Makefile.in 2011-09-29 14:09:42.000000000 +0200 > @@ -2641,7 +2641,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c > $(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) \ > tree-iterator.h $(BASIC_BLOCK_H) $(GIMPLE_H) $(TREE_INLINE_H) \ > $(VEC_H) langhooks.h alloc-pool.h pointer-set.h $(CFGLOOP_H) \ > - tree-pretty-print.h gimple-pretty-print.h > + tree-pretty-print.h gimple-pretty-print.h $(DIAGNOSTIC_CORE_H) > tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ > $(TREE_H) $(TM_P_H) $(GGC_H) output.h \ > $(DIAGNOSTIC_H) $(BASIC_BLOCK_H) $(FLAGS_H) $(TIMEVAR_H) $(TM_H) \ > --- gcc/tree-ssa-reassoc.c.jj 2011-09-08 11:21:10.000000000 +0200 > +++ gcc/tree-ssa-reassoc.c 2011-09-29 13:33:00.000000000 +0200 > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. > #include "flags.h" > #include "target.h" > #include "params.h" > +#include "diagnostic-core.h" > > /* This is a simple global reassociation pass. 
It is, in part, based > on the LLVM pass of the same name (They do some things more/less > @@ -1568,6 +1569,422 @@ optimize_ops_list (enum tree_code opcode > optimize_ops_list (opcode, ops); > } > > +/* The following functions are subroutines to optimize_range_tests and allow > + it to try to change a logical combination of comparisons into a range > + test. > + > + For example, both > + X == 2 || X == 5 || X == 3 || X == 4 > + and > + X >= 2 && X <= 5 > + are converted to > + (unsigned) (X - 2) <= 3 > + > + For more information see comments above fold_test_range in fold-const.c, > + this implementation is for GIMPLE. */ > + > +struct range_entry > +{ > + tree exp; > + tree low; > + tree high; > + bool in_p; > + bool strict_overflow_p; > + unsigned int idx, next; > +}; > + > +/* This is similar to make_range in fold-const.c, but on top of > + GIMPLE instead of trees. */ > + > +static void > +init_range_entry (struct range_entry *r, tree exp) > +{ > + int in_p; > + tree low, high; > + bool is_bool, strict_overflow_p; > + > + r->exp = NULL_TREE; > + r->in_p = false; > + r->strict_overflow_p = false; > + r->low = NULL_TREE; > + r->high = NULL_TREE; > + if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))) > + return; > + > + /* Start with simply saying "EXP != 0" and then look at the code of EXP > + and see if we can refine the range. Some of the cases below may not > + happen, but it doesn't seem worth worrying about this. We "continue" > + the outer loop when we've changed something; otherwise we "break" > + the switch, which will "break" the while. */ > + low = build_int_cst (TREE_TYPE (exp), 0); > + high = low; > + in_p = 0; > + strict_overflow_p = false; > + is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
Effective boolean are also TYPE_PRECISION () == 1 types. Remember we don't preserve conversions from BOOLEAN_TYPE to such types. > + > + while (1) > + { > + gimple stmt; > + enum tree_code code; > + tree arg0, arg1, exp_type; > + tree nexp; > + location_t loc; > + > + if (TREE_CODE (exp) != SSA_NAME) > + break; > + > + stmt = SSA_NAME_DEF_STMT (exp); > + if (!is_gimple_assign (stmt)) > + break; > + > + code = gimple_assign_rhs_code (stmt); > + arg0 = gimple_assign_rhs1 (stmt); > + arg1 = gimple_assign_rhs2 (stmt); > + exp_type = TREE_TYPE (exp); > + loc = gimple_location (stmt); > + switch (code) > + { > + case BIT_NOT_EXPR: > + if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) See above. > + { > + in_p = !in_p; > + exp = arg0; > + continue; > + } > + break; > + case SSA_NAME: > + exp = arg0; > + continue; > + CASE_CONVERT: > + is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE; Likewise. Though I wonder why !=? Does it matter whether the extension sign- or zero-extends? > + goto do_default; > + case EQ_EXPR: > + case NE_EXPR: > + case LT_EXPR: > + case LE_EXPR: > + case GE_EXPR: > + case GT_EXPR: > + is_bool = true; > + /* FALLTHRU */ > + do_default: > + default: > + if (!is_bool) > + return; > + nexp = make_range_step (loc, code, arg0, arg1, exp_type, > + &low, &high, &in_p, > + &strict_overflow_p); > + if (nexp != NULL_TREE) > + { > + exp = nexp; > + gcc_assert (TREE_CODE (exp) == SSA_NAME); > + continue; > + } > + break; > + } > + break; > + } > + if (is_bool) > + { > + r->exp = exp; > + r->in_p = in_p; > + r->low = low; > + r->high = high; > + r->strict_overflow_p = strict_overflow_p; > + } > +} > + > +/* Comparison function for qsort. Sort entries > + without SSA_NAME exp first, then with SSA_NAMEs sorted > + by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs > + by increasing ->low and if ->low is the same, by increasing > + ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE > + maximum. 
*/ > + > +static int > +range_entry_cmp (const void *a, const void *b) > +{ > + const struct range_entry *p = (const struct range_entry *) a; > + const struct range_entry *q = (const struct range_entry *) b; > + > + if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) > + { > + if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) > + { > + /* Group range_entries for the same SSA_NAME together. */ > + if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) > + return -1; > + else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) > + return 1; > + /* If ->low is different, NULL low goes first, then by > + ascending low. */ > + if (p->low != NULL_TREE) > + { > + if (q->low != NULL_TREE) > + { > + if (integer_onep (range_binop (LT_EXPR, integer_type_node, > + p->low, 0, q->low, 0))) that's just tem = fold_binary (LT_EXPR, boolean_type_node, p->low, q->low); if (tem && integer_onep (tem)) return -1; which avoids building the LT_EXPR tree if it doesn't fold. Similar below. (ISTR some integer_onep () variant that handles a NULL argument ...) > + return -1; > + if (integer_onep (range_binop (GT_EXPR, integer_type_node, > + p->low, 0, q->low, 0))) > + return 1; > + } > + else > + return 1; > + } > + else if (q->low != NULL_TREE) > + return -1; > + /* If ->high is different, NULL high goes last, before that by > + ascending high. */ > + if (p->high != NULL_TREE) > + { > + if (q->high != NULL_TREE) > + { > + if (integer_onep (range_binop (LT_EXPR, integer_type_node, > + p->high, 0, q->high, 0))) > + return -1; > + if (integer_onep (range_binop (GT_EXPR, integer_type_node, > + p->high, 0, q->high, 0))) > + return 1; > + } > + else > + return -1; > + } > + else if (p->high != NULL_TREE) > + return 1; > + /* If both ranges are the same, sort below by ascending idx. 
*/ > + } > + else > + return 1; > + } > + else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) > + return -1; > + > + if (p->idx < q->idx) > + return -1; > + else > + { > + gcc_checking_assert (p->idx > q->idx); > + return 1; > + } > +} > + > +/* Helper routine of optimize_range_test. > + [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for > + RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, > + OPCODE and OPS are arguments of optimize_range_tests. Return > + true if the range merge has been successful. */ > + > +static bool > +update_range_test (struct range_entry *range, struct range_entry *otherrange, > + unsigned int count, enum tree_code opcode, > + VEC (operand_entry_t, heap) **ops, tree exp, bool in_p, > + tree low, tree high, bool strict_overflow_p) > +{ > + tree op = VEC_index (operand_entry_t, *ops, range->idx)->op; > + location_t loc = gimple_location (SSA_NAME_DEF_STMT (op)); > + tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high); > + enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; > + gimple_stmt_iterator gsi; > + > + if (tem == NULL_TREE) > + return false; > + > + if (strict_overflow_p && issue_strict_overflow_warning (wc)) > + warning_at (loc, OPT_Wstrict_overflow, > + "assuming signed overflow does not occur " > + "when simplifying range test"); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + struct range_entry *r; > + fprintf (dump_file, "Optimizing range tests "); > + print_generic_expr (dump_file, range->exp, 0); > + fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); > + print_generic_expr (dump_file, range->low, 0); > + fprintf (dump_file, ", "); > + print_generic_expr (dump_file, range->high, 0); > + fprintf (dump_file, "]"); > + for (r = otherrange; r < otherrange + count; r++) > + { > + fprintf (dump_file, " and %c[", r->in_p ? 
'+' : '-'); > + print_generic_expr (dump_file, r->low, 0); > + fprintf (dump_file, ", "); > + print_generic_expr (dump_file, r->high, 0); > + fprintf (dump_file, "]"); > + } > + fprintf (dump_file, "\n into "); > + print_generic_expr (dump_file, tem, 0); > + fprintf (dump_file, "\n"); > + } > + > + if (opcode == BIT_IOR_EXPR) > + tem = invert_truthvalue_loc (loc, tem); > + > + tem = fold_convert_loc (loc, TREE_TYPE (op), tem); > + gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op)); > + tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, > + GSI_SAME_STMT); > + > + VEC_index (operand_entry_t, *ops, range->idx)->op = tem; > + range->exp = exp; > + range->low = low; > + range->high = high; > + range->in_p = in_p; > + range->strict_overflow_p = false; > + > + for (range = otherrange; range < otherrange + count; range++) > + { > + VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node; > + range->exp = NULL_TREE; > + } > + return true; > +} > + > +/* Optimize range tests, similarly how fold_range_test optimizes > + it on trees. The tree code for the binary > + operation between all the operands is OPCODE. */ > + > +static void > +optimize_range_tests (enum tree_code opcode, > + VEC (operand_entry_t, heap) **ops) > +{ > + unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first; > + operand_entry_t oe; > + struct range_entry *ranges; > + bool any_changes = false; > + > + if (length == 1) > + return; > + > + ranges = XNEWVEC (struct range_entry, length); > + for (i = 0; i < length; i++) > + { > + ranges[i].idx = i; > + init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, > i)->op); > + /* For | invert it now, we will invert it again before emitting > + the optimized expression. 
*/ > + if (opcode == BIT_IOR_EXPR) > + ranges[i].in_p = !ranges[i].in_p; > + } > + > + qsort (ranges, length, sizeof (*ranges), range_entry_cmp); > + for (i = 0; i < length; i++) > + if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) > + break; > + > + /* Try to merge ranges. */ > + for (first = i; i < length; i++) > + { > + tree low = ranges[i].low; > + tree high = ranges[i].high; > + int in_p = ranges[i].in_p; > + bool strict_overflow_p = ranges[i].strict_overflow_p; > + > + for (j = i + 1; j < length; j++) > + { That looks quadratic - do we want to limit this with a --param, simply partitioning the array into quadratic chunks? > + if (ranges[i].exp != ranges[j].exp) > + break; Or isn't it too bad because of this check? > + if (!merge_ranges (&in_p, &low, &high, in_p, low, high, > + ranges[j].in_p, ranges[j].low, ranges[j].high)) > + break; And this early out? I suppose some comment on why together with the sorting this is of limited complexity would help. > + strict_overflow_p |= ranges[j].strict_overflow_p; > + } > + > + if (j > i + 1 > + && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode, > + ops, ranges[i].exp, in_p, low, high, > + strict_overflow_p)) > + { > + i = j - 1; > + any_changes = true; > + } > + } > + > + /* Optimize X == CST1 || X == CST2 > + if popcount (CST1 ^ CST2) == 1 into > + (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). > + Similarly for ranges. E.g. > + X != 2 && X != 3 && X != 10 && X != 11 > + will be transformed by the above loop into > + (X - 2U) <= 1U && (X - 10U) <= 1U > + and this loop can transform that into > + ((X & ~8) - 2U) <= 1U. 
*/ > + for (i = first; i < length; i++) > + { > + tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp; > + > + if (ranges[i].exp == NULL_TREE || ranges[i].in_p) > + continue; > + type = TREE_TYPE (ranges[i].exp); > + if (!INTEGRAL_TYPE_P (type)) > + continue; > + lowi = ranges[i].low; > + if (lowi == NULL_TREE) > + lowi = TYPE_MIN_VALUE (type); > + highi = ranges[i].high; > + if (highi == NULL_TREE) > + continue; > + for (j = i + 1; j < length && j < i + 64; j++) > + { > + if (ranges[j].exp == NULL_TREE) > + continue; > + if (ranges[i].exp != ranges[j].exp) > + break; > + if (ranges[j].in_p) > + continue; > + lowj = ranges[j].low; > + if (lowj == NULL_TREE) > + continue; > + highj = ranges[j].high; > + if (highj == NULL_TREE) > + highj = TYPE_MAX_VALUE (type); > + if (!integer_onep (range_binop (GT_EXPR, integer_type_node, > + lowj, 0, highi, 0))) > + continue; See above. > + lowxor = range_binop (BIT_XOR_EXPR, NULL_TREE, lowi, 0, lowj, 0); > + if (lowxor == NULL_TREE) > + continue; Likewise. 
> + gcc_checking_assert (!integer_zerop (lowxor)); > + tem = range_binop (MINUS_EXPR, NULL_TREE, lowxor, 0, > + build_int_cst (type, 1), 0); > + tem = range_binop (BIT_AND_EXPR, NULL_TREE, lowxor, 0, tem, 0); > + if (!integer_zerop (tem)) > + continue; > + highxor = range_binop (BIT_XOR_EXPR, NULL_TREE, highi, 0, highj, 0); > + if (!tree_int_cst_equal (lowxor, highxor)) > + continue; > + tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); > + exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem); > + lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); > + highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); > + if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp, > + ranges[i].in_p, lowj, highj, > + ranges[i].strict_overflow_p > + || ranges[j].strict_overflow_p)) > + { > + any_changes = true; > + break; > + } > + } > + } > + > + if (any_changes) > + { > + j = 0; > + FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe) > + { > + if (oe->op == error_mark_node) > + continue; > + else if (i != j) > + VEC_replace (operand_entry_t, *ops, j, oe); > + j++; > + } > + VEC_truncate (operand_entry_t, *ops, j); > + } > + > + XDELETEVEC (ranges); > +} > + > /* Return true if OPERAND is defined by a PHI node which uses the LHS > of STMT in it's operands. This is also known as a "destructive > update" operation. */ > @@ -2447,6 +2864,9 @@ reassociate_bb (basic_block bb) > optimize_ops_list (rhs_code, &ops); > } > > + if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) I think we want to check that the type is boolean here, so we don't set up the machinery needlessly? Otherwise it looks like a nice improvement. Thanks, Richard. 
> + optimize_range_tests (rhs_code, &ops); > + > if (VEC_length (operand_entry_t, ops) == 1) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > --- gcc/testsuite/gcc.dg/pr46309.c.jj 2011-09-29 14:03:28.000000000 +0200 > +++ gcc/testsuite/gcc.dg/pr46309.c 2011-09-29 14:08:32.000000000 +0200 > @@ -0,0 +1,66 @@ > +/* PR tree-optimization/46309 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-reassoc-details" } */ > + > +int > +f1 (int a) > +{ > + int v1 = (a == 3); > + int v2 = (a == 1); > + int v3 = (a == 4); > + int v4 = (a == 2); > + return v1 || v2 || v3 || v4; > +} > + > +int > +f2 (int a) > +{ > + int v1 = (a == 1); > + int v2 = (a == 2); > + int v3 = (a == 3); > + int v4 = (a == 4); > + return v1 || v2 || v3 || v4; > +} > + > +int > +f3 (int a) > +{ > + int v1 = (a == 3); > + int v2 = (a == 1); > + return v1 || v2; > +} > + > +int > +f4 (int a) > +{ > + int v1 = (a == 1); > + int v2 = (a == 2); > + return v1 || v2; > +} > + > +int > +f5 (unsigned int a) > +{ > + int v1 = (a <= 31); > + int v2 = (a >= 64 && a <= 95); > + return v1 || v2; > +} > + > +int > +f6 (unsigned int a) > +{ > + int v1 = (a <= 31); > + int v2 = (a >= 64 && a <= 95); > + int v3 = (a >= 128 && a <= 159); > + int v4 = (a >= 192 && a <= 223); > + return v1 || v2 || v3 || v4; > +} > + > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.1, 1. and -.2, 2. and -.3, 3. and -.4, 4.\[\n\r\]* into" 2 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.1, 1. and -.3, 3.\[\n\r\]* into" 1 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.1, 1. and -.2, 2.\[\n\r\]* into" 1 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.0, 31. and -.64, 95.\[\n\r\]* into" 2 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.128, 159. 
and -.192, 223.\[\n\r\]* into" 1 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests > D.\[0-9\]*_\[0-9\]* -.0, 31. and -.128, 159.\[\n\r\]* into" 1 "reassoc2" } } > */ > +/* { dg-final { cleanup-tree-dump "reassoc1" } } */ > +/* { dg-final { cleanup-tree-dump "reassoc2" } } */ > > Jakub >