On Thu, Sep 29, 2011 at 11:15 PM, Jakub Jelinek <ja...@redhat.com> wrote: > Hi! > > This patch implements a fold_range_test like optimization on GIMPLE, inside > tree-ssa-reassoc and tweaks fold-const.c so that most of the code can be > shared in between the two. > The advantage of the reassoc optimization is that it doesn't attempt to > merge just 2 ranges at a time, instead it sorts the ranges for the same > SSA_NAME and thus can optimize even cases where source code doesn't have > the numbers in a range test in increasing or decreasing order (and also > can optimize things that were in multiple statements in the source). > Additionally, it optimizes cases like > x == 1 || x == 3 > into (x & ~2) == 1. > > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? > > 2011-09-27 Jakub Jelinek <ja...@redhat.com> > > PR tree-optimization/46309 > * fold-const.c (make_range, merge_ranges): Remove prototypes. > (range_binop): Likewise, no longer static. > (make_range_step): New function. > (make_range): Use it. > * tree.h (make_range_step, range_binop): New prototypes. > * Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H). > * tree-ssa-reassoc.c: Include diagnostic-core.h. > (struct range_entry): New type. > (init_range_entry, range_entry_cmp, update_range_test, > optimize_range_tests): New functions. > (reassociate_bb): Call optimize_range_tests. > > * gcc.dg/pr46309.c: New test. 
> > --- gcc/fold-const.c.jj 2011-09-05 12:28:53.000000000 +0200 > +++ gcc/fold-const.c 2011-09-27 16:39:09.000000000 +0200 > @@ -112,12 +112,8 @@ static tree decode_field_reference (loca > static int all_ones_mask_p (const_tree, int); > static tree sign_bit_p (tree, const_tree); > static int simple_operand_p (const_tree); > -static tree range_binop (enum tree_code, tree, tree, int, tree, int); > static tree range_predecessor (tree); > static tree range_successor (tree); > -extern tree make_range (tree, int *, tree *, tree *, bool *); > -extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, > - tree, tree); > static tree fold_range_test (location_t, enum tree_code, tree, tree, tree); > static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, > tree); > static tree unextend (tree, int, int, tree); > @@ -3731,7 +3727,7 @@ simple_operand_p (const_tree exp) > must be specified for a comparison. ARG1 will be converted to ARG0's > type if both are specified. */ > > -static tree > +tree > range_binop (enum tree_code code, tree type, tree arg0, int upper0_p, > tree arg1, int upper1_p) > { > @@ -3790,6 +3786,255 @@ range_binop (enum tree_code code, tree t > return constant_boolean_node (result, type); > } > > +/* Helper routine for make_range. Perform one step for it, return > + new expression if the loop should continue or NULL_TREE if it should > + stop. */ > + > +tree > +make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1, > + tree exp_type, tree *p_low, tree *p_high, int *p_in_p, > + bool *strict_overflow_p) > +{ > + tree arg0_type = TREE_TYPE (arg0); > + tree n_low, n_high, low = *p_low, high = *p_high; > + int in_p = *p_in_p, n_in_p; > + > + switch (code) > + { > + case TRUTH_NOT_EXPR: > + *p_in_p = ! 
in_p; > + return arg0; > + > + case EQ_EXPR: case NE_EXPR: > + case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR: > + /* We can only do something if the range is testing for zero > + and if the second operand is an integer constant. Note that > + saying something is "in" the range we make is done by > + complementing IN_P since it will set in the initial case of > + being not equal to zero; "out" is leaving it alone. */ > + if (low == NULL_TREE || high == NULL_TREE > + || ! integer_zerop (low) || ! integer_zerop (high) > + || TREE_CODE (arg1) != INTEGER_CST) > + return NULL_TREE; > + > + switch (code) > + { > + case NE_EXPR: /* - [c, c] */ > + low = high = arg1; > + break; > + case EQ_EXPR: /* + [c, c] */ > + in_p = ! in_p, low = high = arg1; > + break; > + case GT_EXPR: /* - [-, c] */ > + low = 0, high = arg1; > + break; > + case GE_EXPR: /* + [c, -] */ > + in_p = ! in_p, low = arg1, high = 0; > + break; > + case LT_EXPR: /* - [c, -] */ > + low = arg1, high = 0; > + break; > + case LE_EXPR: /* + [-, c] */ > + in_p = ! in_p, low = 0, high = arg1; > + break; > + default: > + gcc_unreachable (); > + } > + > + /* If this is an unsigned comparison, we also know that EXP is > + greater than or equal to zero. We base the range tests we make > + on that fact, so we record it here so we can parse existing > + range tests. We test arg0_type since often the return type > + of, e.g. EQ_EXPR, is boolean. */ > + if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0)) > + { > + if (! merge_ranges (&n_in_p, &n_low, &n_high, > + in_p, low, high, 1, > + build_int_cst (arg0_type, 0), > + NULL_TREE)) > + return NULL_TREE; > + > + in_p = n_in_p, low = n_low, high = n_high; > + > + /* If the high bound is missing, but we have a nonzero low > + bound, reverse the range so it goes from zero to the low bound > + minus 1. */ > + if (high == 0 && low && ! integer_zerop (low)) > + { > + in_p = ! 
in_p; > + high = range_binop (MINUS_EXPR, NULL_TREE, low, 0, > + integer_one_node, 0); > + low = build_int_cst (arg0_type, 0); > + } > + } > + > + *p_low = low; > + *p_high = high; > + *p_in_p = in_p; > + return arg0; > + > + case NEGATE_EXPR: > + /* (-x) IN [a,b] -> x in [-b, -a] */ > + n_low = range_binop (MINUS_EXPR, exp_type, > + build_int_cst (exp_type, 0), > + 0, high, 1); > + n_high = range_binop (MINUS_EXPR, exp_type, > + build_int_cst (exp_type, 0), > + 0, low, 0); > + if (n_high != 0 && TREE_OVERFLOW (n_high)) > + return NULL_TREE; > + goto normalize; > + > + case BIT_NOT_EXPR: > + /* ~ X -> -X - 1 */ > + return build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0), > + build_int_cst (exp_type, 1)); > + > + case PLUS_EXPR: > + case MINUS_EXPR: > + if (TREE_CODE (arg1) != INTEGER_CST) > + return NULL_TREE; > + > + /* If flag_wrapv and ARG0_TYPE is signed, then we cannot > + move a constant to the other side. */ > + if (!TYPE_UNSIGNED (arg0_type) > + && !TYPE_OVERFLOW_UNDEFINED (arg0_type)) > + return NULL_TREE; > + > + /* If EXP is signed, any overflow in the computation is undefined, > + so we don't worry about it so long as our computations on > + the bounds don't overflow. For unsigned, overflow is defined > + and this is exactly the right thing. */ > + n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, > + arg0_type, low, 0, arg1, 0); > + n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, > + arg0_type, high, 1, arg1, 0); > + if ((n_low != 0 && TREE_OVERFLOW (n_low)) > + || (n_high != 0 && TREE_OVERFLOW (n_high))) > + return NULL_TREE; > + > + if (TYPE_OVERFLOW_UNDEFINED (arg0_type)) > + *strict_overflow_p = true; > + > + normalize: > + /* Check for an unsigned range which has wrapped around the maximum > + value thus making n_high < n_low, and normalize it. 
*/ > + if (n_low && n_high && tree_int_cst_lt (n_high, n_low)) > + { > + low = range_binop (PLUS_EXPR, arg0_type, n_high, 0, > + integer_one_node, 0); > + high = range_binop (MINUS_EXPR, arg0_type, n_low, 0, > + integer_one_node, 0); > + > + /* If the range is of the form +/- [ x+1, x ], we won't > + be able to normalize it. But then, it represents the > + whole range or the empty set, so make it > + +/- [ -, - ]. */ > + if (tree_int_cst_equal (n_low, low) > + && tree_int_cst_equal (n_high, high)) > + low = high = 0; > + else > + in_p = ! in_p; > + } > + else > + low = n_low, high = n_high; > + > + *p_low = low; > + *p_high = high; > + *p_in_p = in_p; > + return arg0; > + > + CASE_CONVERT: > + case NON_LVALUE_EXPR: > + if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type)) > + return NULL_TREE; > + > + if (! INTEGRAL_TYPE_P (arg0_type) > + || (low != 0 && ! int_fits_type_p (low, arg0_type)) > + || (high != 0 && ! int_fits_type_p (high, arg0_type))) > + return NULL_TREE; > + > + n_low = low, n_high = high; > + > + if (n_low != 0) > + n_low = fold_convert_loc (loc, arg0_type, n_low); > + > + if (n_high != 0) > + n_high = fold_convert_loc (loc, arg0_type, n_high); > + > + /* If we're converting arg0 from an unsigned type, to exp, > + a signed type, we will be doing the comparison as unsigned. > + The tests above have already verified that LOW and HIGH > + are both positive. > + > + So we have to ensure that we will handle large unsigned > + values the same way that the current signed bounds treat > + negative values. */ > + > + if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type)) > + { > + tree high_positive; > + tree equiv_type; > + /* For fixed-point modes, we need to pass the saturating flag > + as the 2nd parameter. 
*/ > + if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type))) > + equiv_type > + = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), > + TYPE_SATURATING (arg0_type)); > + else > + equiv_type > + = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), 1); > + > + /* A range without an upper bound is, naturally, unbounded. > + Since convert would have cropped a very large value, use > + the max value for the destination type. */ > + high_positive > + = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type) > + : TYPE_MAX_VALUE (arg0_type); > + > + if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type)) > + high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type, > + fold_convert_loc (loc, arg0_type, > + high_positive), > + build_int_cst (arg0_type, 1)); > + > + /* If the low bound is specified, "and" the range with the > + range for which the original unsigned value will be > + positive. */ > + if (low != 0) > + { > + if (! merge_ranges (&n_in_p, &n_low, &n_high, 1, n_low, n_high, > + 1, fold_convert_loc (loc, arg0_type, > + integer_zero_node), > + high_positive)) > + return NULL_TREE; > + > + in_p = (n_in_p == in_p); > + } > + else > + { > + /* Otherwise, "or" the range with the range of the input > + that will be interpreted as negative. */ > + if (! merge_ranges (&n_in_p, &n_low, &n_high, 0, n_low, n_high, > + 1, fold_convert_loc (loc, arg0_type, > + integer_zero_node), > + high_positive)) > + return NULL_TREE; > + > + in_p = (in_p != n_in_p); > + } > + } > + > + *p_low = n_low; > + *p_high = n_high; > + *p_in_p = in_p; > + return arg0; > + > + default: > + return NULL_TREE; > + } > +} > + > /* Given EXP, a logical expression, set the range it is testing into > variables denoted by PIN_P, PLOW, and PHIGH. Return the expression > actually being tested. 
*PLOW and *PHIGH will be made of the same > @@ -3804,10 +4049,10 @@ make_range (tree exp, int *pin_p, tree * > bool *strict_overflow_p) > { > enum tree_code code; > - tree arg0 = NULL_TREE, arg1 = NULL_TREE; > - tree exp_type = NULL_TREE, arg0_type = NULL_TREE; > - int in_p, n_in_p; > - tree low, high, n_low, n_high; > + tree arg0, arg1 = NULL_TREE; > + tree exp_type, nexp; > + int in_p; > + tree low, high; > location_t loc = EXPR_LOCATION (exp); > > /* Start with simply saying "EXP != 0" and then look at the code of EXP > @@ -3823,255 +4068,26 @@ make_range (tree exp, int *pin_p, tree * > { > code = TREE_CODE (exp); > exp_type = TREE_TYPE (exp); > + arg0 = NULL_TREE; > > if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) > { > if (TREE_OPERAND_LENGTH (exp) > 0) > arg0 = TREE_OPERAND (exp, 0); > - if (TREE_CODE_CLASS (code) == tcc_comparison > - || TREE_CODE_CLASS (code) == tcc_unary > - || TREE_CODE_CLASS (code) == tcc_binary) > - arg0_type = TREE_TYPE (arg0); > if (TREE_CODE_CLASS (code) == tcc_binary > || TREE_CODE_CLASS (code) == tcc_comparison > || (TREE_CODE_CLASS (code) == tcc_expression > && TREE_OPERAND_LENGTH (exp) > 1)) > arg1 = TREE_OPERAND (exp, 1); > } > + if (arg0 == NULL_TREE) > + break; > > - switch (code) > - { > - case TRUTH_NOT_EXPR: > - in_p = ! in_p, exp = arg0; > - continue; > - > - case EQ_EXPR: case NE_EXPR: > - case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR: > - /* We can only do something if the range is testing for zero > - and if the second operand is an integer constant. Note that > - saying something is "in" the range we make is done by > - complementing IN_P since it will set in the initial case of > - being not equal to zero; "out" is leaving it alone. */ > - if (low == 0 || high == 0 > - || ! integer_zerop (low) || ! integer_zerop (high) > - || TREE_CODE (arg1) != INTEGER_CST) > - break; > - > - switch (code) > - { > - case NE_EXPR: /* - [c, c] */ > - low = high = arg1; > - break; > - case EQ_EXPR: /* + [c, c] */ > - in_p = ! 
in_p, low = high = arg1; > - break; > - case GT_EXPR: /* - [-, c] */ > - low = 0, high = arg1; > - break; > - case GE_EXPR: /* + [c, -] */ > - in_p = ! in_p, low = arg1, high = 0; > - break; > - case LT_EXPR: /* - [c, -] */ > - low = arg1, high = 0; > - break; > - case LE_EXPR: /* + [-, c] */ > - in_p = ! in_p, low = 0, high = arg1; > - break; > - default: > - gcc_unreachable (); > - } > - > - /* If this is an unsigned comparison, we also know that EXP is > - greater than or equal to zero. We base the range tests we make > - on that fact, so we record it here so we can parse existing > - range tests. We test arg0_type since often the return type > - of, e.g. EQ_EXPR, is boolean. */ > - if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0)) > - { > - if (! merge_ranges (&n_in_p, &n_low, &n_high, > - in_p, low, high, 1, > - build_int_cst (arg0_type, 0), > - NULL_TREE)) > - break; > - > - in_p = n_in_p, low = n_low, high = n_high; > - > - /* If the high bound is missing, but we have a nonzero low > - bound, reverse the range so it goes from zero to the low > bound > - minus 1. */ > - if (high == 0 && low && ! integer_zerop (low)) > - { > - in_p = ! 
in_p; > - high = range_binop (MINUS_EXPR, NULL_TREE, low, 0, > - integer_one_node, 0); > - low = build_int_cst (arg0_type, 0); > - } > - } > - > - exp = arg0; > - continue; > - > - case NEGATE_EXPR: > - /* (-x) IN [a,b] -> x in [-b, -a] */ > - n_low = range_binop (MINUS_EXPR, exp_type, > - build_int_cst (exp_type, 0), > - 0, high, 1); > - n_high = range_binop (MINUS_EXPR, exp_type, > - build_int_cst (exp_type, 0), > - 0, low, 0); > - if (n_high != 0 && TREE_OVERFLOW (n_high)) > - break; > - goto normalize; > - > - case BIT_NOT_EXPR: > - /* ~ X -> -X - 1 */ > - exp = build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0), > - build_int_cst (exp_type, 1)); > - continue; > - > - case PLUS_EXPR: case MINUS_EXPR: > - if (TREE_CODE (arg1) != INTEGER_CST) > - break; > - > - /* If flag_wrapv and ARG0_TYPE is signed, then we cannot > - move a constant to the other side. */ > - if (!TYPE_UNSIGNED (arg0_type) > - && !TYPE_OVERFLOW_UNDEFINED (arg0_type)) > - break; > - > - /* If EXP is signed, any overflow in the computation is undefined, > - so we don't worry about it so long as our computations on > - the bounds don't overflow. For unsigned, overflow is defined > - and this is exactly the right thing. */ > - n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, > - arg0_type, low, 0, arg1, 0); > - n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, > - arg0_type, high, 1, arg1, 0); > - if ((n_low != 0 && TREE_OVERFLOW (n_low)) > - || (n_high != 0 && TREE_OVERFLOW (n_high))) > - break; > - > - if (TYPE_OVERFLOW_UNDEFINED (arg0_type)) > - *strict_overflow_p = true; > - > - normalize: > - /* Check for an unsigned range which has wrapped around the maximum > - value thus making n_high < n_low, and normalize it. 
*/ > - if (n_low && n_high && tree_int_cst_lt (n_high, n_low)) > - { > - low = range_binop (PLUS_EXPR, arg0_type, n_high, 0, > - integer_one_node, 0); > - high = range_binop (MINUS_EXPR, arg0_type, n_low, 0, > - integer_one_node, 0); > - > - /* If the range is of the form +/- [ x+1, x ], we won't > - be able to normalize it. But then, it represents the > - whole range or the empty set, so make it > - +/- [ -, - ]. */ > - if (tree_int_cst_equal (n_low, low) > - && tree_int_cst_equal (n_high, high)) > - low = high = 0; > - else > - in_p = ! in_p; > - } > - else > - low = n_low, high = n_high; > - > - exp = arg0; > - continue; > - > - CASE_CONVERT: case NON_LVALUE_EXPR: > - if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type)) > - break; > - > - if (! INTEGRAL_TYPE_P (arg0_type) > - || (low != 0 && ! int_fits_type_p (low, arg0_type)) > - || (high != 0 && ! int_fits_type_p (high, arg0_type))) > - break; > - > - n_low = low, n_high = high; > - > - if (n_low != 0) > - n_low = fold_convert_loc (loc, arg0_type, n_low); > - > - if (n_high != 0) > - n_high = fold_convert_loc (loc, arg0_type, n_high); > - > - > - /* If we're converting arg0 from an unsigned type, to exp, > - a signed type, we will be doing the comparison as unsigned. > - The tests above have already verified that LOW and HIGH > - are both positive. > - > - So we have to ensure that we will handle large unsigned > - values the same way that the current signed bounds treat > - negative values. */ > - > - if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type)) > - { > - tree high_positive; > - tree equiv_type; > - /* For fixed-point modes, we need to pass the saturating flag > - as the 2nd parameter. 
*/ > - if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type))) > - equiv_type = lang_hooks.types.type_for_mode > - (TYPE_MODE (arg0_type), > - TYPE_SATURATING (arg0_type)); > - else > - equiv_type = lang_hooks.types.type_for_mode > - (TYPE_MODE (arg0_type), 1); > - > - /* A range without an upper bound is, naturally, unbounded. > - Since convert would have cropped a very large value, use > - the max value for the destination type. */ > - high_positive > - = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type) > - : TYPE_MAX_VALUE (arg0_type); > - > - if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type)) > - high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type, > - fold_convert_loc (loc, arg0_type, > - high_positive), > - build_int_cst (arg0_type, 1)); > - > - /* If the low bound is specified, "and" the range with the > - range for which the original unsigned value will be > - positive. */ > - if (low != 0) > - { > - if (! merge_ranges (&n_in_p, &n_low, &n_high, > - 1, n_low, n_high, 1, > - fold_convert_loc (loc, arg0_type, > - integer_zero_node), > - high_positive)) > - break; > - > - in_p = (n_in_p == in_p); > - } > - else > - { > - /* Otherwise, "or" the range with the range of the input > - that will be interpreted as negative. */ > - if (! merge_ranges (&n_in_p, &n_low, &n_high, > - 0, n_low, n_high, 1, > - fold_convert_loc (loc, arg0_type, > - integer_zero_node), > - high_positive)) > - break; > - > - in_p = (in_p != n_in_p); > - } > - } > - > - exp = arg0; > - low = n_low, high = n_high; > - continue; > - > - default: > - break; > - } > - > - break; > + nexp = make_range_step (loc, code, arg0, arg1, exp_type, &low, > + &high, &in_p, strict_overflow_p); > + if (nexp == NULL_TREE) > + break; > + exp = nexp; > } > > /* If EXP is a constant, we can evaluate whether this is true or false. 
*/ > --- gcc/tree.h.jj 2011-09-20 21:43:07.000000000 +0200 > +++ gcc/tree.h 2011-09-27 16:38:58.000000000 +0200 > @@ -5384,7 +5384,10 @@ extern unsigned int get_pointer_alignmen > extern tree fold_call_stmt (gimple, bool); > extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum > built_in_function); > extern tree make_range (tree, int *, tree *, tree *, bool *); > +extern tree make_range_step (location_t, enum tree_code, tree, tree, tree, > + tree *, tree *, int *, bool *); > extern tree build_range_check (location_t, tree, tree, int, tree, tree); > +extern tree range_binop (enum tree_code, tree, tree, int, tree, int); > extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, > tree, tree); > extern void set_builtin_user_assembler_name (tree decl, const char *asmspec); > --- gcc/Makefile.in.jj 2011-09-18 21:20:04.000000000 +0200 > +++ gcc/Makefile.in 2011-09-29 14:09:42.000000000 +0200 > @@ -2641,7 +2641,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c > $(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) \ > tree-iterator.h $(BASIC_BLOCK_H) $(GIMPLE_H) $(TREE_INLINE_H) \ > $(VEC_H) langhooks.h alloc-pool.h pointer-set.h $(CFGLOOP_H) \ > - tree-pretty-print.h gimple-pretty-print.h > + tree-pretty-print.h gimple-pretty-print.h $(DIAGNOSTIC_CORE_H) > tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ > $(TREE_H) $(TM_P_H) $(GGC_H) output.h \ > $(DIAGNOSTIC_H) $(BASIC_BLOCK_H) $(FLAGS_H) $(TIMEVAR_H) $(TM_H) \ > --- gcc/tree-ssa-reassoc.c.jj 2011-09-08 11:21:10.000000000 +0200 > +++ gcc/tree-ssa-reassoc.c 2011-09-29 13:33:00.000000000 +0200 > @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. > #include "flags.h" > #include "target.h" > #include "params.h" > +#include "diagnostic-core.h" > > /* This is a simple global reassociation pass. 
It is, in part, based > on the LLVM pass of the same name (They do some things more/less > @@ -1568,6 +1569,422 @@ optimize_ops_list (enum tree_code opcode > optimize_ops_list (opcode, ops); > } > > +/* The following functions are subroutines to optimize_range_tests and allow > + it to try to change a logical combination of comparisons into a range > + test. > + > + For example, both > + X == 2 || X == 5 || X == 3 || X == 4 > + and > + X >= 2 && X <= 5 > + are converted to > + (unsigned) (X - 2) <= 3 > + > + For more information see comments above fold_test_range in fold-const.c, > + this implementation is for GIMPLE. */ > + > +struct range_entry > +{ > + tree exp; > + tree low; > + tree high; > + bool in_p; > + bool strict_overflow_p; > + unsigned int idx, next; > +}; > + > +/* This is similar to make_range in fold-const.c, but on top of > + GIMPLE instead of trees. */ > + > +static void > +init_range_entry (struct range_entry *r, tree exp) > +{ > + int in_p; > + tree low, high; > + bool is_bool, strict_overflow_p; > + > + r->exp = NULL_TREE; > + r->in_p = false; > + r->strict_overflow_p = false; > + r->low = NULL_TREE; > + r->high = NULL_TREE; > + if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))) > + return; > + > + /* Start with simply saying "EXP != 0" and then look at the code of EXP > + and see if we can refine the range. Some of the cases below may not > + happen, but it doesn't seem worth worrying about this. We "continue" > + the outer loop when we've changed something; otherwise we "break" > + the switch, which will "break" the while. */ > + low = build_int_cst (TREE_TYPE (exp), 0); > + high = low; > + in_p = 0; > + strict_overflow_p = false; > + is_bool = TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE;
Effective boolean are also TYPE_PRECISION () == 1 types. Remember we don't preserve conversions from BOOLEAN_TYPE to such types. > + > + while (1) > + { > + gimple stmt; > + enum tree_code code; > + tree arg0, arg1, exp_type; > + tree nexp; > + location_t loc; > + > + if (TREE_CODE (exp) != SSA_NAME) > + break; > + > + stmt = SSA_NAME_DEF_STMT (exp); > + if (!is_gimple_assign (stmt)) > + break; > + > + code = gimple_assign_rhs_code (stmt); > + arg0 = gimple_assign_rhs1 (stmt); > + arg1 = gimple_assign_rhs2 (stmt); > + exp_type = TREE_TYPE (exp); > + loc = gimple_location (stmt); > + switch (code) > + { > + case BIT_NOT_EXPR: > + if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) See above. > + { > + in_p = !in_p; > + exp = arg0; > + continue; > + } > + break; > + case SSA_NAME: > + exp = arg0; > + continue; > + CASE_CONVERT: > + is_bool |= TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE; Likewise. Though I wonder why !=? Does it matter whether the extension sign- or zero-extends? > + goto do_default; > + case EQ_EXPR: > + case NE_EXPR: > + case LT_EXPR: > + case LE_EXPR: > + case GE_EXPR: > + case GT_EXPR: > + is_bool = true; > + /* FALLTHRU */ > + do_default: > + default: > + if (!is_bool) > + return; > + nexp = make_range_step (loc, code, arg0, arg1, exp_type, > + &low, &high, &in_p, > + &strict_overflow_p); > + if (nexp != NULL_TREE) > + { > + exp = nexp; > + gcc_assert (TREE_CODE (exp) == SSA_NAME); > + continue; > + } > + break; > + } > + break; > + } > + if (is_bool) > + { > + r->exp = exp; > + r->in_p = in_p; > + r->low = low; > + r->high = high; > + r->strict_overflow_p = strict_overflow_p; > + } > +} > + > +/* Comparison function for qsort. Sort entries > + without SSA_NAME exp first, then with SSA_NAMEs sorted > + by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs > + by increasing ->low and if ->low is the same, by increasing > + ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE > + maximum. 
*/ > + > +static int > +range_entry_cmp (const void *a, const void *b) > +{ > + const struct range_entry *p = (const struct range_entry *) a; > + const struct range_entry *q = (const struct range_entry *) b; > + > + if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) > + { > + if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) > + { > + /* Group range_entries for the same SSA_NAME together. */ > + if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) > + return -1; > + else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) > + return 1; > + /* If ->low is different, NULL low goes first, then by > + ascending low. */ > + if (p->low != NULL_TREE) > + { > + if (q->low != NULL_TREE) > + { > + if (integer_onep (range_binop (LT_EXPR, integer_type_node, > + p->low, 0, q->low, 0))) that's just tem = fold_binary (LT_EXPR, boolean_type_node, p->low, q->low); if (tem && integer_onep (tem)) return -1; which avoids building the LT_EXPR tree if it doesn't fold. Similar below. (ISTR some integer_onep () variant that handles a NULL argument ...) > + return -1; > + if (integer_onep (range_binop (GT_EXPR, integer_type_node, > + p->low, 0, q->low, 0))) > + return 1; > + } > + else > + return 1; > + } > + else if (q->low != NULL_TREE) > + return -1; > + /* If ->high is different, NULL high goes last, before that by > + ascending high. */ > + if (p->high != NULL_TREE) > + { > + if (q->high != NULL_TREE) > + { > + if (integer_onep (range_binop (LT_EXPR, integer_type_node, > + p->high, 0, q->high, 0))) > + return -1; > + if (integer_onep (range_binop (GT_EXPR, integer_type_node, > + p->high, 0, q->high, 0))) > + return 1; > + } > + else > + return -1; > + } > + else if (p->high != NULL_TREE) > + return 1; > + /* If both ranges are the same, sort below by ascending idx. 
*/ > + } > + else > + return 1; > + } > + else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) > + return -1; > + > + if (p->idx < q->idx) > + return -1; > + else > + { > + gcc_checking_assert (p->idx > q->idx); > + return 1; > + } > +} > + > +/* Helper routine of optimize_range_test. > + [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for > + RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, > + OPCODE and OPS are arguments of optimize_range_tests. Return > + true if the range merge has been successful. */ > + > +static bool > +update_range_test (struct range_entry *range, struct range_entry *otherrange, > + unsigned int count, enum tree_code opcode, > + VEC (operand_entry_t, heap) **ops, tree exp, bool in_p, > + tree low, tree high, bool strict_overflow_p) > +{ > + tree op = VEC_index (operand_entry_t, *ops, range->idx)->op; > + location_t loc = gimple_location (SSA_NAME_DEF_STMT (op)); > + tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high); > + enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; > + gimple_stmt_iterator gsi; > + > + if (tem == NULL_TREE) > + return false; > + > + if (strict_overflow_p && issue_strict_overflow_warning (wc)) > + warning_at (loc, OPT_Wstrict_overflow, > + "assuming signed overflow does not occur " > + "when simplifying range test"); > + > + if (dump_file && (dump_flags & TDF_DETAILS)) > + { > + struct range_entry *r; > + fprintf (dump_file, "Optimizing range tests "); > + print_generic_expr (dump_file, range->exp, 0); > + fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); > + print_generic_expr (dump_file, range->low, 0); > + fprintf (dump_file, ", "); > + print_generic_expr (dump_file, range->high, 0); > + fprintf (dump_file, "]"); > + for (r = otherrange; r < otherrange + count; r++) > + { > + fprintf (dump_file, " and %c[", r->in_p ? 
'+' : '-'); > + print_generic_expr (dump_file, r->low, 0); > + fprintf (dump_file, ", "); > + print_generic_expr (dump_file, r->high, 0); > + fprintf (dump_file, "]"); > + } > + fprintf (dump_file, "\n into "); > + print_generic_expr (dump_file, tem, 0); > + fprintf (dump_file, "\n"); > + } > + > + if (opcode == BIT_IOR_EXPR) > + tem = invert_truthvalue_loc (loc, tem); > + > + tem = fold_convert_loc (loc, TREE_TYPE (op), tem); > + gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op)); > + tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, > + GSI_SAME_STMT); > + > + VEC_index (operand_entry_t, *ops, range->idx)->op = tem; > + range->exp = exp; > + range->low = low; > + range->high = high; > + range->in_p = in_p; > + range->strict_overflow_p = false; > + > + for (range = otherrange; range < otherrange + count; range++) > + { > + VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node; > + range->exp = NULL_TREE; > + } > + return true; > +} > + > +/* Optimize range tests, similarly how fold_range_test optimizes > + it on trees. The tree code for the binary > + operation between all the operands is OPCODE. */ > + > +static void > +optimize_range_tests (enum tree_code opcode, > + VEC (operand_entry_t, heap) **ops) > +{ > + unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first; > + operand_entry_t oe; > + struct range_entry *ranges; > + bool any_changes = false; > + > + if (length == 1) > + return; > + > + ranges = XNEWVEC (struct range_entry, length); > + for (i = 0; i < length; i++) > + { > + ranges[i].idx = i; > + init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, > i)->op); > + /* For | invert it now, we will invert it again before emitting > + the optimized expression. 
*/ > + if (opcode == BIT_IOR_EXPR) > + ranges[i].in_p = !ranges[i].in_p; > + } > + > + qsort (ranges, length, sizeof (*ranges), range_entry_cmp); > + for (i = 0; i < length; i++) > + if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) > + break; > + > + /* Try to merge ranges. */ > + for (first = i; i < length; i++) > + { > + tree low = ranges[i].low; > + tree high = ranges[i].high; > + int in_p = ranges[i].in_p; > + bool strict_overflow_p = ranges[i].strict_overflow_p; > + > + for (j = i + 1; j < length; j++) > + { That looks quadratic - do we want to limit this with a --param, simply partitioning the array into quadratic chunks? > + if (ranges[i].exp != ranges[j].exp) > + break; Or isn't it too bad because of this check? > + if (!merge_ranges (&in_p, &low, &high, in_p, low, high, > + ranges[j].in_p, ranges[j].low, ranges[j].high)) > + break; And this early out? I suppose some comment on why together with the sorting this is of limited complexity would help. > + strict_overflow_p |= ranges[j].strict_overflow_p; > + } > + > + if (j > i + 1 > + && update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode, > + ops, ranges[i].exp, in_p, low, high, > + strict_overflow_p)) > + { > + i = j - 1; > + any_changes = true; > + } > + } > + > + /* Optimize X == CST1 || X == CST2 > + if popcount (CST1 ^ CST2) == 1 into > + (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). > + Similarly for ranges. E.g. > + X != 2 && X != 3 && X != 10 && X != 11 > + will be transformed by the above loop into > + (X - 2U) <= 1U && (X - 10U) <= 1U > + and this loop can transform that into > + ((X & ~8) - 2U) <= 1U. 
*/ > + for (i = first; i < length; i++) > + { > + tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp; > + > + if (ranges[i].exp == NULL_TREE || ranges[i].in_p) > + continue; > + type = TREE_TYPE (ranges[i].exp); > + if (!INTEGRAL_TYPE_P (type)) > + continue; > + lowi = ranges[i].low; > + if (lowi == NULL_TREE) > + lowi = TYPE_MIN_VALUE (type); > + highi = ranges[i].high; > + if (highi == NULL_TREE) > + continue; > + for (j = i + 1; j < length && j < i + 64; j++) > + { > + if (ranges[j].exp == NULL_TREE) > + continue; > + if (ranges[i].exp != ranges[j].exp) > + break; > + if (ranges[j].in_p) > + continue; > + lowj = ranges[j].low; > + if (lowj == NULL_TREE) > + continue; > + highj = ranges[j].high; > + if (highj == NULL_TREE) > + highj = TYPE_MAX_VALUE (type); > + if (!integer_onep (range_binop (GT_EXPR, integer_type_node, > + lowj, 0, highi, 0))) > + continue; See above. > + lowxor = range_binop (BIT_XOR_EXPR, NULL_TREE, lowi, 0, lowj, 0); > + if (lowxor == NULL_TREE) > + continue; Likewise. 
> + gcc_checking_assert (!integer_zerop (lowxor)); > + tem = range_binop (MINUS_EXPR, NULL_TREE, lowxor, 0, > + build_int_cst (type, 1), 0); > + tem = range_binop (BIT_AND_EXPR, NULL_TREE, lowxor, 0, tem, 0); > + if (!integer_zerop (tem)) > + continue; > + highxor = range_binop (BIT_XOR_EXPR, NULL_TREE, highi, 0, highj, 0); > + if (!tree_int_cst_equal (lowxor, highxor)) > + continue; > + tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); > + exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem); > + lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); > + highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); > + if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp, > + ranges[i].in_p, lowj, highj, > + ranges[i].strict_overflow_p > + || ranges[j].strict_overflow_p)) > + { > + any_changes = true; > + break; > + } > + } > + } > + > + if (any_changes) > + { > + j = 0; > + FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe) > + { > + if (oe->op == error_mark_node) > + continue; > + else if (i != j) > + VEC_replace (operand_entry_t, *ops, j, oe); > + j++; > + } > + VEC_truncate (operand_entry_t, *ops, j); > + } > + > + XDELETEVEC (ranges); > +} > + > /* Return true if OPERAND is defined by a PHI node which uses the LHS > of STMT in it's operands. This is also known as a "destructive > update" operation. */ > @@ -2447,6 +2864,9 @@ reassociate_bb (basic_block bb) > optimize_ops_list (rhs_code, &ops); > } > > + if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) I think we want to check that the type is boolean here, so we don't set up the machinery needlessly? Otherwise it looks like a nice improvement. Thanks, Richard. 
> + optimize_range_tests (rhs_code, &ops); > + > if (VEC_length (operand_entry_t, ops) == 1) > { > if (dump_file && (dump_flags & TDF_DETAILS)) > --- gcc/testsuite/gcc.dg/pr46309.c.jj 2011-09-29 14:03:28.000000000 +0200 > +++ gcc/testsuite/gcc.dg/pr46309.c 2011-09-29 14:08:32.000000000 +0200 > @@ -0,0 +1,66 @@ > +/* PR tree-optimization/46309 */ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-reassoc-details" } */ > + > +int > +f1 (int a) > +{ > + int v1 = (a == 3); > + int v2 = (a == 1); > + int v3 = (a == 4); > + int v4 = (a == 2); > + return v1 || v2 || v3 || v4; > +} > + > +int > +f2 (int a) > +{ > + int v1 = (a == 1); > + int v2 = (a == 2); > + int v3 = (a == 3); > + int v4 = (a == 4); > + return v1 || v2 || v3 || v4; > +} > + > +int > +f3 (int a) > +{ > + int v1 = (a == 3); > + int v2 = (a == 1); > + return v1 || v2; > +} > + > +int > +f4 (int a) > +{ > + int v1 = (a == 1); > + int v2 = (a == 2); > + return v1 || v2; > +} > + > +int > +f5 (unsigned int a) > +{ > + int v1 = (a <= 31); > + int v2 = (a >= 64 && a <= 95); > + return v1 || v2; > +} > + > +int > +f6 (unsigned int a) > +{ > + int v1 = (a <= 31); > + int v2 = (a >= 64 && a <= 95); > + int v3 = (a >= 128 && a <= 159); > + int v4 = (a >= 192 && a <= 223); > + return v1 || v2 || v3 || v4; > +} > + > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.1, 1. and -.2, 2. and -.3, 3. and -.4, 4.\[\n\r\]* into" 2 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.1, 1. and -.3, 3.\[\n\r\]* into" 1 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.1, 1. and -.2, 2.\[\n\r\]* into" 1 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.0, 31. and -.64, 95.\[\n\r\]* into" 2 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. > -.128, 159. 
and -.192, 223.\[\n\r\]* into" 1 "reassoc1" } } */ > +/* { dg-final { scan-tree-dump-times "Optimizing range tests > D.\[0-9\]*_\[0-9\]* -.0, 31. and -.128, 159.\[\n\r\]* into" 1 "reassoc2" } } > */ > +/* { dg-final { cleanup-tree-dump "reassoc1" } } */ > +/* { dg-final { cleanup-tree-dump "reassoc2" } } */ > > Jakub >