On Fri, Sep 30, 2011 at 03:14:12PM +0200, Richard Guenther wrote: > Ah, indeed. I'll have a look at the updated patch.
Here is what I've committed after bootstrapping/regtesting it on x86_64-linux and i686-linux and Richard's approval on IRC. 2011-09-30 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/46309 * fold-const.c (make_range, merge_ranges): Remove prototypes. (make_range_step): New function. (make_range): Use it. * tree.h (make_range_step): New prototypes. * Makefile.in (tree-ssa-reassoc.o): Depend on $(DIAGNOSTIC_CORE_H). * tree-ssa-reassoc.c: Include diagnostic-core.h. (struct range_entry): New type. (init_range_entry, range_entry_cmp, update_range_test, optimize_range_tests): New functions. (reassociate_bb): Call optimize_range_tests. * gcc.dg/pr46309.c: New test. --- gcc/fold-const.c.jj 2011-09-30 13:17:22.000000000 +0200 +++ gcc/fold-const.c 2011-09-30 14:08:30.000000000 +0200 @@ -115,9 +115,6 @@ static int simple_operand_p (const_tree) static tree range_binop (enum tree_code, tree, tree, int, tree, int); static tree range_predecessor (tree); static tree range_successor (tree); -extern tree make_range (tree, int *, tree *, tree *, bool *); -extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, - tree, tree); static tree fold_range_test (location_t, enum tree_code, tree, tree, tree); static tree fold_cond_expr_with_comparison (location_t, tree, tree, tree, tree); static tree unextend (tree, int, int, tree); @@ -3790,6 +3787,255 @@ range_binop (enum tree_code code, tree t return constant_boolean_node (result, type); } +/* Helper routine for make_range. Perform one step for it, return + new expression if the loop should continue or NULL_TREE if it should + stop. */ + +tree +make_range_step (location_t loc, enum tree_code code, tree arg0, tree arg1, + tree exp_type, tree *p_low, tree *p_high, int *p_in_p, + bool *strict_overflow_p) +{ + tree arg0_type = TREE_TYPE (arg0); + tree n_low, n_high, low = *p_low, high = *p_high; + int in_p = *p_in_p, n_in_p; + + switch (code) + { + case TRUTH_NOT_EXPR: + *p_in_p = ! in_p; + return arg0; + + case EQ_EXPR: case NE_EXPR: + case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR: + /* We can only do something if the range is testing for zero + and if the second operand is an integer constant. Note that + saying something is "in" the range we make is done by + complementing IN_P since it will set in the initial case of + being not equal to zero; "out" is leaving it alone. */ + if (low == NULL_TREE || high == NULL_TREE + || ! integer_zerop (low) || ! integer_zerop (high) + || TREE_CODE (arg1) != INTEGER_CST) + return NULL_TREE; + + switch (code) + { + case NE_EXPR: /* - [c, c] */ + low = high = arg1; + break; + case EQ_EXPR: /* + [c, c] */ + in_p = ! in_p, low = high = arg1; + break; + case GT_EXPR: /* - [-, c] */ + low = 0, high = arg1; + break; + case GE_EXPR: /* + [c, -] */ + in_p = ! in_p, low = arg1, high = 0; + break; + case LT_EXPR: /* - [c, -] */ + low = arg1, high = 0; + break; + case LE_EXPR: /* + [-, c] */ + in_p = ! in_p, low = 0, high = arg1; + break; + default: + gcc_unreachable (); + } + + /* If this is an unsigned comparison, we also know that EXP is + greater than or equal to zero. We base the range tests we make + on that fact, so we record it here so we can parse existing + range tests. We test arg0_type since often the return type + of, e.g. EQ_EXPR, is boolean. */ + if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0)) + { + if (! merge_ranges (&n_in_p, &n_low, &n_high, + in_p, low, high, 1, + build_int_cst (arg0_type, 0), + NULL_TREE)) + return NULL_TREE; + + in_p = n_in_p, low = n_low, high = n_high; + + /* If the high bound is missing, but we have a nonzero low + bound, reverse the range so it goes from zero to the low bound + minus 1. */ + if (high == 0 && low && ! integer_zerop (low)) + { + in_p = ! in_p; + high = range_binop (MINUS_EXPR, NULL_TREE, low, 0, + integer_one_node, 0); + low = build_int_cst (arg0_type, 0); + } + } + + *p_low = low; + *p_high = high; + *p_in_p = in_p; + return arg0; + + case NEGATE_EXPR: + /* (-x) IN [a,b] -> x in [-b, -a] */ + n_low = range_binop (MINUS_EXPR, exp_type, + build_int_cst (exp_type, 0), + 0, high, 1); + n_high = range_binop (MINUS_EXPR, exp_type, + build_int_cst (exp_type, 0), + 0, low, 0); + if (n_high != 0 && TREE_OVERFLOW (n_high)) + return NULL_TREE; + goto normalize; + + case BIT_NOT_EXPR: + /* ~ X -> -X - 1 */ + return build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0), + build_int_cst (exp_type, 1)); + + case PLUS_EXPR: + case MINUS_EXPR: + if (TREE_CODE (arg1) != INTEGER_CST) + return NULL_TREE; + + /* If flag_wrapv and ARG0_TYPE is signed, then we cannot + move a constant to the other side. */ + if (!TYPE_UNSIGNED (arg0_type) + && !TYPE_OVERFLOW_UNDEFINED (arg0_type)) + return NULL_TREE; + + /* If EXP is signed, any overflow in the computation is undefined, + so we don't worry about it so long as our computations on + the bounds don't overflow. For unsigned, overflow is defined + and this is exactly the right thing. */ + n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, + arg0_type, low, 0, arg1, 0); + n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, + arg0_type, high, 1, arg1, 0); + if ((n_low != 0 && TREE_OVERFLOW (n_low)) + || (n_high != 0 && TREE_OVERFLOW (n_high))) + return NULL_TREE; + + if (TYPE_OVERFLOW_UNDEFINED (arg0_type)) + *strict_overflow_p = true; + + normalize: + /* Check for an unsigned range which has wrapped around the maximum + value thus making n_high < n_low, and normalize it. */ + if (n_low && n_high && tree_int_cst_lt (n_high, n_low)) + { + low = range_binop (PLUS_EXPR, arg0_type, n_high, 0, + integer_one_node, 0); + high = range_binop (MINUS_EXPR, arg0_type, n_low, 0, + integer_one_node, 0); + + /* If the range is of the form +/- [ x+1, x ], we won't + be able to normalize it. But then, it represents the + whole range or the empty set, so make it + +/- [ -, - ]. */ + if (tree_int_cst_equal (n_low, low) + && tree_int_cst_equal (n_high, high)) + low = high = 0; + else + in_p = ! in_p; + } + else + low = n_low, high = n_high; + + *p_low = low; + *p_high = high; + *p_in_p = in_p; + return arg0; + + CASE_CONVERT: + case NON_LVALUE_EXPR: + if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type)) + return NULL_TREE; + + if (! INTEGRAL_TYPE_P (arg0_type) + || (low != 0 && ! int_fits_type_p (low, arg0_type)) + || (high != 0 && ! int_fits_type_p (high, arg0_type))) + return NULL_TREE; + + n_low = low, n_high = high; + + if (n_low != 0) + n_low = fold_convert_loc (loc, arg0_type, n_low); + + if (n_high != 0) + n_high = fold_convert_loc (loc, arg0_type, n_high); + + /* If we're converting arg0 from an unsigned type, to exp, + a signed type, we will be doing the comparison as unsigned. + The tests above have already verified that LOW and HIGH + are both positive. + + So we have to ensure that we will handle large unsigned + values the same way that the current signed bounds treat + negative values. */ + + if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type)) + { + tree high_positive; + tree equiv_type; + /* For fixed-point modes, we need to pass the saturating flag + as the 2nd parameter. */ + if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type))) + equiv_type + = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), + TYPE_SATURATING (arg0_type)); + else + equiv_type + = lang_hooks.types.type_for_mode (TYPE_MODE (arg0_type), 1); + + /* A range without an upper bound is, naturally, unbounded. + Since convert would have cropped a very large value, use + the max value for the destination type. */ + high_positive + = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type) + : TYPE_MAX_VALUE (arg0_type); + + if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type)) + high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type, + fold_convert_loc (loc, arg0_type, + high_positive), + build_int_cst (arg0_type, 1)); + + /* If the low bound is specified, "and" the range with the + range for which the original unsigned value will be + positive. */ + if (low != 0) + { + if (! merge_ranges (&n_in_p, &n_low, &n_high, 1, n_low, n_high, + 1, fold_convert_loc (loc, arg0_type, + integer_zero_node), + high_positive)) + return NULL_TREE; + + in_p = (n_in_p == in_p); + } + else + { + /* Otherwise, "or" the range with the range of the input + that will be interpreted as negative. */ + if (! merge_ranges (&n_in_p, &n_low, &n_high, 0, n_low, n_high, + 1, fold_convert_loc (loc, arg0_type, + integer_zero_node), + high_positive)) + return NULL_TREE; + + in_p = (in_p != n_in_p); + } + } + + *p_low = n_low; + *p_high = n_high; + *p_in_p = in_p; + return arg0; + + default: + return NULL_TREE; + } +} + /* Given EXP, a logical expression, set the range it is testing into variables denoted by PIN_P, PLOW, and PHIGH. Return the expression actually being tested. *PLOW and *PHIGH will be made of the same @@ -3804,10 +4050,10 @@ make_range (tree exp, int *pin_p, tree * bool *strict_overflow_p) { enum tree_code code; - tree arg0 = NULL_TREE, arg1 = NULL_TREE; - tree exp_type = NULL_TREE, arg0_type = NULL_TREE; - int in_p, n_in_p; - tree low, high, n_low, n_high; + tree arg0, arg1 = NULL_TREE; + tree exp_type, nexp; + int in_p; + tree low, high; location_t loc = EXPR_LOCATION (exp); /* Start with simply saying "EXP != 0" and then look at the code of EXP @@ -3823,255 +4069,26 @@ make_range (tree exp, int *pin_p, tree * { code = TREE_CODE (exp); exp_type = TREE_TYPE (exp); + arg0 = NULL_TREE; if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) { if (TREE_OPERAND_LENGTH (exp) > 0) arg0 = TREE_OPERAND (exp, 0); - if (TREE_CODE_CLASS (code) == tcc_comparison - || TREE_CODE_CLASS (code) == tcc_unary - || TREE_CODE_CLASS (code) == tcc_binary) - arg0_type = TREE_TYPE (arg0); if (TREE_CODE_CLASS (code) == tcc_binary || TREE_CODE_CLASS (code) == tcc_comparison || (TREE_CODE_CLASS (code) == tcc_expression && TREE_OPERAND_LENGTH (exp) > 1)) arg1 = TREE_OPERAND (exp, 1); } + if (arg0 == NULL_TREE) + break; - switch (code) - { - case TRUTH_NOT_EXPR: - in_p = ! in_p, exp = arg0; - continue; - - case EQ_EXPR: case NE_EXPR: - case LT_EXPR: case LE_EXPR: case GE_EXPR: case GT_EXPR: - /* We can only do something if the range is testing for zero - and if the second operand is an integer constant. Note that - saying something is "in" the range we make is done by - complementing IN_P since it will set in the initial case of - being not equal to zero; "out" is leaving it alone. */ - if (low == 0 || high == 0 - || ! integer_zerop (low) || ! integer_zerop (high) - || TREE_CODE (arg1) != INTEGER_CST) - break; - - switch (code) - { - case NE_EXPR: /* - [c, c] */ - low = high = arg1; - break; - case EQ_EXPR: /* + [c, c] */ - in_p = ! in_p, low = high = arg1; - break; - case GT_EXPR: /* - [-, c] */ - low = 0, high = arg1; - break; - case GE_EXPR: /* + [c, -] */ - in_p = ! in_p, low = arg1, high = 0; - break; - case LT_EXPR: /* - [c, -] */ - low = arg1, high = 0; - break; - case LE_EXPR: /* + [-, c] */ - in_p = ! in_p, low = 0, high = arg1; - break; - default: - gcc_unreachable (); - } - - /* If this is an unsigned comparison, we also know that EXP is - greater than or equal to zero. We base the range tests we make - on that fact, so we record it here so we can parse existing - range tests. We test arg0_type since often the return type - of, e.g. EQ_EXPR, is boolean. */ - if (TYPE_UNSIGNED (arg0_type) && (low == 0 || high == 0)) - { - if (! merge_ranges (&n_in_p, &n_low, &n_high, - in_p, low, high, 1, - build_int_cst (arg0_type, 0), - NULL_TREE)) - break; - - in_p = n_in_p, low = n_low, high = n_high; - - /* If the high bound is missing, but we have a nonzero low - bound, reverse the range so it goes from zero to the low bound - minus 1. */ - if (high == 0 && low && ! integer_zerop (low)) - { - in_p = ! in_p; - high = range_binop (MINUS_EXPR, NULL_TREE, low, 0, - integer_one_node, 0); - low = build_int_cst (arg0_type, 0); - } - } - - exp = arg0; - continue; - - case NEGATE_EXPR: - /* (-x) IN [a,b] -> x in [-b, -a] */ - n_low = range_binop (MINUS_EXPR, exp_type, - build_int_cst (exp_type, 0), - 0, high, 1); - n_high = range_binop (MINUS_EXPR, exp_type, - build_int_cst (exp_type, 0), - 0, low, 0); - if (n_high != 0 && TREE_OVERFLOW (n_high)) - break; - goto normalize; - - case BIT_NOT_EXPR: - /* ~ X -> -X - 1 */ - exp = build2_loc (loc, MINUS_EXPR, exp_type, negate_expr (arg0), - build_int_cst (exp_type, 1)); - continue; - - case PLUS_EXPR: case MINUS_EXPR: - if (TREE_CODE (arg1) != INTEGER_CST) - break; - - /* If flag_wrapv and ARG0_TYPE is signed, then we cannot - move a constant to the other side. */ - if (!TYPE_UNSIGNED (arg0_type) - && !TYPE_OVERFLOW_UNDEFINED (arg0_type)) - break; - - /* If EXP is signed, any overflow in the computation is undefined, - so we don't worry about it so long as our computations on - the bounds don't overflow. For unsigned, overflow is defined - and this is exactly the right thing. */ - n_low = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, - arg0_type, low, 0, arg1, 0); - n_high = range_binop (code == MINUS_EXPR ? PLUS_EXPR : MINUS_EXPR, - arg0_type, high, 1, arg1, 0); - if ((n_low != 0 && TREE_OVERFLOW (n_low)) - || (n_high != 0 && TREE_OVERFLOW (n_high))) - break; - - if (TYPE_OVERFLOW_UNDEFINED (arg0_type)) - *strict_overflow_p = true; - - normalize: - /* Check for an unsigned range which has wrapped around the maximum - value thus making n_high < n_low, and normalize it. */ - if (n_low && n_high && tree_int_cst_lt (n_high, n_low)) - { - low = range_binop (PLUS_EXPR, arg0_type, n_high, 0, - integer_one_node, 0); - high = range_binop (MINUS_EXPR, arg0_type, n_low, 0, - integer_one_node, 0); - - /* If the range is of the form +/- [ x+1, x ], we won't - be able to normalize it. But then, it represents the - whole range or the empty set, so make it - +/- [ -, - ]. */ - if (tree_int_cst_equal (n_low, low) - && tree_int_cst_equal (n_high, high)) - low = high = 0; - else - in_p = ! in_p; - } - else - low = n_low, high = n_high; - - exp = arg0; - continue; - - CASE_CONVERT: case NON_LVALUE_EXPR: - if (TYPE_PRECISION (arg0_type) > TYPE_PRECISION (exp_type)) - break; - - if (! INTEGRAL_TYPE_P (arg0_type) - || (low != 0 && ! int_fits_type_p (low, arg0_type)) - || (high != 0 && ! int_fits_type_p (high, arg0_type))) - break; - - n_low = low, n_high = high; - - if (n_low != 0) - n_low = fold_convert_loc (loc, arg0_type, n_low); - - if (n_high != 0) - n_high = fold_convert_loc (loc, arg0_type, n_high); - - - /* If we're converting arg0 from an unsigned type, to exp, - a signed type, we will be doing the comparison as unsigned. - The tests above have already verified that LOW and HIGH - are both positive. - - So we have to ensure that we will handle large unsigned - values the same way that the current signed bounds treat - negative values. */ - - if (!TYPE_UNSIGNED (exp_type) && TYPE_UNSIGNED (arg0_type)) - { - tree high_positive; - tree equiv_type; - /* For fixed-point modes, we need to pass the saturating flag - as the 2nd parameter. */ - if (ALL_FIXED_POINT_MODE_P (TYPE_MODE (arg0_type))) - equiv_type = lang_hooks.types.type_for_mode - (TYPE_MODE (arg0_type), - TYPE_SATURATING (arg0_type)); - else - equiv_type = lang_hooks.types.type_for_mode - (TYPE_MODE (arg0_type), 1); - - /* A range without an upper bound is, naturally, unbounded. - Since convert would have cropped a very large value, use - the max value for the destination type. */ - high_positive - = TYPE_MAX_VALUE (equiv_type) ? TYPE_MAX_VALUE (equiv_type) - : TYPE_MAX_VALUE (arg0_type); - - if (TYPE_PRECISION (exp_type) == TYPE_PRECISION (arg0_type)) - high_positive = fold_build2_loc (loc, RSHIFT_EXPR, arg0_type, - fold_convert_loc (loc, arg0_type, - high_positive), - build_int_cst (arg0_type, 1)); - - /* If the low bound is specified, "and" the range with the - range for which the original unsigned value will be - positive. */ - if (low != 0) - { - if (! merge_ranges (&n_in_p, &n_low, &n_high, - 1, n_low, n_high, 1, - fold_convert_loc (loc, arg0_type, - integer_zero_node), - high_positive)) - break; - - in_p = (n_in_p == in_p); - } - else - { - /* Otherwise, "or" the range with the range of the input - that will be interpreted as negative. */ - if (! merge_ranges (&n_in_p, &n_low, &n_high, - 0, n_low, n_high, 1, - fold_convert_loc (loc, arg0_type, - integer_zero_node), - high_positive)) - break; - - in_p = (in_p != n_in_p); - } - } - - exp = arg0; - low = n_low, high = n_high; - continue; - - default: - break; - } - - break; + nexp = make_range_step (loc, code, arg0, arg1, exp_type, &low, + &high, &in_p, strict_overflow_p); + if (nexp == NULL_TREE) + break; + exp = nexp; } /* If EXP is a constant, we can evaluate whether this is true or false. */ --- gcc/tree.h.jj 2011-09-29 14:25:29.000000000 +0200 +++ gcc/tree.h 2011-09-30 14:08:38.000000000 +0200 @@ -5384,6 +5384,8 @@ extern unsigned int get_pointer_alignmen extern tree fold_call_stmt (gimple, bool); extern tree gimple_fold_builtin_snprintf_chk (gimple, tree, enum built_in_function); extern tree make_range (tree, int *, tree *, tree *, bool *); +extern tree make_range_step (location_t, enum tree_code, tree, tree, tree, + tree *, tree *, int *, bool *); extern tree build_range_check (location_t, tree, tree, int, tree, tree); extern bool merge_ranges (int *, tree *, tree *, int, tree, tree, int, tree, tree); --- gcc/Makefile.in.jj 2011-09-29 14:25:46.000000000 +0200 +++ gcc/Makefile.in 2011-09-30 13:40:52.000000000 +0200 @@ -2650,7 +2650,7 @@ tree-ssa-reassoc.o : tree-ssa-reassoc.c $(TM_H) coretypes.h $(TREE_DUMP_H) $(TREE_PASS_H) $(FLAGS_H) \ tree-iterator.h $(BASIC_BLOCK_H) $(GIMPLE_H) $(TREE_INLINE_H) \ $(VEC_H) langhooks.h alloc-pool.h pointer-set.h $(CFGLOOP_H) \ - tree-pretty-print.h gimple-pretty-print.h + tree-pretty-print.h gimple-pretty-print.h $(DIAGNOSTIC_CORE_H) tree-optimize.o : tree-optimize.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(GGC_H) output.h \ $(DIAGNOSTIC_H) $(BASIC_BLOCK_H) $(FLAGS_H) $(TIMEVAR_H) $(TM_H) \ --- gcc/tree-ssa-reassoc.c.jj 2011-09-29 14:25:29.000000000 +0200 +++ gcc/tree-ssa-reassoc.c 2011-09-30 14:08:00.000000000 +0200 @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. #include "flags.h" #include "target.h" #include "params.h" +#include "diagnostic-core.h" /* This is a simple global reassociation pass. It is, in part, based on the LLVM pass of the same name (They do some things more/less @@ -1568,6 +1569,457 @@ optimize_ops_list (enum tree_code opcode optimize_ops_list (opcode, ops); } +/* The following functions are subroutines to optimize_range_tests and allow + it to try to change a logical combination of comparisons into a range + test. + + For example, both + X == 2 || X == 5 || X == 3 || X == 4 + and + X >= 2 && X <= 5 + are converted to + (unsigned) (X - 2) <= 3 + + For more information see comments above fold_test_range in fold-const.c, + this implementation is for GIMPLE. */ + +struct range_entry +{ + tree exp; + tree low; + tree high; + bool in_p; + bool strict_overflow_p; + unsigned int idx, next; +}; + +/* This is similar to make_range in fold-const.c, but on top of + GIMPLE instead of trees. */ + +static void +init_range_entry (struct range_entry *r, tree exp) +{ + int in_p; + tree low, high; + bool is_bool, strict_overflow_p; + + r->exp = NULL_TREE; + r->in_p = false; + r->strict_overflow_p = false; + r->low = NULL_TREE; + r->high = NULL_TREE; + if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))) + return; + + /* Start with simply saying "EXP != 0" and then look at the code of EXP + and see if we can refine the range. Some of the cases below may not + happen, but it doesn't seem worth worrying about this. We "continue" + the outer loop when we've changed something; otherwise we "break" + the switch, which will "break" the while. */ + low = build_int_cst (TREE_TYPE (exp), 0); + high = low; + in_p = 0; + strict_overflow_p = false; + is_bool = false; + if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) + { + if (TYPE_UNSIGNED (TREE_TYPE (exp))) + is_bool = true; + else + return; + } + else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) + is_bool = true; + + while (1) + { + gimple stmt; + enum tree_code code; + tree arg0, arg1, exp_type; + tree nexp; + location_t loc; + + if (TREE_CODE (exp) != SSA_NAME) + break; + + stmt = SSA_NAME_DEF_STMT (exp); + if (!is_gimple_assign (stmt)) + break; + + code = gimple_assign_rhs_code (stmt); + arg0 = gimple_assign_rhs1 (stmt); + arg1 = gimple_assign_rhs2 (stmt); + exp_type = TREE_TYPE (exp); + loc = gimple_location (stmt); + switch (code) + { + case BIT_NOT_EXPR: + if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) + { + in_p = !in_p; + exp = arg0; + continue; + } + break; + case SSA_NAME: + exp = arg0; + continue; + CASE_CONVERT: + if (is_bool) + goto do_default; + if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) + { + if (TYPE_UNSIGNED (TREE_TYPE (arg0))) + is_bool = true; + else + return; + } + else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) + is_bool = true; + goto do_default; + case EQ_EXPR: + case NE_EXPR: + case LT_EXPR: + case LE_EXPR: + case GE_EXPR: + case GT_EXPR: + is_bool = true; + /* FALLTHRU */ + default: + if (!is_bool) + return; + do_default: + nexp = make_range_step (loc, code, arg0, arg1, exp_type, + &low, &high, &in_p, + &strict_overflow_p); + if (nexp != NULL_TREE) + { + exp = nexp; + gcc_assert (TREE_CODE (exp) == SSA_NAME); + continue; + } + break; + } + break; + } + if (is_bool) + { + r->exp = exp; + r->in_p = in_p; + r->low = low; + r->high = high; + r->strict_overflow_p = strict_overflow_p; + } +} + +/* Comparison function for qsort. Sort entries + without SSA_NAME exp first, then with SSA_NAMEs sorted + by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs + by increasing ->low and if ->low is the same, by increasing + ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE + maximum. */ + +static int +range_entry_cmp (const void *a, const void *b) +{ + const struct range_entry *p = (const struct range_entry *) a; + const struct range_entry *q = (const struct range_entry *) b; + + if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) + { + if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) + { + /* Group range_entries for the same SSA_NAME together. */ + if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) + return -1; + else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) + return 1; + /* If ->low is different, NULL low goes first, then by + ascending low. */ + if (p->low != NULL_TREE) + { + if (q->low != NULL_TREE) + { + tree tem = fold_binary (LT_EXPR, boolean_type_node, + p->low, q->low); + if (tem && integer_onep (tem)) + return -1; + tem = fold_binary (GT_EXPR, boolean_type_node, + p->low, q->low); + if (tem && integer_onep (tem)) + return 1; + } + else + return 1; + } + else if (q->low != NULL_TREE) + return -1; + /* If ->high is different, NULL high goes last, before that by + ascending high. */ + if (p->high != NULL_TREE) + { + if (q->high != NULL_TREE) + { + tree tem = fold_binary (LT_EXPR, boolean_type_node, + p->high, q->high); + if (tem && integer_onep (tem)) + return -1; + tem = fold_binary (GT_EXPR, boolean_type_node, + p->high, q->high); + if (tem && integer_onep (tem)) + return 1; + } + else + return -1; + } + else if (p->high != NULL_TREE) + return 1; + /* If both ranges are the same, sort below by ascending idx. */ + } + else + return 1; + } + else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) + return -1; + + if (p->idx < q->idx) + return -1; + else + { + gcc_checking_assert (p->idx > q->idx); + return 1; + } +} + +/* Helper routine of optimize_range_test. + [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for + RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, + OPCODE and OPS are arguments of optimize_range_tests. Return + true if the range merge has been successful. */ + +static bool +update_range_test (struct range_entry *range, struct range_entry *otherrange, + unsigned int count, enum tree_code opcode, + VEC (operand_entry_t, heap) **ops, tree exp, bool in_p, + tree low, tree high, bool strict_overflow_p) +{ + tree op = VEC_index (operand_entry_t, *ops, range->idx)->op; + location_t loc = gimple_location (SSA_NAME_DEF_STMT (op)); + tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high); + enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; + gimple_stmt_iterator gsi; + + if (tem == NULL_TREE) + return false; + + if (strict_overflow_p && issue_strict_overflow_warning (wc)) + warning_at (loc, OPT_Wstrict_overflow, + "assuming signed overflow does not occur " + "when simplifying range test"); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + struct range_entry *r; + fprintf (dump_file, "Optimizing range tests "); + print_generic_expr (dump_file, range->exp, 0); + fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); + print_generic_expr (dump_file, range->low, 0); + fprintf (dump_file, ", "); + print_generic_expr (dump_file, range->high, 0); + fprintf (dump_file, "]"); + for (r = otherrange; r < otherrange + count; r++) + { + fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); + print_generic_expr (dump_file, r->low, 0); + fprintf (dump_file, ", "); + print_generic_expr (dump_file, r->high, 0); + fprintf (dump_file, "]"); + } + fprintf (dump_file, "\n into "); + print_generic_expr (dump_file, tem, 0); + fprintf (dump_file, "\n"); + } + + if (opcode == BIT_IOR_EXPR) + tem = invert_truthvalue_loc (loc, tem); + + tem = fold_convert_loc (loc, TREE_TYPE (op), tem); + gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op)); + tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, + GSI_SAME_STMT); + + VEC_index (operand_entry_t, *ops, range->idx)->op = tem; + range->exp = exp; + range->low = low; + range->high = high; + range->in_p = in_p; + range->strict_overflow_p = false; + + for (range = otherrange; range < otherrange + count; range++) + { + VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node; + range->exp = NULL_TREE; + } + return true; +} + +/* Optimize range tests, similarly how fold_range_test optimizes + it on trees. The tree code for the binary + operation between all the operands is OPCODE. */ + +static void +optimize_range_tests (enum tree_code opcode, + VEC (operand_entry_t, heap) **ops) +{ + unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first; + operand_entry_t oe; + struct range_entry *ranges; + bool any_changes = false; + + if (length == 1) + return; + + ranges = XNEWVEC (struct range_entry, length); + for (i = 0; i < length; i++) + { + ranges[i].idx = i; + init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op); + /* For | invert it now, we will invert it again before emitting + the optimized expression. */ + if (opcode == BIT_IOR_EXPR) + ranges[i].in_p = !ranges[i].in_p; + } + + qsort (ranges, length, sizeof (*ranges), range_entry_cmp); + for (i = 0; i < length; i++) + if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) + break; + + /* Try to merge ranges. */ + for (first = i; i < length; i++) + { + tree low = ranges[i].low; + tree high = ranges[i].high; + int in_p = ranges[i].in_p; + bool strict_overflow_p = ranges[i].strict_overflow_p; + int update_fail_count = 0; + + for (j = i + 1; j < length; j++) + { + if (ranges[i].exp != ranges[j].exp) + break; + if (!merge_ranges (&in_p, &low, &high, in_p, low, high, + ranges[j].in_p, ranges[j].low, ranges[j].high)) + break; + strict_overflow_p |= ranges[j].strict_overflow_p; + } + + if (j == i + 1) + continue; + + if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode, + ops, ranges[i].exp, in_p, low, high, + strict_overflow_p)) + { + i = j - 1; + any_changes = true; + } + /* Avoid quadratic complexity if all merge_ranges calls would succeed, + while update_range_test would fail. */ + else if (update_fail_count == 64) + i = j - 1; + else + ++update_fail_count; + } + + /* Optimize X == CST1 || X == CST2 + if popcount (CST1 ^ CST2) == 1 into + (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). + Similarly for ranges. E.g. + X != 2 && X != 3 && X != 10 && X != 11 + will be transformed by the above loop into + (X - 2U) <= 1U && (X - 10U) <= 1U + and this loop can transform that into + ((X & ~8) - 2U) <= 1U. */ + for (i = first; i < length; i++) + { + tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp; + + if (ranges[i].exp == NULL_TREE || ranges[i].in_p) + continue; + type = TREE_TYPE (ranges[i].exp); + if (!INTEGRAL_TYPE_P (type)) + continue; + lowi = ranges[i].low; + if (lowi == NULL_TREE) + lowi = TYPE_MIN_VALUE (type); + highi = ranges[i].high; + if (highi == NULL_TREE) + continue; + for (j = i + 1; j < length && j < i + 64; j++) + { + if (ranges[j].exp == NULL_TREE) + continue; + if (ranges[i].exp != ranges[j].exp) + break; + if (ranges[j].in_p) + continue; + lowj = ranges[j].low; + if (lowj == NULL_TREE) + continue; + highj = ranges[j].high; + if (highj == NULL_TREE) + highj = TYPE_MAX_VALUE (type); + tem = fold_binary (GT_EXPR, boolean_type_node, + lowj, highi); + if (tem == NULL_TREE || !integer_onep (tem)) + continue; + lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); + if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) + continue; + gcc_checking_assert (!integer_zerop (lowxor)); + tem = fold_binary (MINUS_EXPR, type, lowxor, + build_int_cst (type, 1)); + if (tem == NULL_TREE) + continue; + tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem); + if (tem == NULL_TREE || !integer_zerop (tem)) + continue; + highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); + if (!tree_int_cst_equal (lowxor, highxor)) + continue; + tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); + exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem); + lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); + highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); + if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp, + ranges[i].in_p, lowj, highj, + ranges[i].strict_overflow_p + || ranges[j].strict_overflow_p)) + { + any_changes = true; + break; + } + } + } + + if (any_changes) + { + j = 0; + FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe) + { + if (oe->op == error_mark_node) + continue; + else if (i != j) + VEC_replace (operand_entry_t, *ops, j, oe); + j++; + } + VEC_truncate (operand_entry_t, *ops, j); + } + + XDELETEVEC (ranges); +} + /* Return true if OPERAND is defined by a PHI node which uses the LHS of STMT in it's operands. This is also known as a "destructive update" operation. */ @@ -2447,6 +2899,9 @@ reassociate_bb (basic_block bb) optimize_ops_list (rhs_code, &ops); } + if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) + optimize_range_tests (rhs_code, &ops); + if (VEC_length (operand_entry_t, ops) == 1) { if (dump_file && (dump_flags & TDF_DETAILS)) --- gcc/testsuite/gcc.dg/pr46309.c.jj 2011-09-30 13:40:52.000000000 +0200 +++ gcc/testsuite/gcc.dg/pr46309.c 2011-09-30 13:40:52.000000000 +0200 @@ -0,0 +1,66 @@ +/* PR tree-optimization/46309 */ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-reassoc-details" } */ + +int +f1 (int a) +{ + int v1 = (a == 3); + int v2 = (a == 1); + int v3 = (a == 4); + int v4 = (a == 2); + return v1 || v2 || v3 || v4; +} + +int +f2 (int a) +{ + int v1 = (a == 1); + int v2 = (a == 2); + int v3 = (a == 3); + int v4 = (a == 4); + return v1 || v2 || v3 || v4; +} + +int +f3 (int a) +{ + int v1 = (a == 3); + int v2 = (a == 1); + return v1 || v2; +} + +int +f4 (int a) +{ + int v1 = (a == 1); + int v2 = (a == 2); + return v1 || v2; +} + +int +f5 (unsigned int a) +{ + int v1 = (a <= 31); + int v2 = (a >= 64 && a <= 95); + return v1 || v2; +} + +int +f6 (unsigned int a) +{ + int v1 = (a <= 31); + int v2 = (a >= 64 && a <= 95); + int v3 = (a >= 128 && a <= 159); + int v4 = (a >= 192 && a <= 223); + return v1 || v2 || v3 || v4; +} + +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2. and -.3, 3. and -.4, 4.\[\n\r\]* into" 2 "reassoc1" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.3, 3.\[\n\r\]* into" 1 "reassoc1" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.1, 1. and -.2, 2.\[\n\r\]* into" 1 "reassoc1" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.0, 31. and -.64, 95.\[\n\r\]* into" 2 "reassoc1" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing range tests a_\[0-9\]*.D. -.128, 159. and -.192, 223.\[\n\r\]* into" 1 "reassoc1" } } */ +/* { dg-final { scan-tree-dump-times "Optimizing range tests D.\[0-9\]*_\[0-9\]* -.0, 31. and -.128, 159.\[\n\r\]* into" 1 "reassoc2" } } */ +/* { dg-final { cleanup-tree-dump "reassoc1" } } */ +/* { dg-final { cleanup-tree-dump "reassoc2" } } */ Jakub