On Thu, 26 Apr 2018, Jakub Jelinek wrote:

> Hi!
>
> As explained in the comment below, when doing the inter-bb range test
> optimization and we are trying to optimize
>   a >= 0 && a < b
> where b is known to be >= 0 into
>   (unsigned) a < (unsigned) b,
> we need to be careful if the a >= 0 condition is in a different bb from
> the a < b one and the a >= 0 bb dominates the block with the def stmt
> of b; then get_nonzero_bits can return a flow-dependent value range that
> isn't really valid if we optimize the a >= 0 condition into true and
> modify the a < b condition to do an unsigned comparison.
>
> Unfortunately, giving up completely breaks some testcases in the testsuite
> distilled from real-world code, so the patch handles the most common cases
> where b is known to be non-negative no matter what, in particular
> zero extension and masking the MSB off.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and 8.1?
> Or do we want it for 8.2 only?
> The last testcase fails on 7.x branch too.
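As a rough C-level sketch of the transform being discussed (not part of the
patch; the function names and the concrete bound below are illustrative
only), the two forms are equivalent whenever b is known to be non-negative,
because casting a negative a to unsigned yields a value larger than any
non-negative b:

/* Sketch only:  a >= 0 && a < b   vs.   (unsigned) a < (unsigned) b,
   assuming b >= 0.  */
static int
range_test (int a, int b)
{
  return a >= 0 && a < b;		/* two tests, two branches */
}

static int
unsigned_test (int a, int b)
{
  /* If a < 0, (unsigned) a >= 0x80000000u while (unsigned) b <= INT_MAX,
     so the single comparison correctly yields false.  */
  return (unsigned) a < (unsigned) b;
}

int
main (void)
{
  int b = 42;	/* stands in for a value known to be >= 0 */
  for (int a = -100; a < 100; a++)
    if (range_test (a, b) != unsigned_test (a, b))
      __builtin_abort ();
  return 0;
}
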
Looks good to me for 8.1, we will eventually need an RC2 anyway.

Thanks,
Richard.

> 2018-04-26  Jakub Jelinek  <ja...@redhat.com>
>
>         PR tree-optimization/85529
>         * tree-ssa-reassoc.c (optimize_range_tests_var_bound): Add FIRST_BB
>         argument.  Don't call get_nonzero_bits if opcode is ERROR_MARK_NODE,
>         rhs2 def stmt's bb is dominated by first_bb and it isn't an obvious
>         zero extension or masking of the MSB bit.
>         (optimize_range_tests): Add FIRST_BB argument, pass it through
>         to optimize_range_tests_var_bound.
>         (maybe_optimize_range_tests, reassociate_bb): Adjust
>         optimize_range_tests callers.
>
>         * gcc.c-torture/execute/pr85529-1.c: New test.
>         * gcc.c-torture/execute/pr85529-2.c: New test.
>         * gcc.dg/pr85529.c: New test.
>
> --- gcc/tree-ssa-reassoc.c.jj   2018-03-16 09:06:06.431752536 +0100
> +++ gcc/tree-ssa-reassoc.c      2018-04-26 17:23:14.850769612 +0200
> @@ -3035,7 +3035,8 @@ optimize_range_tests_cmp_bitwise (enum t
>  static bool
>  optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
>                                 vec<operand_entry *> *ops,
> -                               struct range_entry *ranges)
> +                               struct range_entry *ranges,
> +                               basic_block first_bb)
>  {
>    int i;
>    bool any_changes = false;
> @@ -3143,6 +3144,60 @@ optimize_range_tests_var_bound (enum tre
>        if (idx == NULL)
>         continue;
>
> +      /* maybe_optimize_range_tests allows statements without side-effects
> +        in the basic blocks as long as they are consumed in the same bb.
> +        Make sure rhs2's def stmt is not among them, otherwise we can't
> +        use safely get_nonzero_bits on it.  E.g. in:
> +         # RANGE [-83, 1] NONZERO 173
> +         # k_32 = PHI <k_47(13), k_12(9)>
> +        ...
> +         if (k_32 >= 0)
> +           goto <bb 5>; [26.46%]
> +         else
> +           goto <bb 9>; [73.54%]
> +
> +         <bb 5> [local count: 140323371]:
> +         # RANGE [0, 1] NONZERO 1
> +         _5 = (int) k_32;
> +         # RANGE [0, 4] NONZERO 4
> +         _21 = _5 << 2;
> +         # RANGE [0, 4] NONZERO 4
> +         iftmp.0_44 = (char) _21;
> +         if (k_32 < iftmp.0_44)
> +           goto <bb 6>; [84.48%]
> +         else
> +           goto <bb 9>; [15.52%]
> +        the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
> +        k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
> +        to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
> +        those stmts even for negative k_32 and the value ranges would be no
> +        longer guaranteed and so the optimization would be invalid.  */
> +      if (opcode == ERROR_MARK)
> +       {
> +         gimple *g = SSA_NAME_DEF_STMT (rhs2);
> +         basic_block bb2 = gimple_bb (g);
> +         if (bb2
> +             && bb2 != first_bb
> +             && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
> +           {
> +             /* As an exception, handle a few common cases.  */
> +             if (gimple_assign_cast_p (g)
> +                 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))
> +                 && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)))
> +                 && (TYPE_PRECISION (TREE_TYPE (rhs2))
> +                     > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g)))))
> +               /* Zero-extension is always ok.  */ ;
> +             else if (is_gimple_assign (g)
> +                      && gimple_assign_rhs_code (g) == BIT_AND_EXPR
> +                      && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
> +                      && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
> +               /* Masking with INTEGER_CST with MSB clear is always ok
> +                  too.  */ ;
> +             else
> +               continue;
> +           }
> +       }
> +
>        wide_int nz = get_nonzero_bits (rhs2);
>        if (wi::neg_p (nz))
>         continue;
> @@ -3269,11 +3324,12 @@ optimize_range_tests_var_bound (enum tre
>     maybe_optimize_range_tests for inter-bb range optimization.
>     In that case if oe->op is NULL, oe->id is bb->index whose
>     GIMPLE_COND is && or ||ed into the test, and oe->rank says
> -   the actual opcode.  */
> +   the actual opcode.
> +   FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
>
>  static bool
>  optimize_range_tests (enum tree_code opcode,
> -                     vec<operand_entry *> *ops)
> +                     vec<operand_entry *> *ops, basic_block first_bb)
>  {
>    unsigned int length = ops->length (), i, j, first;
>    operand_entry *oe;
> @@ -3353,7 +3409,7 @@ optimize_range_tests (enum tree_code opc
>    any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
>                                                     ops, ranges);
>    any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
> -                                                ranges);
> +                                                ranges, first_bb);
>
>    if (any_changes && opcode != ERROR_MARK)
>      {
> @@ -4100,7 +4156,7 @@ maybe_optimize_range_tests (gimple *stmt
>           break;
>         }
>        if (ops.length () > 1)
> -       any_changes = optimize_range_tests (ERROR_MARK, &ops);
> +       any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
>        if (any_changes)
>         {
>           unsigned int idx, max_idx = 0;
> @@ -5855,7 +5911,7 @@ reassociate_bb (basic_block bb)
>                 if (is_vector)
>                   optimize_vec_cond_expr (rhs_code, &ops);
>                 else
> -                 optimize_range_tests (rhs_code, &ops);
> +                 optimize_range_tests (rhs_code, &ops, NULL);
>               }
>
>             if (rhs_code == MULT_EXPR && !is_vector)
> --- gcc/testsuite/gcc.c-torture/execute/pr85529-1.c.jj  2018-04-26 16:42:09.437704149 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr85529-1.c     2018-04-26 16:41:34.769689637 +0200
> @@ -0,0 +1,28 @@
> +/* PR tree-optimization/85529 */
> +
> +struct S { int a; };
> +
> +int b, c = 1, d, e, f;
> +static int g;
> +volatile struct S s;
> +
> +signed char
> +foo (signed char i, int j)
> +{
> +  return i < 0 ? i : i << j;
> +}
> +
> +int
> +main ()
> +{
> +  signed char k = -83;
> +  if (!d)
> +    goto L;
> +  k = e || f;
> +L:
> +  for (; b < 1; b++)
> +    s.a != (k < foo (k, 2) && (c = k = g));
> +  if (c != 1)
> +    __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr85529-2.c.jj  2018-04-26 16:42:12.370705372 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr85529-2.c     2018-04-26 16:41:19.377683198 +0200
> @@ -0,0 +1,25 @@
> +/* PR tree-optimization/85529 */
> +
> +__attribute__((noipa)) int
> +foo (int x)
> +{
> +  x &= 63;
> +  x -= 50;
> +  x |= 1;
> +  if (x < 0)
> +    return 1;
> +  int y = x >> 2;
> +  if (x >= y)
> +    return 1;
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +  for (i = 0; i < 63; i++)
> +    if (foo (i) != 1)
> +      __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr85529.c.jj   2018-04-26 16:42:33.566714252 +0200
> +++ gcc/testsuite/gcc.dg/pr85529.c      2018-04-26 14:56:03.534264237 +0200
> @@ -0,0 +1,27 @@
> +/* PR tree-optimization/85529 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fno-ssa-phiopt" } */
> +
> +__attribute__((noinline, noclone)) int
> +foo (int x)
> +{
> +  x &= 31;
> +  x -= 25;
> +  x *= 2;
> +  if (x < 0)
> +    return 1;
> +  int y = x >> 2;
> +  if (x >= y)
> +    return 1;
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +  for (i = 0; i < 63; i++)
> +    if (foo (i) != 1)
> +      __builtin_abort ();
> +  return 0;
> +}
>
>         Jakub
>

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton,
HRB 21284 (AG Nuernberg)
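
For reference, a rough C-level sketch (not from the patch; the names are
illustrative) of the two def-stmt shapes the patch keeps accepting for rhs2
even when its block is dominated by the a >= 0 test: in both, b is
non-negative on every path, so its nonzero-bits information cannot secretly
depend on the a >= 0 condition that is about to be folded away.

extern unsigned char narrow_input;	/* hypothetical inputs */
extern int wide_input;

int
safe_zero_extend (int a)
{
  /* Zero-extension from a narrower unsigned type: 0 <= b <= 255
     regardless of which path reaches this statement.  */
  int b = narrow_input;
  return a >= 0 && a < b;	/* may still become (unsigned) a < (unsigned) b */
}

int
safe_mask (int a)
{
  /* BIT_AND with an INTEGER_CST whose MSB is clear: b >= 0 on every path.  */
  int b = wide_input & 0x7f;
  return a >= 0 && a < b;
}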