On Thu, 26 Apr 2018, Jakub Jelinek wrote:

> Hi!
> 
> As explained in the comment below, when doing the inter-bb range test
> optimization and are trying to optimize the
> a >= 0 && a < b
> where b is known to be >= 0 into
> (unsigned) a < (unsigned) b, we need to be careful if the a >= 0 condition
> is in a different bb from the a < b one and the a >= 0 bb dominates the
> block with the def stmt of b; then get_nonzero_bits can return flow
> dependent value range that isn't really valid if we optimize the a >= 0
> condition into true and modify the a < b condition to do unsigned
> comparison.
> 
> Unfortunately, giving up completely breaks some testcases in the testsuite
> distilled from real-world code, so the patch handles the most common cases
> where b is known to be non-negative no matter what, in particular
> zero extension and masking the MSB off.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk and 8.1?
> Or do we want it for 8.2 only?
> The last testcase fails on 7.x branch too.

Looks good to me for 8.1, we eventually will need a RC2 anyways.

Thanks,
Richard.

> 2018-04-26  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR tree-optimization/85529
>       * tree-ssa-reassoc.c (optimize_range_tests_var_bound): Add FIRST_BB
>       argument.  Don't call get_nonzero_bits if opcode is ERROR_MARK_NODE,
>       rhs2 def stmt's bb is dominated by first_bb and it isn't an obvious
>       zero extension or masking of the MSB bit.
>       (optimize_range_tests): Add FIRST_BB argument, pass it through
>       to optimize_range_tests_var_bound.
>       (maybe_optimize_range_tests, reassociate_bb): Adjust
>       optimize_range_tests callers.
> 
>       * gcc.c-torture/execute/pr85529-1.c: New test.
>       * gcc.c-torture/execute/pr85529-2.c: New test.
>       * gcc.dg/pr85529.c: New test.
> 
> --- gcc/tree-ssa-reassoc.c.jj 2018-03-16 09:06:06.431752536 +0100
> +++ gcc/tree-ssa-reassoc.c    2018-04-26 17:23:14.850769612 +0200
> @@ -3035,7 +3035,8 @@ optimize_range_tests_cmp_bitwise (enum t
>  static bool
>  optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
>                               vec<operand_entry *> *ops,
> -                             struct range_entry *ranges)
> +                             struct range_entry *ranges,
> +                             basic_block first_bb)
>  {
>    int i;
>    bool any_changes = false;
> @@ -3143,6 +3144,60 @@ optimize_range_tests_var_bound (enum tre
>        if (idx == NULL)
>       continue;
>  
> +      /* maybe_optimize_range_tests allows statements without side-effects
> +      in the basic blocks as long as they are consumed in the same bb.
> +      Make sure rhs2's def stmt is not among them, otherwise we can't
> +      use safely get_nonzero_bits on it.  E.g. in:
> +       # RANGE [-83, 1] NONZERO 173
> +       # k_32 = PHI <k_47(13), k_12(9)>
> +      ...
> +       if (k_32 >= 0)
> +         goto <bb 5>; [26.46%]
> +       else
> +         goto <bb 9>; [73.54%]
> +
> +       <bb 5> [local count: 140323371]:
> +       # RANGE [0, 1] NONZERO 1
> +       _5 = (int) k_32;
> +       # RANGE [0, 4] NONZERO 4
> +       _21 = _5 << 2;
> +       # RANGE [0, 4] NONZERO 4
> +       iftmp.0_44 = (char) _21;
> +       if (k_32 < iftmp.0_44)
> +         goto <bb 6>; [84.48%]
> +       else
> +         goto <bb 9>; [15.52%]
> +      the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
> +      k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
> +      to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
> +      those stmts even for negative k_32 and the value ranges would be no
> +      longer guaranteed and so the optimization would be invalid.  */
> +      if (opcode == ERROR_MARK)
> +     {
> +       gimple *g = SSA_NAME_DEF_STMT (rhs2);
> +       basic_block bb2 = gimple_bb (g);
> +       if (bb2
> +           && bb2 != first_bb
> +           && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
> +         {
> +           /* As an exception, handle a few common cases.  */
> +           if (gimple_assign_cast_p (g)
> +               && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g)))
> +               && TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (g)))
> +               && (TYPE_PRECISION (TREE_TYPE (rhs2))
> +                   > TYPE_PRECISION (TREE_TYPE (gimple_assign_rhs1 (g)))))
> +             /* Zero-extension is always ok.  */ ;
> +           else if (is_gimple_assign (g)
> +                    && gimple_assign_rhs_code (g) == BIT_AND_EXPR
> +                    && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
> +                    && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
> +             /* Masking with INTEGER_CST with MSB clear is always ok
> +                too.  */ ;
> +           else
> +             continue;
> +         }
> +     }
> +
>        wide_int nz = get_nonzero_bits (rhs2);
>        if (wi::neg_p (nz))
>       continue;
> @@ -3269,11 +3324,12 @@ optimize_range_tests_var_bound (enum tre
>     maybe_optimize_range_tests for inter-bb range optimization.
>     In that case if oe->op is NULL, oe->id is bb->index whose
>     GIMPLE_COND is && or ||ed into the test, and oe->rank says
> -   the actual opcode.  */
> +   the actual opcode.
> +   FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
>  
>  static bool
>  optimize_range_tests (enum tree_code opcode,
> -                   vec<operand_entry *> *ops)
> +                   vec<operand_entry *> *ops, basic_block first_bb)
>  {
>    unsigned int length = ops->length (), i, j, first;
>    operand_entry *oe;
> @@ -3353,7 +3409,7 @@ optimize_range_tests (enum tree_code opc
>    any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
>                                                  ops, ranges);
>    any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
> -                                              ranges);
> +                                              ranges, first_bb);
>  
>    if (any_changes && opcode != ERROR_MARK)
>      {
> @@ -4100,7 +4156,7 @@ maybe_optimize_range_tests (gimple *stmt
>       break;
>      }
>    if (ops.length () > 1)
> -    any_changes = optimize_range_tests (ERROR_MARK, &ops);
> +    any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
>    if (any_changes)
>      {
>        unsigned int idx, max_idx = 0;
> @@ -5855,7 +5911,7 @@ reassociate_bb (basic_block bb)
>                 if (is_vector)
>                   optimize_vec_cond_expr (rhs_code, &ops);
>                 else
> -                 optimize_range_tests (rhs_code, &ops);
> +                 optimize_range_tests (rhs_code, &ops, NULL);
>               }
>  
>             if (rhs_code == MULT_EXPR && !is_vector)
> --- gcc/testsuite/gcc.c-torture/execute/pr85529-1.c.jj        2018-04-26 
> 16:42:09.437704149 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr85529-1.c   2018-04-26 
> 16:41:34.769689637 +0200
> @@ -0,0 +1,28 @@
> +/* PR tree-optimization/85529 */
> +
> +struct S { int a; };
> +
> +int b, c = 1, d, e, f;
> +static int g;
> +volatile struct S s;
> +
> +signed char
> +foo (signed char i, int j)
> +{
> +  return i < 0 ? i : i << j;
> +}
> +
> +int
> +main ()
> +{
> +  signed char k = -83;
> +  if (!d)
> +    goto L;
> +  k = e || f;
> +L:
> +  for (; b < 1; b++)
> +    s.a != (k < foo (k, 2) && (c = k = g));
> +  if (c != 1)
> +    __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.c-torture/execute/pr85529-2.c.jj        2018-04-26 
> 16:42:12.370705372 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr85529-2.c   2018-04-26 
> 16:41:19.377683198 +0200
> @@ -0,0 +1,25 @@
> +/* PR tree-optimization/85529 */
> +
> +__attribute__((noipa)) int
> +foo (int x)
> +{
> +  x &= 63;
> +  x -= 50;
> +  x |= 1;
> +  if (x < 0)
> +    return 1;
> +  int y = x >> 2;
> +  if (x >= y)
> +    return 1;
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +  for (i = 0; i < 63; i++)
> +    if (foo (i) != 1)
> +      __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/pr85529.c.jj 2018-04-26 16:42:33.566714252 +0200
> +++ gcc/testsuite/gcc.dg/pr85529.c    2018-04-26 14:56:03.534264237 +0200
> @@ -0,0 +1,27 @@
> +/* PR tree-optimization/85529 */
> +/* { dg-do run } */
> +/* { dg-options "-O2 -fno-ssa-phiopt" } */
> +
> +__attribute__((noinline, noclone)) int
> +foo (int x)
> +{
> +  x &= 31;
> +  x -= 25;
> +  x *= 2;
> +  if (x < 0)
> +    return 1;
> +  int y = x >> 2;
> +  if (x >= y)
> +    return 1;
> +  return 0;
> +}
> +
> +int
> +main ()
> +{
> +  int i;
> +  for (i = 0; i < 63; i++)
> +    if (foo (i) != 1)
> +      __builtin_abort ();
> +  return 0;
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)

Reply via email to