Richard Biener <rguent...@suse.de> writes:
> niter analysis uses multiple_of_p which currently assumes
> operations like MULT_EXPR do not wrap.  We've got to rely on this
> for optimizing size expressions like those in DECL_SIZE and those
> generally use unsigned arithmetic with no indication that they
> are not expected to wrap.  To preserve that the following adds
> a parameter to multiple_of_p, defaulted to true, indicating that
> the TOP expression is not expected to wrap for outer computations
> in TYPE.  This mostly follows a patch proposed by Bin last year
> with the conversion behavior added.
>
> Applying to all users the new effect is that upon type conversions
> in the TOP expression the behavior will switch to honor
> TYPE_OVERFLOW_UNDEFINED for the converted sub-expressions.
>
> The patch also changes the occurance in niter analysis that we
> know is problematic and we have testcases for to pass false
> to multiple_of_p.  The patch also contains a change to the
> PR72817 fix from Bin to avoid regressing gcc.dg/tree-ssa/loop-42.c.
>
> The intent for stage1 is to introduce a size_multiple_of_p and
> internalize the added parameter so all multiple_of_p users will
> honor TYPE_OVERFLOW_UNDEFINED and users dealing with size expressions
> need to be switched to size_multiple_of_p.
>
> Boostrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> Thanks,
> Richard.
>
> 2022-01-26  Richard Biener  <rguent...@suse.de>
>
>       PR tree-optimization/100499
>       * fold-const.h (multiple_of_p): Add nowrap parameter, defaulted
>       to true.
>       * fold-const.cc (multiple_of_p): Likewise.  Honor it for
>       MULT_EXPR, PLUS_EXPR and MINUS_EXPR and pass it along,
>       switching to false for conversions.
>       * tree-ssa-loop-niter.cc (number_of_iterations_ne): Do not
>       claim the outermost expression does not wrap when calling
>       multiple_of_p.  Refactor the check done to check the
>       original IV, avoiding a bias that might wrap.
>
>       * gcc.dg/torture/pr100499-1.c: New testcase.
>       * gcc.dg/torture/pr100499-2.c: Likewise.
>       * gcc.dg/torture/pr100499-3.c: Likewise.

LGTM FWIW, but…

> Co-authored-by: Bin Cheng  <bin.ch...@linux.alibaba.com>
> ---
>  gcc/fold-const.cc                         | 80 +++++++++++++++--------
>  gcc/fold-const.h                          |  2 +-
>  gcc/testsuite/gcc.dg/torture/pr100499-1.c | 27 ++++++++
>  gcc/testsuite/gcc.dg/torture/pr100499-2.c | 16 +++++
>  gcc/testsuite/gcc.dg/torture/pr100499-3.c | 14 ++++
>  gcc/tree-ssa-loop-niter.cc                | 52 ++++++---------
>  6 files changed, 130 insertions(+), 61 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/torture/pr100499-3.c
>
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index 12732d39c79..2578a86ca1a 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -14073,10 +14073,16 @@ fold_binary_initializer_loc (location_t loc, 
> tree_code code, tree type,
>       SAVE_EXPR (I) * SAVE_EXPR (J)
>  
>     (where the same SAVE_EXPR (J) is used in the original and the
> -   transformed version).  */
> +   transformed version).
> +
> +   NOWRAP specifies whether all outer operations in TYPE should
> +   be considered not wrapping.  Any type conversion within TOP acts
> +   as a barrier and we will fall back to NOWRAP being false.
> +   NOWRAP is mostly used to treat expressions in TYPE_SIZE and friends
> +   as not wrapping even though they are generally using unsigned arithmetic. 
>  */
>  
>  int
> -multiple_of_p (tree type, const_tree top, const_tree bottom)
> +multiple_of_p (tree type, const_tree top, const_tree bottom, bool nowrap)
>  {
>    gimple *stmt;
>    tree op1, op2;
> @@ -14094,10 +14100,17 @@ multiple_of_p (tree type, const_tree top, 
> const_tree bottom)
>        a multiple of BOTTOM then TOP is a multiple of BOTTOM.  */
>        if (!integer_pow2p (bottom))
>       return 0;
> -      return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> -           || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> +      return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, nowrap)
> +           || multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap));
>  
>      case MULT_EXPR:
> +      /* If the multiplication can wrap we cannot recurse further unless
> +      the bottom is a power of two which is where wrapping does not
> +      matter.  */
> +      if (!nowrap
> +       && !TYPE_OVERFLOW_UNDEFINED (type)
> +       && !integer_pow2p (bottom))
> +     return 0;
>        if (TREE_CODE (bottom) == INTEGER_CST)
>       {
>         op1 = TREE_OPERAND (top, 0);
> @@ -14106,24 +14119,24 @@ multiple_of_p (tree type, const_tree top, 
> const_tree bottom)
>           std::swap (op1, op2);
>         if (TREE_CODE (op2) == INTEGER_CST)
>           {
> -           if (multiple_of_p (type, op2, bottom))
> +           if (multiple_of_p (type, op2, bottom, nowrap))
>               return 1;
>             /* Handle multiple_of_p ((x * 2 + 2) * 4, 8).  */
> -           if (multiple_of_p (type, bottom, op2))
> +           if (multiple_of_p (type, bottom, op2, nowrap))
>               {
>                 widest_int w = wi::sdiv_trunc (wi::to_widest (bottom),
>                                                wi::to_widest (op2));
>                 if (wi::fits_to_tree_p (w, TREE_TYPE (bottom)))
>                   {
>                     op2 = wide_int_to_tree (TREE_TYPE (bottom), w);
> -                   return multiple_of_p (type, op1, op2);
> +                   return multiple_of_p (type, op1, op2, nowrap);
>                   }
>               }
> -           return multiple_of_p (type, op1, bottom);
> +           return multiple_of_p (type, op1, bottom, nowrap);
>           }
>       }
> -      return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> -           || multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> +      return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, nowrap)
> +           || multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap));
>  
>      case LSHIFT_EXPR:
>        /* Handle X << CST as X * (1 << CST) and only process the constant.  */
> @@ -14135,29 +14148,38 @@ multiple_of_p (tree type, const_tree top, 
> const_tree bottom)
>             wide_int mul_op
>               = wi::one (TYPE_PRECISION (type)) << wi::to_wide (op1);
>             return multiple_of_p (type,
> -                                 wide_int_to_tree (type, mul_op), bottom);
> +                                 wide_int_to_tree (type, mul_op), bottom,
> +                                 nowrap);
>           }
>       }
>        return 0;
>  
>      case MINUS_EXPR:
> -      /* It is impossible to prove if op0 - op1 is multiple of bottom
> -      precisely, so be conservative here checking if both op0 and op1
> -      are multiple of bottom.  Note we check the second operand first
> -      since it's usually simpler.  */
> -      return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> -           && multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> -
>      case PLUS_EXPR:
> -      /* The same as MINUS_EXPR, but handle cases like op0 + 0xfffffffd
> -      as op0 - 3 if the expression has unsigned type.  For example,
> -      (X / 3) + 0xfffffffd is multiple of 3, but 0xfffffffd is not.  */
> +      /* If the addition or subtraction can wrap we cannot recurse further
> +      unless bottom is a power of two which is where wrapping does not
> +      matter.  */
> +      if (!nowrap
> +       && !TYPE_OVERFLOW_UNDEFINED (type)
> +       && !integer_pow2p (bottom))
> +     return 0;
> +
> +      /* Handle cases like op0 + 0xfffffffd as op0 - 3 if the expression has
> +      unsigned type.  For example, (X / 3) + 0xfffffffd is multiple of 3,
> +      but 0xfffffffd is not.  */
>        op1 = TREE_OPERAND (top, 1);
> -      if (TYPE_UNSIGNED (type)
> +      if (TREE_CODE (top) == PLUS_EXPR
> +       && nowrap
> +       && TYPE_UNSIGNED (type)
>         && TREE_CODE (op1) == INTEGER_CST && tree_int_cst_sign_bit (op1))
>       op1 = fold_build1 (NEGATE_EXPR, type, op1);
> -      return (multiple_of_p (type, op1, bottom)
> -           && multiple_of_p (type, TREE_OPERAND (top, 0), bottom));
> +
> +      /* It is impossible to prove if op0 +- op1 is multiple of bottom
> +      precisely, so be conservative here checking if both op0 and op1
> +      are multiple of bottom.  Note we check the second operand first
> +      since it's usually simpler.  */
> +      return (multiple_of_p (type, op1, bottom, nowrap)
> +           && multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap));
>  
>      CASE_CONVERT:
>        /* Can't handle conversions from non-integral or wider integral type.  
> */
> @@ -14165,15 +14187,17 @@ multiple_of_p (tree type, const_tree top, 
> const_tree bottom)
>         || (TYPE_PRECISION (type)
>             < TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (top, 0)))))
>       return 0;
> +      /* NOWRAP only extends to operations in the outermost type so
> +      make sure to strip it off here.  */
>        return multiple_of_p (TREE_TYPE (TREE_OPERAND (top, 0)),
> -                         TREE_OPERAND (top, 0), bottom);
> +                         TREE_OPERAND (top, 0), bottom, false);
>  
>      case SAVE_EXPR:
> -      return multiple_of_p (type, TREE_OPERAND (top, 0), bottom);
> +      return multiple_of_p (type, TREE_OPERAND (top, 0), bottom, nowrap);
>  
>      case COND_EXPR:
> -      return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom)
> -           && multiple_of_p (type, TREE_OPERAND (top, 2), bottom));
> +      return (multiple_of_p (type, TREE_OPERAND (top, 1), bottom, nowrap)
> +           && multiple_of_p (type, TREE_OPERAND (top, 2), bottom, nowrap));
>  
>      case INTEGER_CST:
>        if (TREE_CODE (bottom) != INTEGER_CST
> diff --git a/gcc/fold-const.h b/gcc/fold-const.h
> index 394a67ece79..d36bfc813cf 100644
> --- a/gcc/fold-const.h
> +++ b/gcc/fold-const.h
> @@ -96,7 +96,7 @@ extern void fold_overflow_warning (const char*, enum 
> warn_strict_overflow_code);
>  extern enum tree_code fold_div_compare (enum tree_code, tree, tree,
>                                       tree *, tree *, bool *);
>  extern bool operand_equal_p (const_tree, const_tree, unsigned int flags = 0);
> -extern int multiple_of_p (tree, const_tree, const_tree);
> +extern int multiple_of_p (tree, const_tree, const_tree, bool = true);
>  #define omit_one_operand(T1,T2,T3)\
>     omit_one_operand_loc (UNKNOWN_LOCATION, T1, T2, T3)
>  extern tree omit_one_operand_loc (location_t, tree, tree, tree);
> diff --git a/gcc/testsuite/gcc.dg/torture/pr100499-1.c 
> b/gcc/testsuite/gcc.dg/torture/pr100499-1.c
> new file mode 100644
> index 00000000000..9511c323505
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr100499-1.c
> @@ -0,0 +1,27 @@
> +/* { dg-do run } */
> +
> +typedef __UINT16_TYPE__ uint16_t;
> +typedef __INT32_TYPE__ int32_t;
> +static uint16_t g_2823 = 0xEC75L;
> +static uint16_t g_116 = 0xBC07L;
> +
> +static uint16_t
> +safe_mul_func_uint16_t_u_u(uint16_t ui1, uint16_t ui2)
> +{
> +  return ((unsigned int)ui1) * ((unsigned int)ui2);
> +}
> +
> +int main ()
> +{
> +  uint16_t l_2815 = 0xffff;
> +  uint16_t *l_2821 = &g_116;
> +  uint16_t *l_2822 = &g_2823;
> +
> +lbl_2826:
> +  l_2815 &= 0x1eae;
> +  if (safe_mul_func_uint16_t_u_u(((*l_2821) = l_2815), (--(*l_2822))))
> +    goto lbl_2826;
> +  if (g_2823 != 32768)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/torture/pr100499-2.c 
> b/gcc/testsuite/gcc.dg/torture/pr100499-2.c
> new file mode 100644
> index 00000000000..999f931806a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr100499-2.c
> @@ -0,0 +1,16 @@
> +/* { dg-do run } */
> +
> +unsigned char ag = 55;
> +unsigned i;
> +int main()
> +{
> +  unsigned char c;
> +  unsigned char a = ag;
> +d:
> +  c = a-- * 52;
> +  if (c)
> +    goto d;
> +  if (a != 255)
> +    __builtin_abort ();
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.dg/torture/pr100499-3.c 
> b/gcc/testsuite/gcc.dg/torture/pr100499-3.c
> new file mode 100644
> index 00000000000..01be1e50690
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr100499-3.c
> @@ -0,0 +1,14 @@
> +/* { dg-do run } */
> +
> +int a = 0;
> +unsigned char b = 0;
> +
> +int main() {
> +  a - 6;
> +  for (; a >= -13; a = a - 8)
> +    while((unsigned char)(b-- * 6))
> +      ;
> +  if (b != 127)
> +    __builtin_abort();
> +  return 0;
> +}
> diff --git a/gcc/tree-ssa-loop-niter.cc b/gcc/tree-ssa-loop-niter.cc
> index d33095b8e03..318d10c8fac 100644
> --- a/gcc/tree-ssa-loop-niter.cc
> +++ b/gcc/tree-ssa-loop-niter.cc
> @@ -1042,17 +1042,21 @@ number_of_iterations_ne (class loop *loop, tree type, 
> affine_iv *iv,
>           new_base = base - step > FINAL ; step < 0
>                                            && base - step doesn't overflow
>  
> -       2') |FINAL - new_base| is an exact multiple of step.
> -
> -     Please refer to PR34114 as an example of loop-ch's impact, also refer
> -     to PR72817 as an example why condition 2') is necessary.
> +     Please refer to PR34114 as an example of loop-ch's impact.
>  
>       Note, for NE_EXPR, base equals to FINAL is a special case, in
> -     which the loop exits immediately, and the iv does not overflow.  */
> +     which the loop exits immediately, and the iv does not overflow.
> +
> +     Also note, we prove condition 2) by checking base and final seperately
> +     along with condition 1) or 1').  */
>    if (!niter->control.no_overflow
> -      && (integer_onep (s) || multiple_of_p (type, c, s)))
> +      && (integer_onep (s)
> +       || (multiple_of_p (type, fold_convert (niter_type, iv->base), s,
> +                          false)
> +           && multiple_of_p (type, fold_convert (niter_type, final), s,
> +                             false))))

…maybe it would be worth evaluating these folds unconditionally at the
start of the function, since they're needed in the calculation of "c"
as well.

I guess that's a pre-existing thing though.  You're just replacing folds
later on rather than adding to the total, so probably not worth another
respin.

Thanks,
Richard

>      {
> -      tree t, cond, new_c, relaxed_cond = boolean_false_node;
> +      tree t, cond, relaxed_cond = boolean_false_node;
>  
>        if (tree_int_cst_sign_bit (iv->step))
>       {
> @@ -1066,12 +1070,8 @@ number_of_iterations_ne (class loop *loop, tree type, 
> affine_iv *iv,
>             if (integer_nonzerop (t))
>               {
>                 t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
> -               new_c = fold_build2 (MINUS_EXPR, niter_type,
> -                                    fold_convert (niter_type, t),
> -                                    fold_convert (niter_type, final));
> -               if (multiple_of_p (type, new_c, s))
> -                 relaxed_cond = fold_build2 (GT_EXPR, boolean_type_node,
> -                                             t, final);
> +               relaxed_cond = fold_build2 (GT_EXPR, boolean_type_node, t,
> +                                           final);
>               }
>           }
>       }
> @@ -1087,12 +1087,8 @@ number_of_iterations_ne (class loop *loop, tree type, 
> affine_iv *iv,
>             if (integer_nonzerop (t))
>               {
>                 t = fold_build2 (MINUS_EXPR, type, iv->base, iv->step);
> -               new_c = fold_build2 (MINUS_EXPR, niter_type,
> -                                    fold_convert (niter_type, final),
> -                                    fold_convert (niter_type, t));
> -               if (multiple_of_p (type, new_c, s))
> -                 relaxed_cond = fold_build2 (LT_EXPR, boolean_type_node,
> -                                             t, final);
> +               relaxed_cond = fold_build2 (LT_EXPR, boolean_type_node, t,
> +                                           final);
>               }
>           }
>       }
> @@ -1102,19 +1098,11 @@ number_of_iterations_ne (class loop *loop, tree type, 
> affine_iv *iv,
>       t = simplify_using_initial_conditions (loop, relaxed_cond);
>  
>        if (t && integer_onep (t))
> -     niter->control.no_overflow = true;
> -    }
> -
> -  /* First the trivial cases -- when the step is 1.  */
> -  if (integer_onep (s))
> -    {
> -      niter->niter = c;
> -      return true;
> -    }
> -  if (niter->control.no_overflow && multiple_of_p (type, c, s))
> -    {
> -      niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, c, s);
> -      return true;
> +     {
> +       niter->control.no_overflow = true;
> +       niter->niter = fold_build2 (EXACT_DIV_EXPR, niter_type, c, s);
> +       return true;
> +     }
>      }
>  
>    /* Let nsd (step, size of mode) = d.  If d does not divide c, the loop

Reply via email to