On 2021-07-01 15:22, Bin.Cheng wrote:
On Thu, Jul 1, 2021 at 10:06 AM Jiufu Guo via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:

For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
    cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, is this ok for trunk?

gcc/ChangeLog:

        PR tree-optimization/101145
        * tree-ssa-loop-niter.c
        (number_of_iterations_until_wrap): New function.
        (number_of_iterations_lt): Invoke above function.
        (adjust_cond_for_loop_until_wrap):
        Merge to number_of_iterations_until_wrap.
        (number_of_iterations_cond): Update invokes for
        adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

        PR tree-optimization/101145
        * gcc.dg/vect/pr101145.c: New test.
        * gcc.dg/vect/pr101145.inc: New test.
        * gcc.dg/vect/pr101145_1.c: New test.
        * gcc.dg/vect/pr101145_2.c: New test.
        * gcc.dg/vect/pr101145_3.c: New test.
---
gcc/testsuite/gcc.dg/vect/pr101145.c | 187 +++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +++++++++
 gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
 gcc/tree-ssa-loop-niter.c              | 150 +++++++++++---------
 6 files changed, 380 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c


diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b5add827018..06db6a36ef8 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, affine_iv *iv1,
     }
 }

+/* Determines number of iterations of loop whose ending condition
+   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
+   The number of iterations is stored to NITER.  */
+
+static bool
+number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0, + affine_iv *iv1, class tree_niter_desc *niter)
+{
+  tree niter_type = unsigned_type_for (type);
+  tree max, min;
+
+  if (POINTER_TYPE_P (type))
+    {
+      max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
+      min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
+    }
+  else
+    {
+      max = TYPE_MAX_VALUE (type);
+      min = TYPE_MIN_VALUE (type);
+    }
+
+  tree high = max, low = min, one = build_int_cst (niter_type, 1);
+  tree step;
+
+  /* n < {base, C}. */
+ if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST
+      && !tree_int_cst_sign_bit (iv1->step))
+    {
+      step = iv1->step;
+ niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base);
max/iv1->base could be of pointer type, not sure if this is canonical though.
Thanks. Pointer needs careful attention. I added case pr101145_3.c for pointer, as test, the iteration number is 7: 0xffffffffffffffe4 - 0xffffffffffffffff,
where pointer type is pointer to int: "int *".  It works as expected.
I notice in number_of_iterations_lt, there are code likes:
    delta = fold_build2 (MINUS_EXPR, niter_type,
                         fold_convert (niter_type, iv1->base),
                         fold_convert (niter_type, iv0->base));
This would also be ok.


+      if (TREE_CODE (iv1->base) == INTEGER_CST)
+       low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
+      else if (TREE_CODE (iv0->base) == INTEGER_CST)
+       low = iv0->base;
+    }
+  /* {base, -C} < n. */
+  else if (TREE_CODE (iv0->step) == INTEGER_CST
+ && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
+    {
+ step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step); + niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min);
+      if (TREE_CODE (iv0->base) == INTEGER_CST)
+       high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
+      else if (TREE_CODE (iv1->base) == INTEGER_CST)
+       high = iv1->base;
+    }
+  else
+    return false;
+
+  /* (delta + step - 1) / step */
+  step = fold_convert (niter_type, step);
+  niter->niter = fold_convert (niter_type, niter->niter);
+ niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step); + niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, step);
+
+  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
+  m = fold_convert (niter_type, m);
+  mpz_t mstep, tmp, mmax;
+  mpz_init (mstep);
+  mpz_init (tmp);
+  mpz_init (mmax);
+  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
+  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
+  mpz_add (tmp, mmax, mstep);
+  mpz_sub_ui (tmp, tmp, 1);
+  mpz_fdiv_q (tmp, tmp, mstep);
+ niter->max = widest_int::from (wi::from_mpz (niter_type, tmp, false),
+                                TYPE_SIGN (niter_type));
This computation is similar to function number_of_iterations_lt, could
we factor it out into an independent function?
Thanks!  Will update as you said.


+  mpz_clear (mstep);
+  mpz_clear (tmp);
+
+  niter->may_be_zero
+    = fold_build2 (LE_EXPR, boolean_type_node, iv1->base, iv0->base);
If iv0->base and iv1->base are constant and iv1->base <= iv0->base,
the number of iteration is actually zero, but here we rely on
may_be_zero (== true), which loses information.  Could we specially
handle this case and do a fast return?
As tests, if iv1->base <= iv0->base in user source code, this function
will not be called, even number_of_iterations_exit_assumptions will not
be called.  But it is still may be possible to run into this function if
some previous optimizations generate constant bases.
Then a fast return would be useful, I will update the patch.


Could you test this on some more targets(x86, aarch64) please? Otherwise LGTM.
I will have some tests on other targets. Hope there is no difference between
targets for this patch :)

BR,
Jiufu Guo


Thanks,
bin
+
+  niter->control.no_overflow = false;
+
+  return true;
+}
+
 /* Determines number of iterations of loop whose ending condition
    is IV0 < IV1.  TYPE is the type of the iv.  The number of
    iterations is stored to NITER.  BNDS bounds the difference
@@ -1501,6 +1581,11 @@ number_of_iterations_lt (class loop *loop, tree type, affine_iv *iv0,
       niter->bound = iv0->base;
     }

+  /* {base, -C} < n,  or n < {base, C} */
+  if (tree_int_cst_sign_bit (iv0->step)
+ || (!integer_zerop (iv1->step) && !tree_int_cst_sign_bit (iv1->step))) + return number_of_iterations_until_wrap (loop, type, iv0, iv1, niter);
+
   delta = fold_build2 (MINUS_EXPR, niter_type,
                       fold_convert (niter_type, iv1->base),
                       fold_convert (niter_type, iv0->base));
@@ -1665,62 +1750,6 @@ dump_affine_iv (FILE *file, affine_iv *iv)
     }
 }

-/* Given exit condition IV0 CODE IV1 in TYPE, this function adjusts
-   the condition for loop-until-wrap cases.  For example:
-     (unsigned){8, -1}_loop < 10        => {0, 1} != 9
-     10 < (unsigned){0, max - 7}_loop   => {0, 1} != 8
-   Return true if condition is successfully adjusted.  */
-
-static bool
-adjust_cond_for_loop_until_wrap (tree type, affine_iv *iv0, tree_code *code,
-                                affine_iv *iv1)
-{
-  /* Only support simple cases for the moment.  */
-  if (TREE_CODE (iv0->base) != INTEGER_CST
-      || TREE_CODE (iv1->base) != INTEGER_CST)
-    return false;
-
-  tree niter_type = unsigned_type_for (type), high, low;
-  /* Case: i-- < 10.  */
-  if (integer_zerop (iv1->step))
-    {
-      /* TODO: Should handle case in which abs(step) != 1.  */
-      if (!integer_minus_onep (iv0->step))
-       return false;
-      /* Give up on infinite loop.  */
-      if (*code == LE_EXPR
-         && tree_int_cst_equal (iv1->base, TYPE_MAX_VALUE (type)))
-       return false;
-      high = fold_build2 (PLUS_EXPR, niter_type,
-                         fold_convert (niter_type, iv0->base),
-                         build_int_cst (niter_type, 1));
-      low = fold_convert (niter_type, TYPE_MIN_VALUE (type));
-    }
-  else if (integer_zerop (iv0->step))
-    {
-      /* TODO: Should handle case in which abs(step) != 1.  */
-      if (!integer_onep (iv1->step))
-       return false;
-      /* Give up on infinite loop.  */
-      if (*code == LE_EXPR
-         && tree_int_cst_equal (iv0->base, TYPE_MIN_VALUE (type)))
-       return false;
-      high = fold_convert (niter_type, TYPE_MAX_VALUE (type));
-      low = fold_build2 (MINUS_EXPR, niter_type,
-                        fold_convert (niter_type, iv1->base),
-                        build_int_cst (niter_type, 1));
-    }
-  else
-    gcc_unreachable ();
-
-  iv0->base = low;
-  iv0->step = fold_convert (niter_type, integer_one_node);
-  iv1->base = high;
-  iv1->step = build_int_cst (niter_type, 0);
-  *code = NE_EXPR;
-  return true;
-}
-
/* Determine the number of iterations according to condition (for staying inside loop) which compares two induction variables using comparison operator CODE. The induction variable on left side of the comparison
@@ -1855,15 +1884,6 @@ number_of_iterations_cond (class loop *loop,
       return true;
     }

- /* Handle special case loops: while (i-- < 10) and while (10 < i++) by
-     adjusting iv0, iv1 and code.  */
-  if (code != NE_EXPR
-      && (tree_int_cst_sign_bit (iv0->step)
-         || (!integer_zerop (iv1->step)
-             && !tree_int_cst_sign_bit (iv1->step)))
-      && !adjust_cond_for_loop_until_wrap (type, iv0, &code, iv1))
-    return false;
-
/* OK, now we know we have a senseful loop. Handle several cases, depending
      on what comparison operator is used.  */
   bound_difference (loop, iv1->base, iv0->base, &bnds);
--
2.17.1

Reply via email to