[PATCH] Analyze niter for until-wrap condition [PR101145]

2021-06-30 Thread Jiufu Guo via Gcc-patches
For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is a function, adjust_cond_for_loop_until_wrap, which
handles similar work for constant bases.
Like adjust_cond_for_loop_until_wrap, this patch enhances
number_of_iterations_cond/number_of_iterations_lt
to analyze the number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, is this ok for trunk?

gcc/ChangeLog:

PR tree-optimization/101145
* tree-ssa-loop-niter.c
(number_of_iterations_until_wrap): New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 +
 gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +
 gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
 gcc/tree-ssa-loop-niter.c  | 150 +++-
 6 files changed, 380 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{  // infinite
+  while (0 <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (--l < 64)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = 32;
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+  unsigned l, n;
+  unsigned res;
+  /* l > n*/
+  n = UINT_MAX - 64;
+  l = n + 32;
+  res = foo (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n;
+  res = foo (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n - 1;
+  res = foo (a, b, l, n);
+  if (res != l + 1)
+fail++;
+  
+  l = n - 32;
+  res = foo (a, b, l, n);
+  if (res != l + 1)
+fail++;
+
+  l = UINT_MAX;
+  res = foo (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n + 32;
+  res = foo_1 (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n + 32;
+  res = foo_2 (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n;
+  res = foo_3 (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n - 1;
+  res = foo_3 (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n - 2;
+  res = foo_3 (a, b, l, n);
+  if (res != l + 1)
+fail++;
+
+  res = foo_5 (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  n = 64;
+  l = n - 32;
+  res = bar (a, b, l, n);
+  res++;
+  if (res != 0)
+fail++;
+
+  l = n;
+  res = bar (a, b, l, n);
+  res++;
+

Re: [PATCH] Analyze niter for until-wrap condition [PR101145]

2021-07-01 Thread Bin.Cheng via Gcc-patches
On Thu, Jul 1, 2021 at 10:06 AM Jiufu Guo via Gcc-patches
 wrote:
>
> For code like:
> unsigned foo(unsigned val, unsigned start)
> {
>   unsigned cnt = 0;
>   for (unsigned i = start; i > val; ++i)
> cnt++;
>   return cnt;
> }
>
> The number of iterations should be about UINT_MAX - start.
>
> There is function adjust_cond_for_loop_until_wrap which
> handles similar work for const bases.
> Like adjust_cond_for_loop_until_wrap, this patch enhance
> function number_of_iterations_cond/number_of_iterations_lt
> to analyze number of iterations for this kind of loop.
>
> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>
> gcc/ChangeLog:
>
> PR tree-optimization/101145
> * tree-ssa-loop-niter.c
> (number_of_iterations_until_wrap): New function.
> (number_of_iterations_lt): Invoke above function.
> (adjust_cond_for_loop_until_wrap):
> Merge to number_of_iterations_until_wrap.
> (number_of_iterations_cond): Update invokes for
> adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/101145
> * gcc.dg/vect/pr101145.c: New test.
> * gcc.dg/vect/pr101145.inc: New test.
> * gcc.dg/vect/pr101145_1.c: New test.
> * gcc.dg/vect/pr101145_2.c: New test.
> * gcc.dg/vect/pr101145_3.c: New test.
> ---
>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 +
>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +
>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>  gcc/tree-ssa-loop-niter.c  | 150 +++-
>  6 files changed, 380 insertions(+), 65 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>

> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index b5add827018..06db6a36ef8 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv *iv0, 
> affine_iv *iv1,
>  }
>  }
>
> +/* Determines number of iterations of loop whose ending condition
> +   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
> +   The number of iterations is stored to NITER.  */
> +
> +static bool
> +number_of_iterations_until_wrap (class loop *, tree type, affine_iv *iv0,
> +affine_iv *iv1, class tree_niter_desc *niter)
> +{
> +  tree niter_type = unsigned_type_for (type);
> +  tree max, min;
> +
> +  if (POINTER_TYPE_P (type))
> +{
> +  max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
> +  min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
> +}
> +  else
> +{
> +  max = TYPE_MAX_VALUE (type);
> +  min = TYPE_MIN_VALUE (type);
> +}
> +
> +  tree high = max, low = min, one = build_int_cst (niter_type, 1);
> +  tree step;
> +
> +  /* n < {base, C}. */
> +  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == INTEGER_CST
> +  && !tree_int_cst_sign_bit (iv1->step))
> +{
> +  step = iv1->step;
> +  niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, iv1->base);
max/iv1->base could be of pointer type, not sure if this is canonical though.

> +  if (TREE_CODE (iv1->base) == INTEGER_CST)
> +   low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
> +  else if (TREE_CODE (iv0->base) == INTEGER_CST)
> +   low = iv0->base;
> +}
> +  /* {base, -C} < n. */
> +  else if (TREE_CODE (iv0->step) == INTEGER_CST
> +  && tree_int_cst_sign_bit (iv0->step) && integer_zerop (iv1->step))
> +{
> +  step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), iv0->step);
> +  niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, min);
> +  if (TREE_CODE (iv0->base) == INTEGER_CST)
> +   high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
> +  else if (TREE_CODE (iv1->base) == INTEGER_CST)
> +   high = iv1->base;
> +}
> +  else
> +return false;
> +
> +  /* (delta + step - 1) / step */
> +  step = fold_convert (niter_type, step);
> +  niter->niter = fold_convert (niter_type, niter->niter);
> +  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, step);
> +  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, niter->niter, 
> step);
> +
> +  tree m = fold_build2 (MINUS_EXPR, niter_type, high, low);
> +  m = fold_convert (niter_type, m);
> +  mpz_t mstep, tmp, mmax;
> +  mpz_init (mstep);
> +  mpz_init (tmp);
> +  mpz_init (mmax);
> +  wi::to_mpz (wi::to_wide (step), mstep, UNSIGNED);
> +  wi::to_mpz (wi::to_wide (m), mmax, UNSIGNED);
> +  mpz_add (tmp, mmax, mstep);
> +  mpz_sub_ui (tmp, 

Re: [PATCH] Analyze niter for until-wrap condition [PR101145]

2021-07-01 Thread guojiufu via Gcc-patches

On 2021-07-01 15:22, Bin.Cheng wrote:

On Thu, Jul 1, 2021 at 10:06 AM Jiufu Guo via Gcc-patches
 wrote:


For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, is this ok for trunk?

gcc/ChangeLog:

PR tree-optimization/101145
* tree-ssa-loop-niter.c
(number_of_iterations_until_wrap): New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 
+

 gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +
 gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
 gcc/tree-ssa-loop-niter.c  | 150 +++-
 6 files changed, 380 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c




diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b5add827018..06db6a36ef8 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv 
*iv0, affine_iv *iv1,

 }
 }

+/* Determines number of iterations of loop whose ending condition
+   is IV0 < IV1 which likes:  {base, -C} < n,  or n < {base, C}.
+   The number of iterations is stored to NITER.  */
+
+static bool
+number_of_iterations_until_wrap (class loop *, tree type, affine_iv 
*iv0,
+affine_iv *iv1, class tree_niter_desc 
*niter)

+{
+  tree niter_type = unsigned_type_for (type);
+  tree max, min;
+
+  if (POINTER_TYPE_P (type))
+{
+  max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
+  min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
+}
+  else
+{
+  max = TYPE_MAX_VALUE (type);
+  min = TYPE_MIN_VALUE (type);
+}
+
+  tree high = max, low = min, one = build_int_cst (niter_type, 1);
+  tree step;
+
+  /* n < {base, C}. */
+  if (integer_zerop (iv0->step) && TREE_CODE (iv1->step) == 
INTEGER_CST

+  && !tree_int_cst_sign_bit (iv1->step))
+{
+  step = iv1->step;
+  niter->niter = fold_build2 (MINUS_EXPR, niter_type, max, 
iv1->base);
max/iv1->base could be of pointer type, not sure if this is canonical 
though.
Thanks.  Pointers need careful attention.  I added the test case pr101145_3.c for
pointers; in that test the iteration number is 7: 0x...ffe4 - 0x...ffff,

where the pointer type is pointer to int: "int *".  It works as expected.
I notice that in number_of_iterations_lt there is code like:
delta = fold_build2 (MINUS_EXPR, niter_type,
 fold_convert (niter_type, iv1->base),
 fold_convert (niter_type, iv0->base));
This would also be ok.




+  if (TREE_CODE (iv1->base) == INTEGER_CST)
+   low = fold_build2 (MINUS_EXPR, type, iv1->base, one);
+  else if (TREE_CODE (iv0->base) == INTEGER_CST)
+   low = iv0->base;
+}
+  /* {base, -C} < n. */
+  else if (TREE_CODE (iv0->step) == INTEGER_CST
+  && tree_int_cst_sign_bit (iv0->step) && integer_zerop 
(iv1->step))

+{
+  step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv0->step), 
iv0->step);
+  niter->niter = fold_build2 (MINUS_EXPR, niter_type, iv0->base, 
min);

+  if (TREE_CODE (iv0->base) == INTEGER_CST)
+   high = fold_build2 (PLUS_EXPR, type, iv0->base, one);
+  else if (TREE_CODE (iv1->base) == INTEGER_CST)
+   high = iv1->base;
+}
+  else
+return false;
+
+  /* (delta + step - 1) / step */
+  step = fold_convert (niter_type, step);
+  niter->niter = fold_convert (niter_type, niter->niter);
+  niter->niter = fold_build2 (PLUS_EXPR, niter_type, niter->niter, 
step);
+  niter->niter = fold_build2 (FLOOR_DIV_EXPR, niter_type, 
niter->niter, step);

+
+  tree m = fold_build2 (MINUS_EX

Re: [PATCH] Analyze niter for until-wrap condition [PR101145]

2021-07-01 Thread Richard Biener
On Thu, 1 Jul 2021, Jiufu Guo wrote:

> For code like:
> unsigned foo(unsigned val, unsigned start)
> {
>   unsigned cnt = 0;
>   for (unsigned i = start; i > val; ++i)
> cnt++;
>   return cnt;
> }
> 
> The number of iterations should be about UINT_MAX - start.

For

unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i >= val; ++i)
cnt++;
  return cnt;
}

and val == 0 the loop never terminates.  I don't see anywhere
in the patch that you disregard GE_EXPR and I remember
the code handles GE as well as GT?  From a quick look this is
also not covered by a testcase you add - not exactly sure
how it would materialize in a miscompilation.

> There is function adjust_cond_for_loop_until_wrap which
> handles similar work for const bases.
> Like adjust_cond_for_loop_until_wrap, this patch enhance
> function number_of_iterations_cond/number_of_iterations_lt
> to analyze number of iterations for this kind of loop.
> 
> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/101145
>   * tree-ssa-loop-niter.c
>   (number_of_iterations_until_wrap): New function.
>   (number_of_iterations_lt): Invoke above function.
>   (adjust_cond_for_loop_until_wrap):
>   Merge to number_of_iterations_until_wrap.
>   (number_of_iterations_cond): Update invokes for
>   adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/101145
>   * gcc.dg/vect/pr101145.c: New test.
>   * gcc.dg/vect/pr101145.inc: New test.
>   * gcc.dg/vect/pr101145_1.c: New test.
>   * gcc.dg/vect/pr101145_2.c: New test.
>   * gcc.dg/vect/pr101145_3.c: New test.
> ---
>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 +
>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +
>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>  gcc/tree-ssa-loop-niter.c  | 150 +++-
>  6 files changed, 380 insertions(+), 65 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
> 
> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
> b/gcc/testsuite/gcc.dg/vect/pr101145.c
> new file mode 100644
> index 000..74031b031cf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
> @@ -0,0 +1,187 @@
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> +#include 
> +
> +unsigned __attribute__ ((noinline))
> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  while (n < ++l)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
> +{
> +  while (UINT_MAX - 64 < ++l)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  l = UINT_MAX - 32;
> +  while (n < ++l)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  while (n <= ++l)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{  // infininate 
> +  while (0 <= ++l)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  //no loop
> +  l = UINT_MAX;
> +  while (n < ++l)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  while (--l < n)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
> +{
> +  while (--l < 64)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +unsigned __attribute__ ((noinline))
> +bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
> +{
> +  l = 32;
> +  while (--l < n)
> +*a++ = *b++ + 1;
> +  return l;
> +}
> +
> +
> +int a[3200], b[3200];
> +int fail;
> +
> +int
> +main ()
> +{
> +  unsigned l, n;
> +  unsigned res;
> +  /* l > n*/
> +  n = UINT_MAX - 64;
> +  l = n + 32;
> +  res = foo (a, b, l, n);
> +  if (res != 0)
> +fail++;
> +
> +  l = n;
> +  res = foo (a, b, l, n);
> +  if (res != 0)
> +fail++;
> +
> +  l = n - 1;
> +  res = foo (a, b, l, n);
> +  if (res != l

Re: [PATCH] Analyze niter for until-wrap condition [PR101145]

2021-07-01 Thread guojiufu via Gcc-patches

On 2021-07-01 20:35, Richard Biener wrote:

On Thu, 1 Jul 2021, Jiufu Guo wrote:


For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.


For

unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i >= val; ++i)
cnt++;
  return cnt;
}

and val == 0 the loop never terminates.  I don't see anywhere
in the patch that you disregard GE_EXPR and I remember
the code handles GE as well as GT?  From a quick look this is
also not covered by a testcase you add - not exactly sure
how it would materialize in a miscompilation.


In number_of_iterations_cond, there is code:
   if (code == GE_EXPR || code == GT_EXPR
|| (code == NE_EXPR && integer_zerop (iv0->step)))
  {
std::swap (iv0, iv1);
code = swap_tree_comparison (code);
  }
It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
and LE (val <= i) is converted to LT (val - 1 < i).
So, the code is added to number_of_iterations_lt.

However, this patch leads to a miscompilation for unsigned "i >= val" because
of the transform above: converting LE (val <= i) to LT (val - 1 < i)
does not seem appropriate (e.g. when val == 0, where val - 1 wraps around).
Thanks for pointing this out!!!

I will investigate a way to handle this correctly.
One possible way may be to simply return false for this kind of LE.

Any suggestions?




There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, is this ok for trunk?

gcc/ChangeLog:

PR tree-optimization/101145
* tree-ssa-loop-niter.c
(number_of_iterations_until_wrap): New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 
+

 gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +
 gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
 gcc/tree-ssa-loop-niter.c  | 150 +++-
 6 files changed, 380 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
b/gcc/testsuite/gcc.dg/vect/pr101145.c

new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include 
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned)

+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{  // infininate
+  while (0 <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned)

+{
+  while (--l < 64)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsign

Re: [PATCH] Analyze niter for until-wrap condition [PR101145]

2021-07-01 Thread Bin.Cheng via Gcc-patches
On Thu, Jul 1, 2021 at 10:15 PM guojiufu via Gcc-patches
 wrote:
>
> On 2021-07-01 20:35, Richard Biener wrote:
> > On Thu, 1 Jul 2021, Jiufu Guo wrote:
> >
> >> For code like:
> >> unsigned foo(unsigned val, unsigned start)
> >> {
> >>   unsigned cnt = 0;
> >>   for (unsigned i = start; i > val; ++i)
> >> cnt++;
> >>   return cnt;
> >> }
> >>
> >> The number of iterations should be about UINT_MAX - start.
> >
> > For
> >
> > unsigned foo(unsigned val, unsigned start)
> > {
> >   unsigned cnt = 0;
> >   for (unsigned i = start; i >= val; ++i)
> > cnt++;
> >   return cnt;
> > }
> >
> > and val == 0 the loop never terminates.  I don't see anywhere
> > in the patch that you disregard GE_EXPR and I remember
> > the code handles GE as well as GT?  From a quick look this is
> > also not covered by a testcase you add - not exactly sure
> > how it would materialize in a miscompilation.
>
> In number_of_iterations_cond, there is code:
> if (code == GE_EXPR || code == GT_EXPR
> || (code == NE_EXPR && integer_zerop (iv0->step)))
>{
>  std::swap (iv0, iv1);
>  code = swap_tree_comparison (code);
>}
> It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
> and LE (val <= i) is converted to LT (val - 1 < i).
> So, the code is added to number_of_iterations_lt.
>
> But, this patch leads mis-compilation for unsigned "i >= val" as
> above transforms: converting LE (val <= i) to LT (val - 1 < i)
> seems not appropriate (e.g where val=0).
I don't know where the exact code is, but IIRC, number_of_iteration
handles boundary conditions when transforming <= into <.  You may
check it out.

> Thanks for pointing out this!!!
>
> I would investigate a way to handle this correctly.
> A possible way maybe just to return false for this kind of LE.
IIRC, it checks the boundary conditions, either returns false or
simply introduces more assumptions.
>
> Any suggestions?
>
> >
> >> There is function adjust_cond_for_loop_until_wrap which
> >> handles similar work for const bases.
> >> Like adjust_cond_for_loop_until_wrap, this patch enhance
> >> function number_of_iterations_cond/number_of_iterations_lt
> >> to analyze number of iterations for this kind of loop.
> >>
> >> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
> >>
> >> gcc/ChangeLog:
> >>
> >>  PR tree-optimization/101145
> >>  * tree-ssa-loop-niter.c
> >>  (number_of_iterations_until_wrap): New function.
> >>  (number_of_iterations_lt): Invoke above function.
> >>  (adjust_cond_for_loop_until_wrap):
> >>  Merge to number_of_iterations_until_wrap.
> >>  (number_of_iterations_cond): Update invokes for
> >>  adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>  PR tree-optimization/101145
> >>  * gcc.dg/vect/pr101145.c: New test.
> >>  * gcc.dg/vect/pr101145.inc: New test.
> >>  * gcc.dg/vect/pr101145_1.c: New test.
> >>  * gcc.dg/vect/pr101145_2.c: New test.
> >>  * gcc.dg/vect/pr101145_3.c: New test.
> >> ---
> >>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187
> >> +
> >>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +
> >>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
> >>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
> >>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
> >>  gcc/tree-ssa-loop-niter.c  | 150 +++-
> >>  6 files changed, 380 insertions(+), 65 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >>
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> new file mode 100644
> >> index 000..74031b031cf
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> @@ -0,0 +1,187 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#include 
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  while (n < ++l)
> >> +*a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
> >> unsigned)
> >> +{
> >> +  while (UINT_MAX - 64 < ++l)
> >> +*a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  l = UINT_MAX - 32;
> >> +  while (n < ++l)
> >> +*a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsign

Re: [PATCH] Analyze niter for until-wrap condition [PR101145]

2021-07-01 Thread guojiufu via Gcc-patches

On 2021-07-02 08:51, Bin.Cheng wrote:

On Thu, Jul 1, 2021 at 10:15 PM guojiufu via Gcc-patches
 wrote:


On 2021-07-01 20:35, Richard Biener wrote:
> On Thu, 1 Jul 2021, Jiufu Guo wrote:
>
>> For code like:
>> unsigned foo(unsigned val, unsigned start)
>> {
>>   unsigned cnt = 0;
>>   for (unsigned i = start; i > val; ++i)
>> cnt++;
>>   return cnt;
>> }
>>
>> The number of iterations should be about UINT_MAX - start.
>
> For
>
> unsigned foo(unsigned val, unsigned start)
> {
>   unsigned cnt = 0;
>   for (unsigned i = start; i >= val; ++i)
> cnt++;
>   return cnt;
> }
>
> and val == 0 the loop never terminates.  I don't see anywhere
> in the patch that you disregard GE_EXPR and I remember
> the code handles GE as well as GT?  From a quick look this is
> also not covered by a testcase you add - not exactly sure
> how it would materialize in a miscompilation.

In number_of_iterations_cond, there is code:
if (code == GE_EXPR || code == GT_EXPR
|| (code == NE_EXPR && integer_zerop (iv0->step)))
   {
 std::swap (iv0, iv1);
 code = swap_tree_comparison (code);
   }
It converts "GT/GE" (i >= val) to "LT/LE" (val <= i),
and LE (val <= i) is converted to LT (val - 1 < i).
So, the code is added to number_of_iterations_lt.

But, this patch leads mis-compilation for unsigned "i >= val" as
above transforms: converting LE (val <= i) to LT (val - 1 < i)
seems not appropriate (e.g where val=0).

I don't know where the exact code is, but IIRC, number_of_iteration
handles boundary conditions when transforming <= into <.  You may
check it out.

Yes, in number_of_iterations_le, there is code to check MAX/MIN
if (integer_nonzerop (iv0->step))
  assumption = fold_build2 (NE_EXPR, boolean_type_node,
iv1->base, TYPE_MAX_VALUE (type));
else
  assumption = fold_build2 (NE_EXPR, boolean_type_node,
iv0->base, TYPE_MIN_VALUE (type));

I am checking why this code does not help here.



Thanks for pointing out this!!!

I would investigate a way to handle this correctly.
A possible way maybe just to return false for this kind of LE.

IIRC, it checks the boundary conditions, either returns false or
simply introduces more assumptions.

Thanks! Adding more assumptions would help.
The code below also runs into an infinite loop; more assumptions may help
with this code as well.


__attribute__ ((noinline))
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; val <= i; i+=16)
cnt++;
  return cnt;
}

foo (4, 8);

Thanks again!


BR,
Jiufu Guo


Any suggestions?

>
>> There is function adjust_cond_for_loop_until_wrap which
>> handles similar work for const bases.
>> Like adjust_cond_for_loop_until_wrap, this patch enhances
>> function number_of_iterations_cond/number_of_iterations_lt
>> to analyze number of iterations for this kind of loop.
>>
>> Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
>>
>> gcc/ChangeLog:
>>
>>  PR tree-optimization/101145
>>  * tree-ssa-loop-niter.c
>>  (number_of_iterations_until_wrap): New function.
>>  (number_of_iterations_lt): Invoke above function.
>>  (adjust_cond_for_loop_until_wrap):
>>  Merge to number_of_iterations_until_wrap.
>>  (number_of_iterations_cond): Update invokes for
>>  adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
>>
>> gcc/testsuite/ChangeLog:
>>
>>  PR tree-optimization/101145
>>  * gcc.dg/vect/pr101145.c: New test.
>>  * gcc.dg/vect/pr101145.inc: New test.
>>  * gcc.dg/vect/pr101145_1.c: New test.
>>  * gcc.dg/vect/pr101145_2.c: New test.
>>  * gcc.dg/vect/pr101145_3.c: New test.
>> ---
>>  gcc/testsuite/gcc.dg/vect/pr101145.c   | 187
>> +
>>  gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +
>>  gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
>>  gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
>>  gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
>>  gcc/tree-ssa-loop-niter.c  | 150 +++-
>>  6 files changed, 380 insertions(+), 65 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
>>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> new file mode 100644
>> index 000..74031b031cf
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
>> @@ -0,0 +1,187 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#include <limits.h>
>> +
>> +unsigned __attribute__ ((noinline))
>> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
>> n)
>> +{
>> +  while (n < ++l)
>> +*a++ = *b++ + 1;
>> +  return l;
>> +

Re: [PATCH] Analyze niter for until-wrap condition [PR101145]

2021-07-02 Thread guojiufu via Gcc-patches

On 2021-07-01 20:35, Richard Biener wrote:

On Thu, 1 Jul 2021, Jiufu Guo wrote:


For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.


For

unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i >= val; ++i)
cnt++;
  return cnt;
}

and val == 0 the loop never terminates.  I don't see anywhere
in the patch that you disregard GE_EXPR and I remember
the code handles GE as well as GT?  From a quick look this is
also not covered by a testcase you add - not exactly sure
how it would materialize in a miscompilation.


I found a similar issue with the code below on trunk.
The code below should run forever, but it exits quickly.

#include <limits.h>
__attribute__ ((noinline))
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i <= val; i+=16)
cnt++;
  return cnt;
}

int main()
{
  return foo (UINT_MAX-7, 8);
}

Just opened https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101291

BR,
Jiufu Guo.


There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhances
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, is this ok for trunk?

gcc/ChangeLog:

PR tree-optimization/101145
* tree-ssa-loop-niter.c
(number_of_iterations_until_wrap): New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c   | 187 
+

 gcc/testsuite/gcc.dg/vect/pr101145.inc |  63 +
 gcc/testsuite/gcc.dg/vect/pr101145_1.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c |  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c |  15 ++
 gcc/tree-ssa-loop-niter.c  | 150 +++-
 6 files changed, 380 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
b/gcc/testsuite/gcc.dg/vect/pr101145.c

new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned)

+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{  // infinite
+  while (0 <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned)

+{
+  while (--l < 64)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  l = 32;
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+  unsigned l, n;
+  unsigned res;
+  /* l > n*/
+  n = UINT_MAX - 64;
+  l = n +