[PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-07-07 Thread Jiufu Guo via Gcc-patches
Changes since v1:
* Update assumptions for niter, add more test case checks
* Use widest_int/wide_int instead of mpz to do +, -, /
* Move some early checks for quick return

For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is a function adjust_cond_for_loop_until_wrap which
handles similar work for constant bases.
Like adjust_cond_for_loop_until_wrap, this patch enhances the
functions number_of_iterations_cond/number_of_iterations_lt
to analyze the number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64.
Is this ok for trunk?

gcc/ChangeLog:

2021-07-07  Jiufu Guo  

PR tree-optimization/101145
* tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

2021-07-07  Jiufu Guo  

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
* gcc.dg/vect/pr101145inf.c: New test.
* gcc.dg/vect/pr101145inf.inc: New test.
* gcc.dg/vect/pr101145inf_1.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c  | 187 ++
 gcc/testsuite/gcc.dg/vect/pr101145.inc|  63 
 gcc/testsuite/gcc.dg/vect/pr101145_1.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145inf.c   |  25 +++
 gcc/testsuite/gcc.dg/vect/pr101145inf.inc |  28 
 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c |  23 +++
 gcc/tree-ssa-loop-niter.c | 157 ++
 9 files changed, 463 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{  // infinite
+  while (0 <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (--l < 64)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  l = 32;
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+  unsigned l, n;
+  unsigned res;
+  /* l > n*/
+  n = UINT_MAX - 64;
+  l = n + 32;
+  res = foo (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n;
+  res = foo (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n - 1;
+  res = foo (a, b, l, n);
+  if (res != l + 1)
+fail++;
+ 

Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-08-03 Thread guojiufu via Gcc-patches

Hi,

I would like to have a ping on this.

https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574596.html

BR,
Jiufu

On 2021-07-15 08:17, guojiufu via Gcc-patches wrote:

Hi,

I would like to have an early ping on this with more mail addresses.

BR,
Jiufu.

On 2021-07-07 20:47, Jiufu Guo wrote:

Changes since v1:
* Update assumptions for niter, add more test cases check
* Use widest_int/wide_int instead mpz to do +-/
* Move some early check for quick return

For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64.
Is this ok for trunk?

gcc/ChangeLog:

2021-07-07  Jiufu Guo  

PR tree-optimization/101145
* tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

2021-07-07  Jiufu Guo  

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
* gcc.dg/vect/pr101145inf.c: New test.
* gcc.dg/vect/pr101145inf.inc: New test.
* gcc.dg/vect/pr101145inf_1.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c  | 187 
++

 gcc/testsuite/gcc.dg/vect/pr101145.inc|  63 
 gcc/testsuite/gcc.dg/vect/pr101145_1.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145inf.c   |  25 +++
 gcc/testsuite/gcc.dg/vect/pr101145inf.inc |  28 
 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c |  23 +++
 gcc/tree-ssa-loop-niter.c | 157 ++
 9 files changed, 463 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned)

+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{  // infininate
+  while (0 <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned)

+{
+  while (--l < 64)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  l = 32;
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+
+int a[3200], b[3200];
+in

Re: [PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-07-14 Thread guojiufu via Gcc-patches

Hi,

I would like to have an early ping on this with more mail addresses.

BR,
Jiufu.

On 2021-07-07 20:47, Jiufu Guo wrote:

Changes since v1:
* Update assumptions for niter, add more test cases check
* Use widest_int/wide_int instead mpz to do +-/
* Move some early check for quick return

For code like:
unsigned foo(unsigned val, unsigned start)
{
  unsigned cnt = 0;
  for (unsigned i = start; i > val; ++i)
cnt++;
  return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64.
Is this ok for trunk?

gcc/ChangeLog:

2021-07-07  Jiufu Guo  

PR tree-optimization/101145
* tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.

gcc/testsuite/ChangeLog:

2021-07-07  Jiufu Guo  

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
* gcc.dg/vect/pr101145inf.c: New test.
* gcc.dg/vect/pr101145inf.inc: New test.
* gcc.dg/vect/pr101145inf_1.c: New test.
---
 gcc/testsuite/gcc.dg/vect/pr101145.c  | 187 ++
 gcc/testsuite/gcc.dg/vect/pr101145.inc|  63 
 gcc/testsuite/gcc.dg/vect/pr101145_1.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_2.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145_3.c|  15 ++
 gcc/testsuite/gcc.dg/vect/pr101145inf.c   |  25 +++
 gcc/testsuite/gcc.dg/vect/pr101145inf.inc |  28 
 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c |  23 +++
 gcc/tree-ssa-loop-niter.c | 157 ++
 9 files changed, 463 insertions(+), 65 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{  // infininate
+  while (0 <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned n)
+{
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned)
+{
+  while (--l < 64)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned 
n)

+{
+  l = 32;
+  while (--l < n)
+*a++ = *b++ + 1;
+  return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+  unsigned l, n;
+  unsigned res;
+  /* l > n*/
+  n = UINT_MAX - 64;
+  l = n + 32;
+  res = foo (a, b, l, n);
+  if (res != 0)
+fail++;
+
+  l = n;
+

Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-08-15 Thread Bin.Cheng via Gcc-patches
On Wed, Aug 4, 2021 at 10:42 AM guojiufu  wrote:
>
> Hi,
>
> I would like to have a ping on this.
>
> https://gcc.gnu.org/pipermail/gcc-patches/2021-July/574596.html
Sorry for being late in replying.

>
> BR,
> Jiufu
>
> On 2021-07-15 08:17, guojiufu via Gcc-patches wrote:
> > Hi,
> >
> > I would like to have an early ping on this with more mail addresses.
> >
> > BR,
> > Jiufu.
> >
> > On 2021-07-07 20:47, Jiufu Guo wrote:
> >> Changes since v1:
> >> * Update assumptions for niter, add more test cases check
> >> * Use widest_int/wide_int instead mpz to do +-/
> >> * Move some early check for quick return
> >>
> >> For code like:
> >> unsigned foo(unsigned val, unsigned start)
> >> {
> >>   unsigned cnt = 0;
> >>   for (unsigned i = start; i > val; ++i)
> >> cnt++;
> >>   return cnt;
> >> }
> >>
> >> The number of iterations should be about UINT_MAX - start.
> >>
> >> There is function adjust_cond_for_loop_until_wrap which
> >> handles similar work for const bases.
> >> Like adjust_cond_for_loop_until_wrap, this patch enhance
> >> function number_of_iterations_cond/number_of_iterations_lt
> >> to analyze number of iterations for this kind of loop.
> >>
> >> Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64.
> >> Is this ok for trunk?
> >>
> >> gcc/ChangeLog:
> >>
> >> 2021-07-07  Jiufu Guo  
> >>
> >>  PR tree-optimization/101145
> >>  * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
> >>  New function.
> >>  (number_of_iterations_lt): Invoke above function.
> >>  (adjust_cond_for_loop_until_wrap):
> >>  Merge to number_of_iterations_until_wrap.
> >>  (number_of_iterations_cond): Update invokes for
> >>  adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >> 2021-07-07  Jiufu Guo  
> >>
> >>  PR tree-optimization/101145
> >>  * gcc.dg/vect/pr101145.c: New test.
> >>  * gcc.dg/vect/pr101145.inc: New test.
> >>  * gcc.dg/vect/pr101145_1.c: New test.
> >>  * gcc.dg/vect/pr101145_2.c: New test.
> >>  * gcc.dg/vect/pr101145_3.c: New test.
> >>  * gcc.dg/vect/pr101145inf.c: New test.
> >>  * gcc.dg/vect/pr101145inf.inc: New test.
> >>  * gcc.dg/vect/pr101145inf_1.c: New test.
> >> ---
> >>  gcc/testsuite/gcc.dg/vect/pr101145.c  | 187
> >> ++
> >>  gcc/testsuite/gcc.dg/vect/pr101145.inc|  63 
> >>  gcc/testsuite/gcc.dg/vect/pr101145_1.c|  15 ++
> >>  gcc/testsuite/gcc.dg/vect/pr101145_2.c|  15 ++
> >>  gcc/testsuite/gcc.dg/vect/pr101145_3.c|  15 ++
> >>  gcc/testsuite/gcc.dg/vect/pr101145inf.c   |  25 +++
> >>  gcc/testsuite/gcc.dg/vect/pr101145inf.inc |  28 
> >>  gcc/testsuite/gcc.dg/vect/pr101145inf_1.c |  23 +++
> >>  gcc/tree-ssa-loop-niter.c | 157 ++
> >>  9 files changed, 463 insertions(+), 65 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc
> >>  create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c
> >>
> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> b/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> new file mode 100644
> >> index 000..74031b031cf
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
> >> @@ -0,0 +1,187 @@
> >> +/* { dg-require-effective-target vect_int } */
> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> +#include <limits.h>
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  while (n < ++l)
> >> +*a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
> >> unsigned)
> >> +{
> >> +  while (UINT_MAX - 64 < ++l)
> >> +*a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  l = UINT_MAX - 32;
> >> +  while (n < ++l)
> >> +*a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{
> >> +  while (n <= ++l)
> >> +*a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
> >> n)
> >> +{  // infininate
> >> +  while (0 <= ++l)
> >> +*a++ = *b++ + 1;
> >> +  return l;
> >> +}
> >> +
> >> +unsigned __attribute__ ((noinline))
> >> +foo_5 (int *__restr

Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-08-16 Thread Jiufu Guo via Gcc-patches

"Bin.Cheng"  writes:

On Wed, Aug 4, 2021 at 10:42 AM guojiufu 
 wrote:


Hi,


cut...

>> @@ -0,0 +1,63 @@
>> +TYPE __attribute__ ((noinline))
>> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, 
>> TYPE n)

>> +{
>> +  for (l = L_BASE; n < l; l += C)
>> +*a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +TYPE __attribute__ ((noinline))
>> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, 
>> TYPE n)

>> +{
>> +  for (l = L_BASE_DOWN; l < n; l -= C)
I noticed that both L_BASE and L_BASE_DOWN are defined as l, which
makes this test a bit confusing.  Could you clean the use of l, for
example, by using an auto var for the loop index invariable?
Otherwise the patch looks good to me.  Thanks very much for the work.
Thanks a lot for your help with the review!
L_BASE and L_BASE_DOWN are not needed.  I updated the patch to use
a new index variable 'i' for the loop instead of the parameter 'l':

 TYPE i;
 for (i = l; n < i; i += C)

I updated the patch as below.
Bootstrap and regression tests pass on powerpc64 and powerpc64le.

For code like:
unsigned foo(unsigned val, unsigned start)
{
 unsigned cnt = 0;
 for (unsigned i = start; i > val; ++i)
   cnt++;
 return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64.
Is this ok for trunk?

gcc/ChangeLog:

2021-08-16  Jiufu Guo  

PR tree-optimization/101145
* tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
	adjust_cond_for_loop_until_wrap and 
	number_of_iterations_lt.


gcc/testsuite/ChangeLog:

2021-08-16  Jiufu Guo  

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
* gcc.dg/vect/pr101145inf.c: New test.
* gcc.dg/vect/pr101145inf.inc: New test.
* gcc.dg/vect/pr101145inf_1.c: New test.
---
gcc/testsuite/gcc.dg/vect/pr101145.c  | 187 
++

gcc/testsuite/gcc.dg/vect/pr101145.inc|  65 
gcc/testsuite/gcc.dg/vect/pr101145_1.c|  13 ++
gcc/testsuite/gcc.dg/vect/pr101145_2.c|  13 ++
gcc/testsuite/gcc.dg/vect/pr101145_3.c|  13 ++
gcc/testsuite/gcc.dg/vect/pr101145inf.c   |  25 +++
gcc/testsuite/gcc.dg/vect/pr101145inf.inc |  28 
gcc/testsuite/gcc.dg/vect/pr101145inf_1.c |  23 +++
gcc/tree-ssa-loop-niter.c | 157 
++

9 files changed, 459 insertions(+), 65 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
b/gcc/testsuite/gcc.dg/vect/pr101145.c

new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned)

+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)
+{  // infininate 
+  while (0 <= ++l)

+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{
+  //no loop
+  l = UINT_MAX;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinli

Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-08-16 Thread Jiufu Guo via Gcc-patches

Jiufu Guo  writes:


"Bin.Cheng"  writes:

On Wed, Aug 4, 2021 at 10:42 AM guojiufu 
 wrote:


Hi,


cut...

>> @@ -0,0 +1,63 @@
>> +TYPE __attribute__ ((noinline))
>> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)

>> +{
>> +  for (l = L_BASE; n < l; l += C)
>> +*a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +TYPE __attribute__ ((noinline))
>> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)

>> +{
>> +  for (l = L_BASE_DOWN; l < n; l -= C)
I noticed that both L_BASE and L_BASE_DOWN are defined as l, which
makes this test a bit confusing.  Could you clean the use of l, for
example, by using an auto var for the loop index invariable?
Otherwise the patch looks good to me.  Thanks very much for the work.

Thanks a lot for your help to review!
L_BASE.. are not needed.  Updated the patch which use
a new index var 'i' for loop instead param 'l':

 TYPE i;
 for (i = l; n < i; i += C)

I updated the patch as below.
Bootstrap & regress pass on powerpc64 and powerpc64le.
I mean it also passes on powerpc64 (BE, including 32-bit).


BR,
Jiufu


For code like:
unsigned foo(unsigned val, unsigned start)
{
 unsigned cnt = 0;
 for (unsigned i = start; i > val; ++i)
   cnt++;
 return cnt;
}

The number of iterations should be about UINT_MAX - start.

There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.

Bootstrap and regtest pass on powerpc64le, x86_64 and aarch64.
Is this ok for trunk?

gcc/ChangeLog:

2021-08-16  Jiufu Guo  

PR tree-optimization/101145
* tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
	adjust_cond_for_loop_until_wrap and 
number_of_iterations_lt.


gcc/testsuite/ChangeLog:

2021-08-16  Jiufu Guo  

PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
* gcc.dg/vect/pr101145inf.c: New test.
* gcc.dg/vect/pr101145inf.inc: New test.
* gcc.dg/vect/pr101145inf_1.c: New test.
---
gcc/testsuite/gcc.dg/vect/pr101145.c  | 187 
++

gcc/testsuite/gcc.dg/vect/pr101145.inc|  65 
gcc/testsuite/gcc.dg/vect/pr101145_1.c|  13 ++
gcc/testsuite/gcc.dg/vect/pr101145_2.c|  13 ++
gcc/testsuite/gcc.dg/vect/pr101145_3.c|  13 ++
gcc/testsuite/gcc.dg/vect/pr101145inf.c   |  25 +++
gcc/testsuite/gcc.dg/vect/pr101145inf.inc |  28 
gcc/testsuite/gcc.dg/vect/pr101145inf_1.c |  23 +++
gcc/tree-ssa-loop-niter.c | 157 
++

9 files changed, 459 insertions(+), 65 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf.inc
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145inf_1.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c 
b/gcc/testsuite/gcc.dg/vect/pr101145.c

new file mode 100644
index 000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned)

+{
+  while (UINT_MAX - 64 < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{
+  l = UINT_MAX - 32;
+  while (n < ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{
+  while (n <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{  // infinite
+  while (0 <= ++l)
+*a++ = *b++ + 1;
+  return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, 
unsigned n)

+{
+  //no loop
+  l = UINT_MAX;
+ 

Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-08-24 Thread guojiufu via Gcc-patches

On 2021-08-16 09:33, Bin.Cheng wrote:
On Wed, Aug 4, 2021 at 10:42 AM guojiufu  
wrote:



...

>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> new file mode 100644
>> index 000..6eed3fa8aca
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
>> @@ -0,0 +1,63 @@
>> +TYPE __attribute__ ((noinline))
>> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> +  for (l = L_BASE; n < l; l += C)
>> +*a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +TYPE __attribute__ ((noinline))
>> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
>> +{
>> +  for (l = L_BASE_DOWN; l < n; l -= C)

I noticed that both L_BASE and L_BASE_DOWN are defined as l, which
makes this test a bit confusing.  Could you clean the use of l, for
example, by using an auto var for the loop index invariable?
Otherwise the patch looks good to me.  Thanks very much for the work.


Hi,

Sorry for bothering you here.
I feel this would be an approval (with the comment) already :)

With the code changed to make it a little clearer, as:
  TYPE i;
  for (i = l; n < i; i += C)

it may be ok to commit the patch to the trunk, right?

BR,
Jiufu



Thanks,
bin

>> +*a++ = *b++ + 1;
>> +  return l;
>> +}
>> +
>> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
>> +
>> +int a[1000], b[1000];
>> +int fail;
>> +
>> +int

...

>> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> new file mode 100644
>> index 000..94f6b99b893
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-require-effective-target vect_int } */
>> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
>> +#define TYPE signed char
>> +#define MIN -128
>> +#define MAX 127
>> +#define N_BASE (MAX - 32)
>> +#define N_BASE_DOWN (MIN + 32)
>> +
>> +#define C 3
>> +#define L_BASE l
>> +#define L_BASE_DOWN l
>> +


Re: Ping: [PATCH v2] Analyze niter for until-wrap condition [PR101145]

2021-08-24 Thread Bin.Cheng via Gcc-patches
On Wed, Aug 25, 2021 at 11:26 AM guojiufu  wrote:
>
> On 2021-08-16 09:33, Bin.Cheng wrote:
> > On Wed, Aug 4, 2021 at 10:42 AM guojiufu 
> > wrote:
> >>
> ...
> >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> >> b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> >> new file mode 100644
> >> >> index 000..6eed3fa8aca
> >> >> --- /dev/null
> >> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
> >> >> @@ -0,0 +1,63 @@
> >> >> +TYPE __attribute__ ((noinline))
> >> >> +foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> >> >> +{
> >> >> +  for (l = L_BASE; n < l; l += C)
> >> >> +*a++ = *b++ + 1;
> >> >> +  return l;
> >> >> +}
> >> >> +
> >> >> +TYPE __attribute__ ((noinline))
> >> >> +bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
> >> >> +{
> >> >> +  for (l = L_BASE_DOWN; l < n; l -= C)
> > I noticed that both L_BASE and L_BASE_DOWN are defined as l, which
> > makes this test a bit confusing.  Could you clean the use of l, for
> > example, by using an auto var for the loop index invariable?
> > Otherwise the patch looks good to me.  Thanks very much for the work.
>
> Hi,
>
> Sorry for bothering you here.
> I feel this would be an approval (with the comment) already :)
>
> With the change code to make it a little clear as:
>TYPE i;
>for (i = l; n < i; i += C)
>
> it may be ok to commit the patch to the trunk, right?
Yes please.  Thanks again for working on this.

Thanks,
bin
>
> BR,
> Jiufu
>
> >
> > Thanks,
> > bin
> >> >> +*a++ = *b++ + 1;
> >> >> +  return l;
> >> >> +}
> >> >> +
> >> >> +int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
> >> >> +
> >> >> +int a[1000], b[1000];
> >> >> +int fail;
> >> >> +
> >> >> +int
> ...
> >> >> diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> >> b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> >> new file mode 100644
> >> >> index 000..94f6b99b893
> >> >> --- /dev/null
> >> >> +++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
> >> >> @@ -0,0 +1,15 @@
> >> >> +/* { dg-require-effective-target vect_int } */
> >> >> +/* { dg-options "-O3 -fdump-tree-vect-details" } */
> >> >> +#define TYPE signed char
> >> >> +#define MIN -128
> >> >> +#define MAX 127
> >> >> +#define N_BASE (MAX - 32)
> >> >> +#define N_BASE_DOWN (MIN + 32)
> >> >> +
> >> >> +#define C 3
> >> >> +#define L_BASE l
> >> >> +#define L_BASE_DOWN l
> >> >> +