There is function adjust_cond_for_loop_until_wrap which
handles similar work for const bases.
Like adjust_cond_for_loop_until_wrap, this patch enhance
function number_of_iterations_cond/number_of_iterations_lt
to analyze number of iterations for this kind of loop.
Bootstrap and regtest pass on powerpc64le, is this ok for trunk?
gcc/ChangeLog:
PR tree-optimization/101145
* tree-ssa-loop-niter.c
(number_of_iterations_until_wrap): New function.
(number_of_iterations_lt): Invoke above function.
(adjust_cond_for_loop_until_wrap):
Merge to number_of_iterations_until_wrap.
(number_of_iterations_cond): Update invokes for
adjust_cond_for_loop_until_wrap and number_of_iterations_lt.
gcc/testsuite/ChangeLog:
PR tree-optimization/101145
* gcc.dg/vect/pr101145.c: New test.
* gcc.dg/vect/pr101145.inc: New test.
* gcc.dg/vect/pr101145_1.c: New test.
* gcc.dg/vect/pr101145_2.c: New test.
* gcc.dg/vect/pr101145_3.c: New test.
---
gcc/testsuite/gcc.dg/vect/pr101145.c | 187
+++++++++++++++++++++++++
gcc/testsuite/gcc.dg/vect/pr101145.inc | 63 +++++++++
gcc/testsuite/gcc.dg/vect/pr101145_1.c | 15 ++
gcc/testsuite/gcc.dg/vect/pr101145_2.c | 15 ++
gcc/testsuite/gcc.dg/vect/pr101145_3.c | 15 ++
gcc/tree-ssa-loop-niter.c | 150 +++++++++++---------
6 files changed, 380 insertions(+), 65 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145.inc
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_1.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_2.c
create mode 100644 gcc/testsuite/gcc.dg/vect/pr101145_3.c
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.c
b/gcc/testsuite/gcc.dg/vect/pr101145.c
new file mode 100644
index 00000000000..74031b031cf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.c
@@ -0,0 +1,187 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#include <limits.h>
+
+unsigned __attribute__ ((noinline))
+foo (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
unsigned)
+{
+ while (UINT_MAX - 64 < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ l = UINT_MAX - 32;
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_3 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ while (n <= ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_4 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{ // infininate
+ while (0 <= ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+foo_5 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ //no loop
+ l = UINT_MAX;
+ while (n < ++l)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ while (--l < n)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_1 (int *__restrict__ a, int *__restrict__ b, unsigned l,
unsigned)
+{
+ while (--l < 64)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+unsigned __attribute__ ((noinline))
+bar_2 (int *__restrict__ a, int *__restrict__ b, unsigned l, unsigned
n)
+{
+ l = 32;
+ while (--l < n)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+
+int a[3200], b[3200];
+int fail;
+
+int
+main ()
+{
+ unsigned l, n;
+ unsigned res;
+ /* l > n*/
+ n = UINT_MAX - 64;
+ l = n + 32;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 1;
+ res = foo (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ l = n - 32;
+ res = foo (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ l = UINT_MAX;
+ res = foo (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n + 32;
+ res = foo_1 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n + 32;
+ res = foo_2 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = foo_3 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 1;
+ res = foo_3 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ l = n - 2;
+ res = foo_3 (a, b, l, n);
+ if (res != l + 1)
+ fail++;
+
+ res = foo_5 (a, b, l, n);
+ if (res != 0)
+ fail++;
+
+ n = 64;
+ l = n - 32;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ l = n;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ l = n + 1;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != l)
+ fail++;
+
+ l = 0;
+ res = bar (a, b, l, n);
+ res++;
+ if (res != l)
+ fail++;
+
+ l = 32;
+ res = bar_1 (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ res = bar_1 (a, b, l, n);
+ res++;
+ if (res != 0)
+ fail++;
+
+ if (fail)
+ __builtin_abort ();
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 7 "vect" }
} */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145.inc
b/gcc/testsuite/gcc.dg/vect/pr101145.inc
new file mode 100644
index 00000000000..6eed3fa8aca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145.inc
@@ -0,0 +1,63 @@
+TYPE __attribute__ ((noinline))
+foo_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+ for (l = L_BASE; n < l; l += C)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+TYPE __attribute__ ((noinline))
+bar_sign (int *__restrict__ a, int *__restrict__ b, TYPE l, TYPE n)
+{
+ for (l = L_BASE_DOWN; l < n; l -= C)
+ *a++ = *b++ + 1;
+ return l;
+}
+
+int __attribute__ ((noinline)) neq (int a, int b) { return a != b; }
+
+int a[1000], b[1000];
+int fail;
+
+int
+main ()
+{
+ TYPE res;
+ TYPE l;
+ TYPE n;
+ n = N_BASE;
+ l = n - C;
+ res = foo_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n;
+ res = foo_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n + C;
+ res = foo_sign (a, b, l, n);
+ if (neq ((res - MIN) / C, 0))
+ fail++;
+
+ n = N_BASE_DOWN;
+ l = n - C;
+ res = bar_sign (a, b, l, n);
+ if (neq ((MAX - res) / C, 0))
+ fail++;
+
+ l = n;
+ res = bar_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ l = n + C;
+ res = bar_sign (a, b, l, n);
+ if (res != l)
+ fail++;
+
+ if (fail)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_1.c
b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
new file mode 100644
index 00000000000..94f6b99b893
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_1.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE signed char
+#define MIN -128
+#define MAX 127
+#define N_BASE (MAX - 32)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 3
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
} */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_2.c
b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
new file mode 100644
index 00000000000..d3cfc9e01e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_2.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE unsigned char
+#define MIN 0
+#define MAX 255
+#define N_BASE (MAX - 32 + 1)
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 2
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
} */
diff --git a/gcc/testsuite/gcc.dg/vect/pr101145_3.c
b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
new file mode 100644
index 00000000000..e2cf9b1f7e6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr101145_3.c
@@ -0,0 +1,15 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -fdump-tree-vect-details" } */
+#define TYPE int *
+#define MIN ((TYPE)0)
+#define MAX ((TYPE)((long long)-1))
+#define N_BASE ((TYPE) (-32))
+#define N_BASE_DOWN (MIN + 32)
+
+#define C 1
+#define L_BASE l
+#define L_BASE_DOWN l
+
+#include "pr101145.inc"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" }
} */
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b5add827018..06db6a36ef8 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1473,6 +1473,86 @@ assert_loop_rolls_lt (tree type, affine_iv
*iv0, affine_iv *iv1,
}
}
+/* Determines number of iterations of loop whose ending condition
+ is IV0 < IV1 which likes: {base, -C} < n, or n < {base, C}.
+ The number of iterations is stored to NITER. */
+
+static bool
+number_of_iterations_until_wrap (class loop *, tree type, affine_iv
*iv0,
+ affine_iv *iv1, class tree_niter_desc *niter)
+{
+ tree niter_type = unsigned_type_for (type);
+ tree max, min;
+
+ if (POINTER_TYPE_P (type))
+ {
+ max = fold_convert (type, TYPE_MAX_VALUE (niter_type));
+ min = fold_convert (type, TYPE_MIN_VALUE (niter_type));
+ }
+ else
+ {
+ max = TYPE_MAX_VALUE (type);
+ min = TYPE_MIN_VALUE (type);
+ }