This fixes a miscompilation issue introduced by the enablement of
combined loop peeling and versioning. A test case that reproduces the
issue is included in the patch.

When performing loop peeling, GCC usually inserts a skip-vector check.
This ensures that after peeling, there are enough remaining iterations
to enter the main vectorized loop. Previously, the check was omitted if
loop versioning for alignment was applied. It was safe before because
versioning and peeling for alignment were mutually exclusive.

However, with combined peeling and versioning enabled, this is not safe
any more. A loop may be peeled and versioned at the same time. Without
the skip-vector check, the main vectorized loop can be entered even if
its iteration count is zero. This can cause the loop running many more
iterations than needed, resulting in incorrect results.

To fix this, the patch updates the condition of omitting the skip-vector
check to when versioning is performed alone without peeling.

This patch is bootstrapped and regression-tested on x86_64-linux-gnu,
arm-linux-gnueabihf and aarch64-linux-gnu.

        PR tree-optimization/121020

gcc/ChangeLog:

        * tree-vect-loop-manip.cc (vect_do_peeling): Update the
          condition of omitting the skip-vector check.
        * tree-vectorizer.h (LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING):
          Add a helper macro.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/vect-early-break_138-pr121020.c: New test.
---
 .../vect/vect-early-break_138-pr121020.c      | 52 +++++++++++++++++++
 gcc/tree-vect-loop-manip.cc                   |  2 +-
 gcc/tree-vectorizer.h                         |  4 ++
 3 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
new file mode 100644
index 00000000000..86661e445a8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_138-pr121020.c
@@ -0,0 +1,52 @@
+/* PR tree-optimization/121020 */
+/* { dg-do run } */
+/* { dg-options "-O3 --vect-cost-model=unlimited" } */
+/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+__attribute__((noipa))
+bool equal (uint64_t *restrict p, uint64_t *restrict q, int length)
+{
+  for (int i = 0; i < length; i++) {
+    if (*(p + i) != *(q + i))
+      return false;
+  }
+  return true;
+}
+
+int main ()
+{
+  long pgsz = sysconf (_SC_PAGESIZE);
+  if (pgsz == -1) {
+    fprintf (stderr, "sysconf failed\n");
+    return 0;
+  }
+
+  /* Allocate a whole page of memory.  */
+  void *mem = mmap (NULL, pgsz, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) {
+    fprintf (stderr, "mmap failed\n");
+    return 0;
+  }
+  uint64_t *p1 = (uint64_t *) mem;
+  uint64_t *p2 = (uint64_t *) mem + 32;
+
+  /* The first 16 elements pointed to by p1 and p2 are the same.  */
+  for (int i = 0; i < 32; i++) {
+    *(p1 + i) = 0;
+    *(p2 + i) = (i < 16 ? 0 : -1);
+  }
+
+  /* All calls to equal should return true.  */
+  for (int len = 0; len < 16; len++) {
+    if (!equal (p1 + 1, p2 + 1, len))
+      __builtin_abort();
+  }
+}
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 2d01a4b0ed1..7fcbc1ad2eb 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3295,7 +3295,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
tree nitersm1,
   bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
                      ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
                                  bound_prolog + bound_epilog)
-                     : (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+                     : (!LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING (loop_vinfo)
                         || vect_epilogues));
 
   /* Epilog loop must be executed if the number of iterations for epilog
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 799d5fed7a9..bbfeecacf5b 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1168,6 +1168,10 @@ public:
    || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)          \
    || LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (L))
 
+#define LOOP_VINFO_USE_VERSIONING_WITHOUT_PEELING(L)   \
+  ((L)->may_misalign_stmts.length () > 0               \
+   && !LOOP_VINFO_ALLOW_MUTUAL_ALIGNMENT (L))
+
 #define LOOP_VINFO_NITERS_KNOWN_P(L)          \
   (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
 
-- 
2.43.0

Reply via email to