This patch fixes a segmentation fault issue that can occur in vectorized
loops with an early break. When GCC vectorizes such loops, it may insert
a versioning check to ensure that data references (DRs) with speculative
loads are aligned. The check normally requires DRs to be aligned to the
vector mode size, which prevents generated vector load instructions from
crossing page boundaries.

However, this is not sufficient when a single scalar load is vectorized
into multiple loads within the same iteration. In such cases, even if
none of the vector loads crosses page boundaries, subsequent loads after
the first one may still access memory beyond current valid page.

Consider the following loop as an example:

        while (i < MAX_COMPARE) {
          if (*(p + i) != *(q + i))
            return i;
          i++;
        }

When compiled with "-O3 -march=znver2" on x86, the vectorized loop may
include instructions like:

        vmovdqa (%rcx,%rax), %ymm0
        vmovdqa 32(%rcx,%rax), %ymm1
        vpcmpeqq (%rdx,%rax), %ymm0, %ymm0
        vpcmpeqq 32(%rdx,%rax), %ymm1, %ymm1

Note two speculative vector loads are generated for each DR (p and q).
The first vmovdqa and vpcmpeqq are safe due to the vector size (32-byte)
alignment, but the following ones (at offset 32) may not be safe because
they could read from the beginning of the next memory page, potentially
leading to segmentation faults.

To avoid the issue, this patch increases the alignment requirement for
speculative loads to DR_TARGET_ALIGNMENT. It ensures all vector loads in
the same vector iteration access memory within the same page.

This patch is bootstrapped and regression-tested on x86_64-linux-gnu,
arm-linux-gnueabihf and aarch64-linux-gnu.

        PR tree-optimization/121190

gcc/ChangeLog:

        * tree-vect-data-refs.cc (vect_enhance_data_refs_alignment):
          Increase alignment requirement for speculative loads.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/vect-early-break_52.c: Update an unsafe test.
        * gcc.dg/vect/vect-early-break_137.c-pr121190: New test.
---
 .../vect/vect-early-break_137-pr121190.c      | 60 +++++++++++++++++++
 .../gcc.dg/vect/vect-early-break_52.c         |  2 +-
 gcc/tree-vect-data-refs.cc                    | 15 ++++-
 3 files changed, 75 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c
new file mode 100644
index 00000000000..da11146c578
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_137-pr121190.c
@@ -0,0 +1,60 @@
+/* PR tree-optimization/121190 */
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+/* { dg-additional-options "-march=znver2" { target x86_64-*-* i?86-*-* } } */
+/* { dg-require-effective-target mmap } */
+/* { dg-require-effective-target vect_early_break } */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#define MAX_COMPARE 5000
+
+__attribute__((noipa))
+int diff (uint64_t *restrict p, uint64_t *restrict q)
+{
+  int i = 0;
+  while (i < MAX_COMPARE) {
+    if (*(p + i) != *(q + i))
+      return i;
+    i++;
+  }
+  return -1;
+}
+
+int main ()
+{
+  long pgsz = sysconf (_SC_PAGESIZE);
+  if (pgsz == -1) {
+    fprintf (stderr, "sysconf failed\n");
+    return 0;
+  }
+
+  /* Allocate 2 consecutive pages of memory and let p1 and p2 point to the
+     beginning of each.  */
+  void *mem = mmap (NULL, pgsz * 2, PROT_READ | PROT_WRITE,
+                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (mem == MAP_FAILED) {
+    fprintf (stderr, "mmap failed\n");
+    return 0;
+  }
+  uint64_t *p1 = (uint64_t *) mem;
+  uint64_t *p2 = (uint64_t *) mem + pgsz / sizeof (uint64_t);
+
+  /* Fill the first page with zeros, except for its last 64 bits.  */
+  memset (p1, 0, pgsz);
+  *(p2 - 1) = -1;
+
+  /* Make the 2nd page not accessable.  */
+  mprotect (p2, pgsz, PROT_NONE);
+
+  /* Calls to diff should not read the 2nd page.  */
+  for (int i = 1; i <= 20; i++) {
+    if (diff (p2 - i, p1) != i - 1)
+      __builtin_abort ();
+  }
+}
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c 
b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
index 86a632f2a82..6abfcd6580e 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_52.c
@@ -18,4 +18,4 @@ int main1 (short X)
     }
 }
 
-/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { 
target { ! "x86_64-*-* i?86-*-*" } } } } */
+/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { 
target { ! "x86_64-*-* i?86-*-* arm*-*-*" } } } } */
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index c84cd29116e..829af2fd28d 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -2964,12 +2964,25 @@ vect_enhance_data_refs_alignment (loop_vec_info 
loop_vinfo)
                   break;
                 }
 
+             /* Normally, we require DRs to be aligned to the vector mode size.
+                However, this is not sufficient when a statement involves safe
+                speculative read.  In such cases, a single scalar load can be
+                vectorized into multiple vector loads in one loop iteration.
+                Even if the first vector load is safe, subsequent loads might
+                still access an invalid memory page.  We increase the alignment
+                requirement to prevent this.  */
+             poly_uint64 required_align_size;
+             if (dr_safe_speculative_read_required (stmt_info))
+               required_align_size = DR_TARGET_ALIGNMENT (dr_info);
+             else
+               required_align_size = GET_MODE_SIZE (TYPE_MODE (vectype));
+
              /* At present we don't support versioning for alignment
                 with variable VF, since there's no guarantee that the
                 VF is a power of two.  We could relax this if we added
                 a way of enforcing a power-of-two size.  */
              unsigned HOST_WIDE_INT size;
-             if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
+             if (!required_align_size.is_constant (&size))
                {
                  do_versioning = false;
                  break;
-- 
2.43.0

Reply via email to