https://gcc.gnu.org/g:8af2e8e49d6e5d33c01c2beaead4933bc286974c

commit r17-837-g8af2e8e49d6e5d33c01c2beaead4933bc286974c
Author: Tamar Christina <[email protected]>
Date:   Wed May 27 10:53:07 2026 +0100

    vect: Don't generate scalar epilogue if not needed [PR120352]
    
    The example loop
    
    #define N 4
    int a[N] = {0,0,0,1};
    int b[N] = {0,0,0,1};
    
    __attribute__((noipa, noinline))
    int foo ()
    {
      for (int i = 0; i < N; i++)
        {
          if (a[i] > b[i])
            return 1;
        }
      return 0;
    }
    
    compiled with -O3 -march=armv9-a generates
    
    foo:
            adrp    x2, .LANCHOR0
            add     x1, x2, :lo12:.LANCHOR0
            ptrue   p7.b, vl16
            mov     w0, 0
            ldr     q30, [x2, #:lo12:.LANCHOR0]
            ldr     q31, [x1, 16]
            cmpgt   p7.s, p7/z, z30.s, z31.s
            b.any   .L7
            ret
    .L7:
            ldr     w2, [x2, #:lo12:.LANCHOR0]
            ldr     w0, [x1, 16]
            cmp     w2, w0
            bgt     .L4
            ldr     w0, [x1, 4]
            ldr     w2, [x1, 20]
            cmp     w2, w0
            blt     .L4
            ldr     w0, [x1, 8]
            ldr     w2, [x1, 24]
            cmp     w2, w0
            blt     .L4
            ldr     w2, [x1, 12]
            ldr     w0, [x1, 28]
            cmp     w2, w0
            cset    w0, gt
            ret
    .L4:
            mov     w0, 1
            ret
    
    When the vector comparison finds a matching element we still branch to the
    scalar loop just to return 1.  Obviously the scalar code is completely
    unneeded.
    
    This patch teaches the vectorizer that when
    
    1. We have no live values
    2. We only have one exit (this is a restriction that will be lifted in a later
       patch and is there because we need masking to avoid false positives, but see
       testcase vect-early-break-no-epilog_11.c)
    3. The loop has no side-effects
    
    then we don't need the scalar epilogue at all.
    
    e.g. for the above we now generate
    
    foo:
            adrp    x0, .LANCHOR0
            add     x0, x0, :lo12:.LANCHOR0
            ptrue   p7.s, vl4
            ldp     q31, q30, [x0]
            cmplt   p15.s, p7/z, z30.s, z31.s
            cset    w0, any
            ret
    
    gcc/ChangeLog:
    
            PR tree-optimization/120352
            * tree-vectorizer.h (LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG): New.
            (class _loop_vec_info): Add early_break_needs_epilogue.
            * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): Detect
            usage of stores.
            * tree-vect-loop-manip.cc (vect_do_peeling): Use them.
            * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Likewise.
            (vect_create_loop_vinfo): Likewise.
            (vect_update_ivs_after_vectorizer_for_early_breaks): Likewise.
            * tree-vect-stmts.cc (vect_stmt_relevant_p): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            PR tree-optimization/120352
            * gcc.dg/vect/vect-early-break-no-epilog_1.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_10.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_11.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_2.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_3.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_4.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_5.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_6.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_7.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_8.c: New test.
            * gcc.dg/vect/vect-early-break-no-epilog_9.c: New test.
            * gcc.target/aarch64/noeffect.c: New test.
            * gcc.target/aarch64/noeffect10.c: New test.
            * gcc.target/aarch64/noeffect11.c: New test.
            * gcc.target/aarch64/noeffect2.c: New test.
            * gcc.target/aarch64/noeffect3.c: New test.
            * gcc.target/aarch64/noeffect4.c: New test.
            * gcc.target/aarch64/noeffect5.c: New test.
            * gcc.target/aarch64/noeffect6.c: New test.
            * gcc.target/aarch64/noeffect7.c: New test.
            * gcc.target/aarch64/noeffect8.c: New test.
            * gcc.target/aarch64/noeffect9.c: New test.
            * gcc.target/aarch64/sve/noeffect.c: New test.
            * gcc.target/aarch64/sve/noeffect10.c: New test.
            * gcc.target/aarch64/sve/noeffect11.c: New test.
            * gcc.target/aarch64/sve/noeffect2.c: New test.
            * gcc.target/aarch64/sve/noeffect3.c: New test.
            * gcc.target/aarch64/sve/noeffect4.c: New test.
            * gcc.target/aarch64/sve/noeffect5.c: New test.
            * gcc.target/aarch64/sve/noeffect6.c: New test.
            * gcc.target/aarch64/sve/noeffect7.c: New test.
            * gcc.target/aarch64/sve/noeffect8.c: New test.
            * gcc.target/aarch64/sve/noeffect9.c: New test.

Diff:
---
 .../gcc.dg/vect/vect-early-break-no-epilog_1.c     | 21 ++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_10.c    | 21 ++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_11.c    | 51 ++++++++++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_2.c     | 21 ++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_3.c     | 21 ++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_4.c     | 21 ++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_5.c     | 21 ++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_6.c     | 21 ++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_7.c     | 25 +++++++
 .../gcc.dg/vect/vect-early-break-no-epilog_8.c     | 18 +++++
 .../gcc.dg/vect/vect-early-break-no-epilog_9.c     | 22 ++++++
 gcc/testsuite/gcc.target/aarch64/noeffect.c        | 30 +++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect10.c      | 30 +++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect11.c      | 70 +++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect2.c       | 33 +++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect3.c       | 30 +++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect4.c       | 30 +++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect5.c       | 30 +++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect6.c       | 30 +++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect7.c       | 36 ++++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect8.c       | 32 +++++++++
 gcc/testsuite/gcc.target/aarch64/noeffect9.c       | 36 ++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect.c    | 27 ++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c  | 27 ++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c  | 78 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c   | 32 +++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c   | 34 ++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c   | 33 +++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c   | 33 +++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c   | 33 +++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c   | 36 ++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c   | 32 +++++++++
 gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c   | 36 ++++++++++
 gcc/tree-vect-data-refs.cc                         |  7 ++
 gcc/tree-vect-loop-manip.cc                        | 17 +++--
 gcc/tree-vect-loop.cc                              | 17 +++--
 gcc/tree-vect-stmts.cc                             |  1 +
 gcc/tree-vectorizer.h                              |  5 ++
 38 files changed, 1090 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c
new file mode 100644
index 000000000000..cf9178375db6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_1.c
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c
new file mode 100644
index 000000000000..86b753122dd8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_10.c
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c
new file mode 100644
index 000000000000..3a6b72fa5acb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_11.c
@@ -0,0 +1,51 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do run } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_long } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 0;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 1;
+}
+
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 1;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+  static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+  if (f1 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  if (f2 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c
new file mode 100644
index 000000000000..6563ceaae534
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_2.c
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c
new file mode 100644
index 000000000000..dfb398da9b78
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_3.c
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c
new file mode 100644
index 000000000000..99bb66f9fd05
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_4.c
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+int foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c
new file mode 100644
index 000000000000..ec3eec5cfa13
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_5.c
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c
new file mode 100644
index 000000000000..46d6a8953cfe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_6.c
@@ -0,0 +1,21 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return a[i];
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c
new file mode 100644
index 000000000000..6d94312f4a60
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_7.c
@@ -0,0 +1,25 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-march=armv8-a+sve" { target { aarch64*-*-* } } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+int foo (void)
+{
+  for (int i = 0; i < (N / 2); i += 2)
+    {
+      if (a[i] > b[i])
+       return 1;
+
+      if (a[i + 1] > b[i + 1])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { target { aarch64*-*-* } } } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" { target { aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c
new file mode 100644
index 000000000000..3236cdb66ff3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_8.c
@@ -0,0 +1,18 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < n; i++)
+    {
+      c[i] = a[i] + b[i];
+      if (i > 1000)
+       break;
+    }
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+/* { dg-final { scan-tree-dump-not "early break does not require epilog" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c
new file mode 100644
index 000000000000..c788a684d750
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break-no-epilog_9.c
@@ -0,0 +1,22 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_early_break } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i] && a[i] > c[i])
+       return 1;
+    }
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "early break does not require epilog" "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect.c b/gcc/testsuite/gcc.target/aarch64/noeffect.c
new file mode 100644
index 000000000000..ba3329973b24
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+/*
+** foo:
+**     ...
+**     ldr     q[0-9]+, \[x[0-9]+, #:lo12:\.LANCHOR0\]
+**     ldr     q[0-9]+, \[x[0-9]+, 16\]
+**     cmgt    v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     fmov    x[0-9]+, d[0-9]+
+**     cmp     x[0-9]+, 0
+**     cset    w0, ne
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect10.c b/gcc/testsuite/gcc.target/aarch64/noeffect10.c
new file mode 100644
index 000000000000..03f3e48a729a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect10.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ldr     q[0-9]+, \[x[0-9]+, #:lo12:\.LANCHOR0\]
+**     ldr     q[0-9]+, \[x[0-9]+, 16\]
+**     cmgt    v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     fmov    x[0-9]+, d[0-9]+
+**     cmp     x[0-9]+, 0
+**     cset    w0, ne
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect11.c b/gcc/testsuite/gcc.target/aarch64/noeffect11.c
new file mode 100644
index 000000000000..82c2f00c7cb1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect11.c
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+**     ...
+**     cmhi    v[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d
+**     ...
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     cmp     x[0-9]+, x[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 0;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 1;
+}
+
+/*
+** f2:
+**     ...
+**     cmhi    v[0-9]+\.2d, v[0-9]+\.2d, v[0-9]+\.2d
+**     ...
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     cmp     x[0-9]+, x[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 1;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+int main (void)
+{
+  static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+  static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+  if (f1 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  if (f2 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect2.c b/gcc/testsuite/gcc.target/aarch64/noeffect2.c
new file mode 100644
index 000000000000..08c531fb18c8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect2.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ldr     q[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldr     q[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     add     x[0-9]+, x[0-9]+, 16
+**     cmgt    v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     fmov    x[0-9]+, d[0-9]+
+**     cbz     x[0-9]+, \.L[0-9]+
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect3.c b/gcc/testsuite/gcc.target/aarch64/noeffect3.c
new file mode 100644
index 000000000000..886ad7bda2f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect3.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     cmgt    v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, w[0-9]+, sxtw 2\]
+**     ldr     w[0-9]+, \[x[0-9]+, w[0-9]+, sxtw 2\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect4.c b/gcc/testsuite/gcc.target/aarch64/noeffect4.c
new file mode 100644
index 000000000000..276843c9bbd8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect4.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     cmgt    v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect5.c b/gcc/testsuite/gcc.target/aarch64/noeffect5.c
new file mode 100644
index 000000000000..c15e52ebdfc4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect5.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     cmgt    v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect6.c b/gcc/testsuite/gcc.target/aarch64/noeffect6.c
new file mode 100644
index 000000000000..9da4f496a02e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect6.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     cmgt    v[0-9]+\.8h, v[0-9]+\.8h, v[0-9]+\.8h
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     ldrsh   w[0-9]+, \[[^\n]+\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return a[i];
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect7.c b/gcc/testsuite/gcc.target/aarch64/noeffect7.c
new file mode 100644
index 000000000000..16fc921117aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect7.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     add     x[0-9]+, x[0-9]+, 4000
+**     add     x[0-9]+, x[0-9]+, 2000
+**     b       \.L[0-9]+
+**     ldr     w[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < (N / 2); i += 2)
+    {
+      if (a[i] > b[i])
+       return 1;
+
+      if (a[i + 1] > b[i + 1])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect8.c b/gcc/testsuite/gcc.target/aarch64/noeffect8.c
new file mode 100644
index 000000000000..ada79391a8ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect8.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** add:
+**     ...
+**     cmeq    v[0-9]+\.4s, v[0-9]+\.4s, #0
+**     umaxp   v[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.4s
+**     ...
+**     ldr     q[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     str     q[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     add     w[0-9]+, w[0-9]+, w[0-9]+
+**     str     w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ...
+**     ret
+**     ...
+*/
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < n; i++)
+    {
+      c[i] = a[i] + b[i];
+      if (i > 1000)
+       break;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/noeffect9.c b/gcc/testsuite/gcc.target/aarch64/noeffect9.c
new file mode 100644
index 000000000000..0ce0380e182f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/noeffect9.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+/*
+** foo:
+**     ...
+**     add     x[0-9]+, x[0-9]+, 4000
+**     add     x[0-9]+, x[0-9]+, 3648
+**     mov     x0, 0
+**     b       \.L[0-9]+
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i] && a[i] > c[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect.c
new file mode 100644
index 000000000000..f7109b1483cb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 4
+int a[N] = {0, 0, 0, 1};
+int b[N] = {0, 0, 0, 1};
+
+/*
+** foo:
+**     ...
+**     ldp     q[0-9]+, q[0-9]+, \[x[0-9]+\]
+**     cmplt   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+**     ptest   p[0-9]+, p[0-9]+\.b
+**     cset    w0, any
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c
new file mode 100644
index 000000000000..39ab9489b8e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect10.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 8
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ldp     q[0-9]+, q[0-9]+, \[x[0-9]+\]
+**     cmplt   p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+**     ptest   p[0-9]+, p[0-9]+\.b
+**     cset    w0, any
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c
new file mode 100644
index 000000000000..5c8c3565da62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect11.c
@@ -0,0 +1,78 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** f1:
+**     ...
+**     whilelo p[0-9]+\.d, x[0-9]+, x[0-9]+
+**     ...
+**     ld1d    z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ld1d    z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     cmplo   p[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d
+**     ...
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ...
+**     cmp     x[0-9]+, x[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa))
+int f1 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 0;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 1;
+}
+
+/*
+** f2:
+**     ...
+**     whilelo p[0-9]+\.d, x[0-9]+, x[0-9]+
+**     ...
+**     ld1d    z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ld1d    z[0-9]+\.d, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     cmplo   p[0-9]+\.d, p[0-9]+/z, z[0-9]+\.d, z[0-9]+\.d
+**     ...
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ldr     x[0-9]+, \[x[0-9]+, x[0-9]+, lsl 3\]
+**     ...
+**     cmp     x[0-9]+, x[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa))
+int f2 (const unsigned long *restrict a, const unsigned long *b, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      if (a[i] < b[i])
+       return 1;
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
+
+int main (void)
+{
+  static unsigned long a[3] __attribute__ ((aligned (16))) = {10, 1, 0};
+  static unsigned long b[3] __attribute__ ((aligned (16))) = {9, 2, 0};
+
+  if (f1 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  if (f2 (a, b, 3) != 1)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c
new file mode 100644
index 000000000000..5bb1badde3c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     whilelo p[0-9]+\.s, w[0-9]+, w[0-9]+
+**     ...
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmpgt   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+**     b\.none \.L[0-9]+
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c
new file mode 100644
index 000000000000..c5f81bb0d303
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect3.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     whilelo p[0-9]+\.s, w[0-9]+, w[0-9]+
+**     ...
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmpgt   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c
new file mode 100644
index 000000000000..735b54523b4d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect4.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ld1h    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     ld1h    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     cmpgt   p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+**     ...
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c
new file mode 100644
index 000000000000..bfaec5805785
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect5.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ld1h    z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     ld1h    z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     cmpgt   p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+**     ...
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return i;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c
new file mode 100644
index 000000000000..14438830453a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect6.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+short a[N] = {0};
+short b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ld1h    z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     ld1h    z[0-9]+\.h, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 1\]
+**     cmpgt   p[0-9]+\.h, p[0-9]+/z, z[0-9]+\.h, z[0-9]+\.h
+**     ...
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ldrsh   w[0-9]+, \[x[0-9]+, x[0-9]+\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+short foo (void)
+{
+  for (unsigned short i = 0; i < N; i++)
+    {
+      if (a[i] > b[i])
+       return a[i];
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c
new file mode 100644
index 000000000000..9dab90e72b25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect7.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+
+/*
+** foo:
+**     ...
+**     ld2w    \{z[0-9]+\.s - z[0-9]+\.s\}, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ...
+**     cmpgt   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, z[0-9]+\.s
+**     ptest   p[0-9]+, p[0-9]+\.b
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     ret
+**     ...
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < (N / 2); i += 2)
+    {
+      if (a[i] > b[i])
+       return 1;
+
+      if (a[i + 1] > b[i + 1])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c
new file mode 100644
index 000000000000..79979882b371
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect8.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+/*
+** add:
+**     ...
+**     cmpeq   p[0-9]+\.s, p[0-9]+/z, z[0-9]+\.s, #1
+**     ...
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ld1w    z[0-9]+\.s, p[0-9]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     add     z[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s
+**     st1w    z[0-9]+\.s, p[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ldr     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     add     w[0-9]+, w[0-9]+, w[0-9]+
+**     str     w[0-9]+, \[x[0-9]+, x[0-9]+, lsl 2\]
+**     ...
+**     ret
+**     ...
+*/
+void
+add (int n, int *__restrict a, int *__restrict b, int *__restrict c)
+{
+  for (int i = 0; i < n; i++)
+    {
+      c[i] = a[i] + b[i];
+      if (i > 1000)
+       break;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c b/gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c
new file mode 100644
index 000000000000..bfc5ed7d4e53
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/noeffect9.c
@@ -0,0 +1,36 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8-a+sve --param=aarch64-autovec-preference=sve-only -msve-vector-bits=scalable" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#define N 1000
+int a[N] = {0};
+int b[N] = {0};
+int c[N] = {0};
+
+/*
+** foo:
+**     ...
+**     add     x[0-9]+, x[0-9]+, 4000
+**     add     x[0-9]+, x[0-9]+, 3648
+**     mov     x0, 0
+**     b       \.L[0-9]+
+**     ...
+**     ldr     w[0-9]+, \[x[0-9]+[^\n]*\]
+**     ...
+**     cmp     w[0-9]+, w[0-9]+
+**     ...
+**     mov     w0, 1
+**     ret
+**     mov     w0, 0
+**     ret
+*/
+__attribute__ ((noipa, noinline))
+int foo (void)
+{
+  for (int i = 0; i < N; i++)
+    {
+      if (a[i] > b[i] && a[i] > c[i])
+       return 1;
+    }
+  return 0;
+}
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index da65f1d652cf..03ac4c141d08 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -889,6 +889,13 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo)
                             dest_bb->index);
 
   LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb;
+  /* If the loop has side-effects (stores), force a scalar epilogue.  */
+  for (auto dr : LOOP_VINFO_DATAREFS (loop_vinfo))
+    if (DR_IS_WRITE (dr))
+      {
+       LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
+       break;
+      }
 
   if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())
     {
diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 3aae0dea25b0..9653ad43e0d3 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3306,12 +3306,17 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
 
   /* For early breaks the scalar loop needs to execute at most VF times
      to find the element that caused the break.  */
-  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+      && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
     bound_epilog = vf;
 
   bool epilog_peeling = maybe_ne (bound_epilog, 0U);
   poly_uint64 bound_scalar = bound_epilog;
 
+  if (!LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) && dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "early break does not require epilog.\n");
+
   if (!prolog_peeling && !epilog_peeling)
     return NULL;
 
@@ -3501,11 +3506,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       /* Peel prolog and put it on preheader edge of loop.  */
       edge scalar_e = LOOP_VINFO_SCALAR_MAIN_EXIT (loop_vinfo);
       edge prolog_e = NULL;
+      bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
       prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e,
                                                       scalar_loop, scalar_e,
                                                       e, &prolog_e, true, NULL,
                                                       uncounted_p, uncounted_p,
-                                                      true);
+                                                      early_break_peel_p);
 
       gcc_assert (prolog);
       prolog->force_vectorize = false;
@@ -3617,11 +3623,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
       edge epilog_e = vect_epilogues ? e : scalar_e;
       edge new_epilog_e = NULL;
       auto_vec<basic_block> doms;
+      bool early_break_peel_p = LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo);
       epilog
        = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
                                                  &new_epilog_e, true, &doms,
                                                  uncounted_p, false,
-                                                 true);
+                                                 early_break_peel_p);
 
       LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e;
       gcc_assert (epilog);
@@ -3671,6 +3678,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
          /* Handle any remaining dominator updates needed after
             inserting the loop skip edge above.  */
          if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+             && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)
              && prolog_peeling)
            {
              /* Adding a skip edge to skip a loop with multiple exits
@@ -3818,7 +3826,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
            = make_ssa_name (LOOP_VINFO_EARLY_BRK_IV_TYPE (loop_vinfo));
 
          if (!(LOOP_VINFO_NITERS_UNCOUNTED_P (loop_vinfo)
-               && get_loop_exit_edges (loop).length () == 1))
+               && get_loop_exit_edges (loop).length () == 1)
+             && LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo))
          {
            basic_block exit_bb = NULL;
            edge update_e = NULL;
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ac7e08cf205c..dded8b9aabff 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -775,7 +775,9 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     drs_advanced_by (NULL_TREE),
     vec_loop_main_exit (NULL),
     vec_epilogue_loop_main_exit (NULL),
-    scalar_loop_main_exit (NULL)
+    scalar_loop_main_exit (NULL),
+    early_break_needs_epilogue (false),
+    early_break_niters_var (NULL)
 {
   /* CHECKME: We want to visit all BBs before their successors (except for
      latch blocks, for which this assertion wouldn't hold).  In the simple
@@ -1705,6 +1707,13 @@ vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
   LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
     = !LOOP_VINFO_LOOP_CONDS (loop_vinfo).is_empty ();
 
+  /* At the moment we can't support omitting the epilog for multiple exits:
+     the result of the first compare should be masked by that of the second.
+     We can only allow it if the early exits have the same live values.  For
+     differing values we have to calculate a third mask to disambiguate.  */
+  LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo)
+    = LOOP_VINFO_LOOP_CONDS (loop_vinfo).length () > 1;
+
   if (info->inner_loop_cond)
     {
       /* If we have an estimate on the number of iterations of the inner
@@ -11058,11 +11067,11 @@ vect_update_ivs_after_vectorizer_for_early_breaks (loop_vec_info loop_vinfo)
 {
   DUMP_VECT_SCOPE ("vect_update_ivs_after_vectorizer_for_early_breaks");
 
-  if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
+  if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+      /* If no peeling was done then we have no IV to update.  */
+      || !LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo))
     return;
 
-  gcc_assert (LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo));
-
   tree phi_var = LOOP_VINFO_EARLY_BRK_NITERS_VAR (loop_vinfo);
   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 09ee794300be..4c9d871a31b8 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -413,6 +413,7 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
 
               *live_p = true;
+             LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG (loop_vinfo) = true;
            }
        }
     }
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6d7393809013..b8a287825f43 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1241,6 +1241,10 @@ public:
      For counted loops, this IV controls the natural exits of the loop.  */
   edge scalar_loop_main_exit;
 
+  /* Indicate if the multiple exit loop has any side-effects that require it to
+     have a scalar epilogue.  */
+  bool early_break_needs_epilogue;
+
   /* Used to store the list of stores needing to be moved if doing early
      break vectorization as they would violate the scalar loop semantics if
      vectorized in their current location.  These are stored in order that they
@@ -1325,6 +1329,7 @@ public:
 #define LOOP_VINFO_PEELING_FOR_GAPS(L)     (L)->peeling_for_gaps
 #define LOOP_VINFO_PEELING_FOR_NITER(L)    (L)->peeling_for_niter
 #define LOOP_VINFO_EARLY_BREAKS(L)         (L)->early_breaks
+#define LOOP_VINFO_EARLY_BRK_NEEDS_EPILOG(L) (L)->early_break_needs_epilogue
 #define LOOP_VINFO_EARLY_BRK_STORES(L)     (L)->early_break_stores
 #define LOOP_VINFO_EARLY_BREAKS_VECT_PEELED(L)  \
   ((single_pred ((L)->loop->latch) != (L)->vec_loop_main_exit->src) \

Reply via email to