The example in the PR

#include <vector>

std::vector<bool> x, y;
int main() { return x == y; }

now vectorizes but the attributes on std::vector indicate that the vector is
aligned to the natural vector alignment.  In C this is equivalent to the
testcase

int f (int a[12], int b[12], int n)
{
    a = __builtin_assume_aligned (a, 16);
    b = __builtin_assume_aligned (b, 16);
    for (int i = 0; i < n; i++)
      {
        if (b[i] == 0)
          return 0;
        if (a[0] > b[i])
          return 1;
      }
    return 2;
}

Here the load a[0] is loop invariant, and the vectorizer hoists this out of the
loop into the pre-header.  For early break this isn't safe to do, as a[0] is
only conditionally valid, depending on the conditions in the block preceding it.
As such, either we need some guarantee that the load is valid before we can
hoist it, or the load needs to be unconditional (e.g. in the loop header block).

Conceptually, alignment peeling can provide this guarantee, since making it
through the prologue means the invariant value was loaded at least once and so
we know the address is valid.  At the moment, however, there is no well-defined
order in which GCC inserts conditions in the pre-header, and despite trying to
change the order a few times the load always ends up before the prologue.  So
for now I marked it as a missed optimization.

Since we can still hoist invariant loads when they are in the loop header, I
didn't change LOOP_VINFO_NO_DATA_DEPENDENCIES, since that would be global;
instead I modified the usage site of LOOP_VINFO_NO_DATA_DEPENDENCIES.

Bootstrapped and regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf and x86_64-pc-linux-gnu
(-m32, -m64) with no issues.

Pushed to master.

Thanks,
Tamar

gcc/ChangeLog:

        PR tree-optimization/122868
        * tree-vect-stmts.cc (vectorizable_load): Don't hoist loop invariant
        conditional loads unless in header.

gcc/testsuite/ChangeLog:

        PR tree-optimization/122868
        * gcc.dg/vect/vect-early-break_140-pr122868_1.c: New test.
        * gcc.dg/vect/vect-early-break_140-pr122868_2.c: New test.
        * gcc.dg/vect/vect-early-break_140-pr122868_3.c: New test.
        * gcc.dg/vect/vect-early-break_140-pr122868_4.c: New test.

---
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..80264bd4f31c85d3eaca11430c7edeabcb635296
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c
@@ -0,0 +1,39 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_sizes_16B_8B } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f (int a[12], int b[12], int n)
+{
+#ifdef __arm__
+    a = __builtin_assume_aligned (a, 8);
+    b = __builtin_assume_aligned (b, 8);
+#else
+    a = __builtin_assume_aligned (a, 16);
+    b = __builtin_assume_aligned (b, 16);
+#endif
+    for (int i = 0; i < n; i++)
+      {
+        if (b[i] == 0)
+          return 0;
+        if (a[0] > b[i])
+          return 1;
+      }
+    return 2;
+}
+
+int main ()
+{
+   check_vect ();
+
+   int *a = 0;
+   int b[12] = {0};
+   return f (a, b, 10);
+}
+
+/* { dg-final { scan-tree-dump "not hoisting invariant load due to early break" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..90222fcffd7c98a4187053326cd6f88bfd2bcb63
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c
@@ -0,0 +1,31 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f (int a[12], int b[12], int n)
+{
+    for (int i = 0; i < n; i++)
+      {
+        if (b[i] == 0)
+          return 0;
+        if (a[0] > b[i])
+          return 1;
+      }
+    return 2;
+}
+
+int main ()
+{
+   check_vect ();
+
+   int *a = 0;
+   int b[12] = {0};
+   return f (a, b, 10);
+}
+
+/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to early break" 0 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..670804f8ce537a1381714a44e4b1d42b66ed6b61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c
@@ -0,0 +1,39 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_sizes_16B_8B } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f (int a[12], int b[12], int n)
+{
+#ifdef __arm__
+    a = __builtin_assume_aligned (a, 8);
+    b = __builtin_assume_aligned (b, 8);
+#else
+    a = __builtin_assume_aligned (a, 16);
+    b = __builtin_assume_aligned (b, 16);
+#endif
+    for (int i = 0; i < n; i++)
+      {
+        if (a[0] > b[i])
+          return 0;
+        if (b[i] == 0)
+          return 1;
+      }
+    return 2;
+}
+
+int main ()
+{
+   check_vect ();
+
+   int a[12] = {1};
+   int b[12] = {0};
+   return f (a, b, 10);
+}
+
+/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to early break" 0 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..de2aff287f4fa146ef8cb7e476f63a877e51fedf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c
@@ -0,0 +1,31 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f (int a[12], int b[12], int n)
+{
+    for (int i = 0; i < n; i++)
+      {
+        if (a[0] > b[i])
+          return 0;
+        if (b[i] == 0)
+          return 0;
+      }
+    return 2;
+}
+
+int main ()
+{
+   check_vect ();
+
+   int a[12] = {1};
+   int b[12] = {0};
+   return f (a, b, 10);
+}
+
+/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to early break" 0 "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 1d7e50afcde1096d5598b43ab8d49454eb68385b..a47bbd3345b1e291d0d3ae571cf5666b66b02706 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9880,6 +9880,34 @@ vectorizable_load (vec_info *vinfo,
         transform time.  */
       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
                      && !nested_in_vect_loop);
+
+      /* It is unsafe to hoist a conditional load over the conditions that make
+        it valid.  When early break this means that any invariant load can't be
+        hoisted unless it's in the loop header or if we know something else has
+        verified the load is valid to do.  Alignment peeling would do this
+        since getting through the prologue means the load was done at least
+        once and so the vector main body is free to hoist it.  However today
+        GCC will hoist the load above the PFA loop.  As such that makes it
+        still invalid and so we can't allow it today.  */
+      auto stmt_bb
+       = gimple_bb (STMT_VINFO_STMT (
+                       vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[0])));
+      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+         && !DR_SCALAR_KNOWN_BOUNDS (dr_info)
+         && stmt_bb != loop->header)
+       {
+         if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+             && dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "not hoisting invariant load due to early break"
+                            "constraints\n");
+         else if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "not hoisting invariant load due to early break"
+                            "constraints\n");
+         hoist_p = false;
+       }
+
       bool uniform_p = true;
       for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
        {


-- 
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..80264bd4f31c85d3eaca11430c7edeabcb635296
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_1.c
@@ -0,0 +1,39 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_sizes_16B_8B } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f (int a[12], int b[12], int n)
+{
+#ifdef __arm__
+    a = __builtin_assume_aligned (a, 8);
+    b = __builtin_assume_aligned (b, 8);
+#else
+    a = __builtin_assume_aligned (a, 16);
+    b = __builtin_assume_aligned (b, 16);
+#endif
+    for (int i = 0; i < n; i++)
+      {
+        if (b[i] == 0)
+          return 0;
+        if (a[0] > b[i])
+          return 1;
+      }
+    return 2;
+}
+
+int main ()
+{
+   check_vect ();
+
+   int *a = 0;
+   int b[12] = {0};
+   return f (a, b, 10);
+}
+
+/* { dg-final { scan-tree-dump "not hoisting invariant load due to early break" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..90222fcffd7c98a4187053326cd6f88bfd2bcb63
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_2.c
@@ -0,0 +1,31 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f (int a[12], int b[12], int n)
+{
+    for (int i = 0; i < n; i++)
+      {
+        if (b[i] == 0)
+          return 0;
+        if (a[0] > b[i])
+          return 1;
+      }
+    return 2;
+}
+
+int main ()
+{
+   check_vect ();
+
+   int *a = 0;
+   int b[12] = {0};
+   return f (a, b, 10);
+}
+
+/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to early break" 0 "vect" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..670804f8ce537a1381714a44e4b1d42b66ed6b61
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_3.c
@@ -0,0 +1,39 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_sizes_16B_8B } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f (int a[12], int b[12], int n)
+{
+#ifdef __arm__
+    a = __builtin_assume_aligned (a, 8);
+    b = __builtin_assume_aligned (b, 8);
+#else
+    a = __builtin_assume_aligned (a, 16);
+    b = __builtin_assume_aligned (b, 16);
+#endif
+    for (int i = 0; i < n; i++)
+      {
+        if (a[0] > b[i])
+          return 0;
+        if (b[i] == 0)
+          return 1;
+      }
+    return 2;
+}
+
+int main ()
+{
+   check_vect ();
+
+   int a[12] = {1};
+   int b[12] = {0};
+   return f (a, b, 10);
+}
+
+/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to early break" 0 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..de2aff287f4fa146ef8cb7e476f63a877e51fedf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_140-pr122868_4.c
@@ -0,0 +1,31 @@
+/* { dg-add-options vect_early_break } */
+/* { dg-require-effective-target vect_early_break_hw } */
+/* { dg-require-effective-target vect_int } */
+
+/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
+
+#include "tree-vect.h"
+
+__attribute__ ((noipa))
+int f (int a[12], int b[12], int n)
+{
+    for (int i = 0; i < n; i++)
+      {
+        if (a[0] > b[i])
+          return 0;
+        if (b[i] == 0)
+          return 0;
+      }
+    return 2;
+}
+
+int main ()
+{
+   check_vect ();
+
+   int a[12] = {1};
+   int b[12] = {0};
+   return f (a, b, 10);
+}
+
+/* { dg-final { scan-tree-dump-times "not hoisting invariant load due to early break" 0 "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 1d7e50afcde1096d5598b43ab8d49454eb68385b..a47bbd3345b1e291d0d3ae571cf5666b66b02706 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -9880,6 +9880,34 @@ vectorizable_load (vec_info *vinfo,
 	 transform time.  */
       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
 		      && !nested_in_vect_loop);
+
+      /* It is unsafe to hoist a conditional load over the conditions that make
+	 it valid.  When early break this means that any invariant load can't be
+	 hoisted unless it's in the loop header or if we know something else has
+	 verified the load is valid to do.  Alignment peeling would do this
+	 since getting through the prologue means the load was done at least
+	 once and so the vector main body is free to hoist it.  However today
+	 GCC will hoist the load above the PFA loop.  As such that makes it
+	 still invalid and so we can't allow it today.  */
+      auto stmt_bb
+	= gimple_bb (STMT_VINFO_STMT (
+			vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[0])));
+      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
+	  && !DR_SCALAR_KNOWN_BOUNDS (dr_info)
+	  && stmt_bb != loop->header)
+	{
+	  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
+	      && dump_enabled_p ())
+	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+			     "not hoisting invariant load due to early break"
+			     "constraints\n");
+	  else if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "not hoisting invariant load due to early break"
+			     "constraints\n");
+	  hoist_p = false;
+	}
+
       bool uniform_p = true;
       for (stmt_vec_info sinfo : SLP_TREE_SCALAR_STMTS (slp_node))
 	{

Reply via email to