The r16-4558-g1b387bd8978577 change added a check that does not
match what its commit message says.  The following adjusts the
condition guarding that check so the two agree, which fixes the
testcases below.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.  I'm
trying to bootstrap and test on aarch64 as well.

        PR tree-optimization/122474
        * tree-vect-loop.cc (vectorizable_reduction): Adjust condition
        guarding the check for .VEC_SHL_INSERT.

        * gcc.target/aarch64/sve2/pr123053.c: New testcase.
        * gcc.target/riscv/rvv/pr122474.c: Likewise.
---
 .../gcc.target/aarch64/sve2/pr123053.c        | 25 +++++++++++++++++++
 gcc/testsuite/gcc.target/riscv/rvv/pr122474.c | 15 +++++++++++
 gcc/tree-vect-loop.cc                         | 12 ++++++---
 3 files changed, 48 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/pr122474.c

diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
new file mode 100644
index 00000000000..9f2fc438a62
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr123053.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -march=armv9-a" } */
+
+#include <stdint.h>
+
+struct
+{
+    int32_t f3;
+    int64_t f4;
+} g_137, g_138;
+struct
+{
+    int8_t f0;
+} g_174;
+int32_t g_67;
+extern uint32_t g_179[];
+uint16_t func_71()
+{
+    for (; g_174.f0; g_174.f0 -= 1)
+    {
+        g_137.f3 = 0;
+        for (; g_137.f3 <= 4; g_137.f3 += 1)
+            for (g_67 = 3; g_67; g_67 -= 1) g_179[g_67] || (g_138.f4 = 0);
+    }
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c b/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
new file mode 100644
index 00000000000..314b0f93d71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/pr122474.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -O3 -fwrapv -w" } */
+
+_Bool a;
+void b(unsigned c[][8][8][8][8][8]) {
+  for (int d = 1; d; d += 3)
+    for (int e = 0; e < 11; e += 2)
+      for (short f = 0; f < 1; f = 30482)
+        for (short g = 0; g < 014; g++)
+          a = ({
+            int h = a;
+            int i = c[2][f][d][d][d][d] ^ c[g][g][1][2][g][g];
+            i ? h : i;
+          });
+}
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index aa59cd1a39d..d3bb788d866 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7653,12 +7653,16 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
   /* For double reductions, and for SLP reductions with a neutral value,
      we construct a variable-length initial vector by loading a vector
      full of the neutral value and then shift-and-inserting the start
-     values into the low-numbered elements.  */
+     values into the low-numbered elements.  This is however not needed
+     if the target can do the reduction using the new optabs, and the initial
+     reduction value matches the neutral value and we have one SLP lane
+     while not having a reduction chain.  */
   if ((double_reduc || neutral_op)
       && !nunits_out.is_constant ()
-      && (SLP_TREE_LANES (slp_node) != 1 && !reduc_chain)
-      && (!neutral_op
-         || !operand_equal_p (neutral_op,
+      && !(SLP_TREE_LANES (slp_node) == 1
+          && !reduc_chain
+          && neutral_op
+          && operand_equal_p (neutral_op,
                               vect_phi_initial_value (reduc_def_phi)))
       && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT,
                                          vectype_out, OPTIMIZE_FOR_SPEED))
-- 
2.51.0

Reply via email to