This refines the fix for PR102226: the mode conversion from V2DI to
VNx2DI is now done separately from the sign conversion, so that the
saved accumulator retains the signedness it had before the original fix.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2021-09-15  Richard Biener <rguent...@suse.de>

        PR tree-optimization/102318
        * tree-vect-loop.c (vect_transform_cycle_phi): Revert
        previous change and do the mode conversion separately from
        the sign conversion.

        * gcc.dg/vect/pr102318.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr102318.c | 21 +++++++++++++++++++++
 gcc/tree-vect-loop.c                 | 13 +++++++++++--
 2 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr102318.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr102318.c b/gcc/testsuite/gcc.dg/vect/pr102318.c
new file mode 100644
index 00000000000..cc58efacecd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr102318.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+void
+vec_slp_int16_t (short int *restrict a, short int *restrict b, int n)
+{
+  short int x0 = b[0];
+  short int x1 = b[1];
+  short int x2 = b[2];
+  short int x3 = b[3];
+  for (int i = 0; i < n; ++i)
+  {
+    x0 += a[i * 4];
+    x1 += a[i * 4 + 1];
+    x2 += a[i * 4 + 2];
+    x3 += a[i * 4 + 3];
+  }
+  b[0] = x0;
+  b[1] = x1;
+  b[2] = x2;
+  b[3] = x3;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index c9dcc647d2c..5a5b8da2e77 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7755,11 +7755,20 @@ vect_transform_cycle_phi (loop_vec_info loop_vinfo,
                                                  (reduc_info),
                                                &stmts);
            }
-         if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
-           def = gimple_convert (&stmts, vectype_out, def);
+         /* The epilogue loop might use a different vector mode, like
+            VNx2DI vs. V2DI.  */
+         if (TYPE_MODE (vectype_out) != TYPE_MODE (TREE_TYPE (def)))
+           {
+             tree reduc_type = build_vector_type_for_mode
+               (TREE_TYPE (TREE_TYPE (def)), TYPE_MODE (vectype_out));
+             def = gimple_convert (&stmts, reduc_type, def);
+           }
          /* Adjust the input so we pick up the partially reduced value
             for the skip edge in vect_create_epilog_for_reduction.  */
          accumulator->reduc_input = def;
+         /* And the reduction could be carried out using a different sign.  */
+         if (!useless_type_conversion_p (vectype_out, TREE_TYPE (def)))
+           def = gimple_convert (&stmts, vectype_out, def);
          if (loop_vinfo->main_loop_edge)
            {
              /* While we'd like to insert on the edge this will split
-- 
2.31.1

Reply via email to