The SLP reduction subgroup analysis can succeed for size-1 groups,
but this leads to poor code generation.  Size-1 cases should fall
back to single-lane reduction instead.

Handle size-1 groups by returning false from the analysis function,
and add a loop exit check to avoid unnecessary processing.

        PR tree-optimization/123343

gcc/ChangeLog:

        * tree-vect-slp.cc (vect_analyze_slp_reduction_group): Return
        false for group_size <= 1 at entry.
        (vect_analyze_slp_reductions): Add loop exit check for
        group_size <= 1.

gcc/testsuite/ChangeLog:

        * gcc.dg/vect/slp-reduc-var.c: New testcase.

Signed-off-by: Zhongyao Chen <[email protected]>
---
 gcc/testsuite/gcc.dg/vect/slp-reduc-var.c | 25 +++++++++++++++++++++++
 gcc/tree-vect-slp.cc                      |  7 ++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-var.c

diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-var.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-var.c
new file mode 100644
index 00000000000..bb322b1a428
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-var.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 --param vect-epilogues-nomask=0" } */
+#include <stdint-gcc.h>
+
+uint64_t
+x264_pixel_var_8x8 (uint8_t *pix, int i_stride)
+{
+  uint32_t sum = 0, sqr = 0;
+  for (int y = 0; y < 8; y++)
+    {
+      for (int x = 0; x < 8; x++)
+       {
+         sum += pix[x];
+         sqr += pix[x] * pix[x];
+       }
+      pix += i_stride;
+    }
+  return sum + ((uint64_t)sqr << 32);
+}
+
+/* Verify that size-1 reductions fall back to single-lane reduction chains.  */
+/* { dg-final { scan-tree-dump "Starting SLP discovery of reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of size 2 reduction group" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of size 1 reduction group" "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index e4e0320c678..3536a06b8c6 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4736,8 +4736,11 @@ vect_analyze_slp_reduction_group (loop_vec_info loop_vinfo,
                                  unsigned max_tree_size, unsigned *limit,
                                  bool *matches)
 {
-  /* Try to form a reduction group.  */
+  /* Try to form a reduction group.  Size-1 groups are not suitable
+     for SLP reduction and should fall back to single-lane reduction.  */
   unsigned int group_size = scalar_stmts.length ();
+  if (group_size <= 1)
+    return false;
   if (!matches)
     matches = XALLOCAVEC (bool, group_size);
   poly_uint64 max_nunits = 1;
@@ -4887,6 +4890,8 @@ vect_analyze_slp_reductions (loop_vec_info loop_vinfo,
              }
          scalar_stmts.truncate (j);
          group_size = scalar_stmts.length ();
+         if (group_size <= 1)
+           break;
          if (vect_analyze_slp_reduction_group (loop_vinfo, scalar_stmts,
                                                bst_map, max_tree_size, limit,
                                                matches))
-- 
2.43.0

Reply via email to