The SLP reduction subgroup analysis can succeed for size-1 groups,
but this leads to poor code generation. Size-1 cases should fall
back to single-lane reduction instead.
Make vect_analyze_slp_reduction_group return false on entry when
group_size <= 1, and break out of the loop in
vect_analyze_slp_reductions once the truncated group has shrunk to
that size, to avoid unnecessary processing.
PR tree-optimization/123343
gcc/ChangeLog:
* tree-vect-slp.cc (vect_analyze_slp_reduction_group): Return
false for group_size <= 1 at entry.
(vect_analyze_slp_reductions): Add loop exit check for
group_size <= 1.
gcc/testsuite/ChangeLog:
* gcc.dg/vect/slp-reduc-var.c: New testcase.
Signed-off-by: Zhongyao Chen <[email protected]>
---
gcc/testsuite/gcc.dg/vect/slp-reduc-var.c | 25 +++++++++++++++++++++++
gcc/tree-vect-slp.cc | 7 ++++++-
2 files changed, 31 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-var.c
diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-var.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-var.c
new file mode 100644
index 00000000000..bb322b1a428
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-var.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O3 --param vect-epilogues-nomask=0" } */
+#include <stdint-gcc.h>
+
+uint64_t
+x264_pixel_var_8x8 (uint8_t *pix, int i_stride)
+{
+ uint32_t sum = 0, sqr = 0;
+ for (int y = 0; y < 8; y++)
+ {
+ for (int x = 0; x < 8; x++)
+ {
+ sum += pix[x];
+ sqr += pix[x] * pix[x];
+ }
+ pix += i_stride;
+ }
+ return sum + ((uint64_t)sqr << 32);
+}
+
+/* Verify that size-1 reductions fall back to single-lane reduction chains. */
+/* { dg-final { scan-tree-dump "Starting SLP discovery of reduction chain" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of reduction chain failed" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of size 2 reduction group" "vect" } } */
+/* { dg-final { scan-tree-dump-not "SLP discovery of size 1 reduction group" "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index e4e0320c678..3536a06b8c6 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -4736,8 +4736,11 @@ vect_analyze_slp_reduction_group (loop_vec_info loop_vinfo,
unsigned max_tree_size, unsigned *limit,
bool *matches)
{
- /* Try to form a reduction group. */
+ /* Try to form a reduction group. Size-1 groups are not suitable
+ for SLP reduction and should fall back to single-lane reduction. */
unsigned int group_size = scalar_stmts.length ();
+ if (group_size <= 1)
+ return false;
if (!matches)
matches = XALLOCAVEC (bool, group_size);
poly_uint64 max_nunits = 1;
@@ -4887,6 +4890,8 @@ vect_analyze_slp_reductions (loop_vec_info loop_vinfo,
}
scalar_stmts.truncate (j);
group_size = scalar_stmts.length ();
+ if (group_size <= 1)
+ break;
if (vect_analyze_slp_reduction_group (loop_vinfo, scalar_stmts,
bst_map, max_tree_size, limit,
matches))
--
2.43.0