The following prevents non-grouped load SLP when the element to splat is produced by a gather operation. While it should be possible to support this, it is not similar to the single-element interleaving case I was trying to mimic here.
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. PR tree-optimization/110443 * tree-vect-slp.cc (vect_build_slp_tree_1): Reject non-grouped gather loads. * gcc.dg/torture/pr110443.c: New testcase. --- gcc/testsuite/gcc.dg/torture/pr110443.c | 21 +++++++++++++++++++++ gcc/tree-vect-slp.cc | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr110443.c diff --git a/gcc/testsuite/gcc.dg/torture/pr110443.c b/gcc/testsuite/gcc.dg/torture/pr110443.c new file mode 100644 index 00000000000..61cf705869c --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr110443.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ + +typedef struct { + float real; + float imag; +} complex_t; +extern unsigned char fftorder[]; +float *a52_imdct_256_data; +int a52_imdct_256_i, a52_imdct_256_k; +float a52_imdct_256_b_r; +void a52_imdct_256() +{ + complex_t buf1[64]; + a52_imdct_256_i = 0; + for (; a52_imdct_256_i < 64; a52_imdct_256_i++) { + a52_imdct_256_k = fftorder[a52_imdct_256_i]; + buf1[a52_imdct_256_i].real = buf1[a52_imdct_256_i].imag = + a52_imdct_256_data[a52_imdct_256_k]; + } + a52_imdct_256_b_r = buf1[0].real * buf1[0].imag; +} diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 8cb1ac1f319..355d078d66e 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1291,7 +1291,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, vectorization. For loop vectorization we can handle splats the same we handle single element interleaving. */ && (is_a <bb_vec_info> (vinfo) - || stmt_info != first_stmt_info)) + || stmt_info != first_stmt_info + || STMT_VINFO_GATHER_SCATTER_P (stmt_info))) { /* Not grouped load. */ if (dump_enabled_p ()) -- 2.35.3