This patch adds handling of gaps in load groups (both in the middle and at the end of the group) by representing them with NULL entries in SLP_TREE_SCALAR_STMTS of the unpermuted load node.
The SLP discovery changes could be elided if we manually build the load node instead. * tree-vect-slp.cc (vect_build_slp_tree_1): Handle NULL stmt. (vect_build_slp_tree_2): Likewise. Release load permutation when there's a NULL in SLP_TREE_SCALAR_STMTS and assert there's no actual permutation in that case. (vect_lower_load_permutations): Handle gaps in loads. * gcc.dg/vect/slp-51.c: New testcase. --- gcc/testsuite/gcc.dg/vect/slp-51.c | 17 +++++++++++ gcc/tree-vect-slp.cc | 49 ++++++++++++++++++------------ 2 files changed, 47 insertions(+), 19 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/slp-51.c diff --git a/gcc/testsuite/gcc.dg/vect/slp-51.c b/gcc/testsuite/gcc.dg/vect/slp-51.c new file mode 100644 index 00000000000..91ae763be30 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/slp-51.c @@ -0,0 +1,17 @@ +/* { dg-do compile } */ + +void foo (int * __restrict x, int *y) +{ + x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); + y = __builtin_assume_aligned (y, __BIGGEST_ALIGNMENT__); + for (int i = 0; i < 1024; ++i) + { + x[4*i+0] = y[4*i+0]; + x[4*i+1] = y[4*i+2] * 2; + x[4*i+2] = y[4*i+0] + 3; + x[4*i+3] = y[4*i+2] * 2 - 5; + } +} + +/* Check we can handle SLP with gaps and an interleaving scheme. 
*/ +/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_int && vect_int_mult } } } } */ diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index 6f3822af950..fdefee90e92 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -1080,10 +1080,15 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, stmt_vec_info stmt_info; FOR_EACH_VEC_ELT (stmts, i, stmt_info) { - gimple *stmt = stmt_info->stmt; swap[i] = 0; matches[i] = false; + if (!stmt_info) + { + matches[i] = true; + continue; + } + gimple *stmt = stmt_info->stmt; if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt); @@ -1984,10 +1989,16 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]); bool any_permute = false; + bool any_null = false; FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info) { int load_place; - if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + if (! load_info) + { + load_place = j; + any_null = true; + } + else if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) load_place = vect_get_place_in_interleaving_chain (load_info, first_stmt_info); else @@ -1996,6 +2007,11 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, any_permute |= load_place != j; load_permutation.quick_push (load_place); } + if (any_null) + { + gcc_assert (!any_permute); + load_permutation.release (); + } if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) { @@ -3978,24 +3994,11 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo, stmt_vec_info first = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (loads[0])[0]); - /* ??? In principle we have to consider a gap up to the next full - vector, but we have to actually represent a scalar stmt for the - gaps value so delay handling this. The same is true for - inbetween gaps which the load places in the load-permutation - represent. 
It's probably not worth trying an intermediate packing - to vectors without gap even if that might handle some more cases. - Instead get the gap case correct in some way. */ - unsigned group_lanes = 0; - for (stmt_vec_info s = first; s; s = DR_GROUP_NEXT_ELEMENT (s)) - { - if ((s == first && DR_GROUP_GAP (s) != 0) - || (s != first && DR_GROUP_GAP (s) != 1)) - return; - group_lanes++; - } /* Only a power-of-two number of lanes matches interleaving with N levels. + The non-SLP path also supports DR_GROUP_SIZE == 3. ??? An even number of lanes could be reduced to 1<<ceil_log2(N)-1 lanes at each step. */ + unsigned group_lanes = DR_GROUP_SIZE (first); if (exact_log2 (group_lanes) == -1) return; @@ -4017,11 +4020,19 @@ vect_lower_load_permutations (loop_vec_info loop_vinfo, continue; /* First build (and possibly re-use) a load node for the - unpermuted group. */ + unpermuted group. Gaps in the middle and on the end are + represented with NULL stmts. */ vec<stmt_vec_info> stmts; stmts.create (group_lanes); for (stmt_vec_info s = first; s; s = DR_GROUP_NEXT_ELEMENT (s)) - stmts.quick_push (s); + { + if (s != first) + for (unsigned i = 1; i < DR_GROUP_GAP (s); ++i) + stmts.quick_push (NULL); + stmts.quick_push (s); + } + for (unsigned i = 0; i < DR_GROUP_GAP (first); ++i) + stmts.quick_push (NULL); poly_uint64 max_nunits; bool *matches = XALLOCAVEC (bool, group_lanes); unsigned limit = 1; -- 2.35.3