This fixes vectorized PHI latch edge updating by delaying it until the whole loop has been code generated, to deal with the case where the latch def is itself a PHI in the same block.
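For illustration, here is a minimal hand-written sketch of the situation (not taken from the PR's dumps; the function and all SSA names are invented).  The value flowing over the latch edge into the cycle PHI for 'c' is defined by the PHI for 'b' in the same loop header, so updating the latch arguments of c's vectorized PHIs at the point the defining PHI is transformed may be too early:

  /* Hand-written sketch (names invented): in SSA form the loop header
     contains roughly

       # b_1 = PHI <0(preheader), b_next(latch)>
       # c_2 = PHI <0(preheader), b_1(latch)>

     i.e. c_2's latch def b_1 is a PHI in the same block, so c_2's vector
     PHIs may not exist yet when b_1 is vectorized; hence the latch update
     is recorded and replayed after all PHIs of the loop are transformed.  */
  void sketch (int *res, int n)
  {
    int b = 0, c = 0;
    for (int i = 0; i < n; ++i)
      {
        *res += c;   /* use of the cycle value */
        c = b;       /* c's next value is the other header PHI's value */
        b = b - 1;
      }
  }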
Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-08-26  Richard Biener  <rguent...@suse.de>

	PR tree-optimization/96698
	* tree-vectorizer.h (loop_vec_info::reduc_latch_defs): New.
	(loop_vec_info::reduc_latch_slp_defs): Likewise.
	* tree-vect-stmts.c (vect_transform_stmt): Only record stmts
	to update PHI latches from, perform the update ...
	* tree-vect-loop.c (vect_transform_loop): ... here after
	vectorizing those PHIs.
	(info_for_reduction): Properly handle non-reduction PHIs.

	* gcc.dg/vect/pr96698.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr96698.c | 19 ++++++++++++++++
 gcc/tree-vect-loop.c                | 35 ++++++++++++++++++++++++++++-
 gcc/tree-vect-stmts.c               | 29 +++++-------------------
 gcc/tree-vectorizer.h               |  5 +++++
 4 files changed, 63 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr96698.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr96698.c b/gcc/testsuite/gcc.dg/vect/pr96698.c
new file mode 100644
index 00000000000..1d141c1dfff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr96698.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+
+void test(int a, int* i)
+{
+  for (; a < 5; ++a)
+  {
+    int b = 0;
+    int c = 0;
+    for (; b != -11; b--)
+      for (int d = 0; d ==0; d++)
+      {
+        *i += c & a;
+        c = b;
+      }
+  }
+}
+
+/* We should be able to vectorize the inner cycle.  */
+/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target vect_int } } } */
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index a92813eb2ac..50abb2b2f3c 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4646,7 +4646,8 @@ info_for_reduction (vec_info *vinfo, stmt_vec_info stmt_info)
 {
   stmt_info = vect_orig_stmt (stmt_info);
   gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info));
-  if (!is_a <gphi *> (stmt_info->stmt))
+  if (!is_a <gphi *> (stmt_info->stmt)
+      || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
     stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
   gphi *phi = as_a <gphi *> (stmt_info->stmt);
   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
@@ -9031,6 +9032,38 @@ vect_transform_loop (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
 	}
     }
 
+  /* Fill in backedge defs of reductions.  */
+  for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i)
+    {
+      stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i];
+      stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
+      vec<gimple *> &phi_info
+	= STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info));
+      vec<gimple *> &vec_stmt
+	= STMT_VINFO_VEC_STMTS (stmt_info);
+      gcc_assert (phi_info.length () == vec_stmt.length ());
+      gphi *phi
+	= dyn_cast <gphi *> (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt);
+      edge e = loop_latch_edge (gimple_bb (phi_info[0])->loop_father);
+      for (unsigned j = 0; j < phi_info.length (); ++j)
+	add_phi_arg (as_a <gphi *> (phi_info[j]),
+		     gimple_get_lhs (vec_stmt[j]), e,
+		     gimple_phi_arg_location (phi, e->dest_idx));
+    }
+  for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i)
+    {
+      slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first;
+      slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second;
+      gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
+      edge e = loop_latch_edge (gimple_bb (phi)->loop_father);
+      gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
+		  == SLP_TREE_VEC_STMTS (slp_node).length ());
+      for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j)
+	add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[j]),
+		     vect_get_slp_vect_def (slp_node, j),
+		     e, gimple_phi_arg_location (phi, e->dest_idx));
+    }
+
   /* Stub out scalar statements that must not survive vectorization.
      Doing this here helps with grouped statements, or statements that
      are involved in patterns.  */
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 65e30bac424..f6532558693 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -10912,8 +10912,8 @@ vect_transform_stmt (vec_info *vinfo,
   if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
     return is_store;
 
-  /* If this stmt defines a value used on a backedge, update the
-     vectorized PHIs.  */
+  /* If this stmt defines a value used on a backedge, record it so
+     we can update the vectorized PHIs later.  */
   stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
   stmt_vec_info reduc_info;
   if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
@@ -10932,30 +10932,11 @@ vect_transform_stmt (vec_info *vinfo,
 	  && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
 	  && (PHI_ARG_DEF_FROM_EDGE (phi, e)
 	      == gimple_get_lhs (orig_stmt_info->stmt)))
-	{
-	  vec<gimple *> &phi_info
-	    = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info));
-	  vec<gimple *> &vec_stmt
-	    = STMT_VINFO_VEC_STMTS (stmt_info);
-	  gcc_assert (phi_info.length () == vec_stmt.length ());
-	  for (unsigned i = 0; i < phi_info.length (); ++i)
-	    add_phi_arg (as_a <gphi *> (phi_info[i]),
-			 gimple_get_lhs (vec_stmt[i]), e,
-			 gimple_phi_arg_location (phi, e->dest_idx));
-	}
+	as_a <loop_vec_info> (vinfo)->reduc_latch_defs.safe_push (stmt_info);
       else if (slp_node
	       && slp_node != slp_node_instance->reduc_phis)
-	{
-	  slp_tree phi_node = slp_node_instance->reduc_phis;
-	  gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
-	  e = loop_latch_edge (gimple_bb (phi)->loop_father);
-	  gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
-		      == SLP_TREE_VEC_STMTS (slp_node).length ());
-	  for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
-	    add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]),
-			 vect_get_slp_vect_def (slp_node, i),
-			 e, gimple_phi_arg_location (phi, e->dest_idx));
-	}
+	as_a <loop_vec_info> (vinfo)->reduc_latch_slp_defs.safe_push
+	  (std::make_pair (slp_node, slp_node_instance->reduc_phis));
     }
 
   /* Handle stmts whose DEF is used outside the loop-nest that is
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 8551b686613..f36e2ad9626 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -627,6 +627,11 @@ public:
      stmt in the chain.  */
   auto_vec<stmt_vec_info> reduction_chains;
 
+  /* The vectorized stmts defining the latch values of the reduction
+     they are involved with.  */
+  auto_vec<stmt_vec_info> reduc_latch_defs;
+  auto_vec<std::pair<slp_tree, slp_tree> > reduc_latch_slp_defs;
+
   /* Cost vector for a single scalar iteration.  */
   auto_vec<stmt_info_for_cost> scalar_cost_vec;
 
-- 
2.26.2