On Fri, Jan 12, 2024 at 7:15 AM Feng Xue OS <f...@os.amperecomputing.com> wrote:
>
> Add a depth parameter to limit recursion of vec_slp_has_scalar_use.

OK.

> Feng
> -------
>
>  .../gcc.target/aarch64/bb-slp-pr113091.c |  22 ++
>  gcc/tree-vect-slp.cc                     | 207 ++++++++++++++----
>  2 files changed, 190 insertions(+), 39 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> new file mode 100644
> index 00000000000..ff822e90b4a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -fdump-tree-slp-details -ftree-slp-vectorize" } */
> +
> +int test(unsigned array[8]);
> +
> +int foo(char *a, char *b)
> +{
> +  unsigned array[8];
> +
> +  array[0] = (a[0] - b[0]);
> +  array[1] = (a[1] - b[1]);
> +  array[2] = (a[2] - b[2]);
> +  array[3] = (a[3] - b[3]);
> +  array[4] = (a[4] - b[4]);
> +  array[5] = (a[5] - b[5]);
> +  array[6] = (a[6] - b[6]);
> +  array[7] = (a[7] - b[7]);
> +
> +  return test(array);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index b6cce55ce90..086377a9ac0 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -6418,6 +6418,102 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
>    return res;
>  }
>
> +/* Given a definition DEF, analyze if it will have any live scalar use after
> +   performing SLP vectorization whose information is represented by BB_VINFO,
> +   and record the result into hash map SCALAR_USE_MAP as a cache for later
> +   fast check.  If recursion DEPTH exceeds a limit, stop analysis and make a
> +   conservative assumption.  Return 0 if there is no scalar use, 1 if there
> +   is, and -1 if recursion is limited.  */
> +
> +static int
> +vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
> +                        hash_map<tree, int> &scalar_use_map,
> +                        int depth = 0)
> +{
> +  const int depth_limit = 2;
> +  imm_use_iterator use_iter;
> +  gimple *use_stmt;
> +
> +  if (int *res = scalar_use_map.get (def))
> +    return *res;
> +
> +  int scalar_use = 1;
> +
> +  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
> +    {
> +      if (is_gimple_debug (use_stmt))
> +        continue;
> +
> +      stmt_vec_info use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
> +
> +      if (!use_stmt_info)
> +        break;
> +
> +      if (PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
> +        continue;
> +
> +      /* Do not step forward when encountering a PHI statement, since it may
> +         involve cyclic reference and cause infinite recursive invocation.  */
> +      if (gimple_code (use_stmt) == GIMPLE_PHI)
> +        break;
> +
> +      /* When pattern recognition is involved, a statement whose definition is
> +         consumed in some pattern may not be included in the final replacement
> +         pattern statements, so would be skipped when building the SLP graph.
> +
> +         * Original
> +            char a_c = *(char *) a;
> +            char b_c = *(char *) b;
> +            unsigned short a_s = (unsigned short) a_c;
> +            int a_i = (int) a_s;
> +            int b_i = (int) b_c;
> +            int r_i = a_i - b_i;
> +
> +         * After pattern replacement
> +            a_s = (unsigned short) a_c;
> +            a_i = (int) a_s;
> +
> +            patt_b_s = (unsigned short) b_c;    // b_i = (int) b_c
> +            patt_b_i = (int) patt_b_s;          // b_i = (int) b_c
> +
> +            patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
> +            patt_r_i = (int) patt_r_s;          // r_i = a_i - b_i
> +
> +         The definitions of a_i (original statement) and b_i (pattern statement)
> +         are related to, but actually not part of widen_minus pattern.
> +         Vectorizing the pattern does not cause these definition statements to
> +         be marked as PURE_SLP.  For this case, we need to recursively check
> +         whether their uses are all absorbed into vectorized code.  But there
> +         is an exception that some use may participate in a vectorized
> +         operation via an external SLP node containing that use as an element.
> +         The parameter "scalar_use_map" tags such SSA names as having a scalar
> +         use in advance.  */
> +      tree lhs = gimple_get_lhs (use_stmt);
> +
> +      if (!lhs || TREE_CODE (lhs) != SSA_NAME)
> +        break;
> +
> +      if (depth_limit && depth >= depth_limit)
> +        return -1;
> +
> +      if ((scalar_use = vec_slp_has_scalar_use (bb_vinfo, lhs, scalar_use_map,
> +                                                depth + 1)))
> +        break;
> +    }
> +
> +  if (end_imm_use_stmt_p (&use_iter))
> +    scalar_use = 0;
> +
> +  /* If recursion is limited, do not cache result for non-root defs.  */
> +  if (!depth || scalar_use >= 0)
> +    {
> +      bool added = scalar_use_map.put (def, scalar_use);
> +      gcc_assert (!added);
> +    }
> +
> +  return scalar_use;
> +}
> +
>  /* Mark lanes of NODE that are live outside of the basic-block vectorized
>     region and that can be vectorized using vectorizable_live_operation
>     with STMT_VINFO_LIVE_P.  Not handled live operations will cause the
> @@ -6427,6 +6523,7 @@ static void
>  vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
>                               slp_instance instance,
>                               stmt_vector_for_cost *cost_vec,
> +                             hash_map<tree, int> &scalar_use_map,
>                               hash_set<stmt_vec_info> &svisited,
>                               hash_set<slp_tree> &visited)
>  {
> @@ -6451,32 +6548,22 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
>        def_operand_p def_p;
>        FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
>          {
> -          imm_use_iterator use_iter;
> -          gimple *use_stmt;
> -          stmt_vec_info use_stmt_info;
> -          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
> -            if (!is_gimple_debug (use_stmt))
> -              {
> -                use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
> -                if (!use_stmt_info
> -                    || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
> -                  {
> -                    STMT_VINFO_LIVE_P (stmt_info) = true;
> -                    if (vectorizable_live_operation (bb_vinfo, stmt_info,
> -                                                     node, instance, i,
> -                                                     false, cost_vec))
> -                      /* ??? So we know we can vectorize the live stmt
> -                         from one SLP node.  If we cannot do so from all
> -                         or none consistently we'd have to record which
> -                         SLP node (and lane) we want to use for the live
> -                         operation.  So make sure we can code-generate
> -                         from all nodes.  */
> -                      mark_visited = false;
> -                    else
> -                      STMT_VINFO_LIVE_P (stmt_info) = false;
> -                    break;
> -                  }
> -              }
> +          if (vec_slp_has_scalar_use (bb_vinfo, DEF_FROM_PTR (def_p),
> +                                      scalar_use_map))
> +            {
> +              STMT_VINFO_LIVE_P (stmt_info) = true;
> +              if (vectorizable_live_operation (bb_vinfo, stmt_info, node,
> +                                               instance, i, false, cost_vec))
> +                /* ??? So we know we can vectorize the live stmt from one SLP
> +                   node.  If we cannot do so from all or none consistently
> +                   we'd have to record which SLP node (and lane) we want to
> +                   use for the live operation.  So make sure we can
> +                   code-generate from all nodes.  */
> +                mark_visited = false;
> +              else
> +                STMT_VINFO_LIVE_P (stmt_info) = false;
> +            }
> +
>            /* We have to verify whether we can insert the lane extract
>               before all uses.  The following is a conservative approximation.
>               We cannot put this into vectorizable_live_operation because
> @@ -6495,6 +6582,10 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
>               from the latest stmt in a node.
>               So we compensate for this
>               during code-generation, simply not replacing uses for those
>               hopefully rare cases.  */
> +          imm_use_iterator use_iter;
> +          gimple *use_stmt;
> +          stmt_vec_info use_stmt_info;
> +
>            if (STMT_VINFO_LIVE_P (stmt_info))
>              FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
>                if (!is_gimple_debug (use_stmt)
> @@ -6517,8 +6608,56 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
>    slp_tree child;
>    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>      if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> -      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
> -                                   cost_vec, svisited, visited);
> +      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance, cost_vec,
> +                                   scalar_use_map, svisited, visited);
> +}
> +
> +/* Traverse all slp instances of BB_VINFO, and mark lanes of every node that
> +   are live outside of the basic-block vectorized region and that can be
> +   vectorized using vectorizable_live_operation with STMT_VINFO_LIVE_P.  */
> +
> +static void
> +vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo)
> +{
> +  if (bb_vinfo->slp_instances.is_empty ())
> +    return;
> +
> +  hash_set<stmt_vec_info> svisited;
> +  hash_set<slp_tree> visited;
> +  hash_map<tree, int> scalar_use_map;
> +  auto_vec<slp_tree> worklist;
> +
> +  for (slp_instance instance : bb_vinfo->slp_instances)
> +    if (!visited.add (SLP_INSTANCE_TREE (instance)))
> +      worklist.safe_push (SLP_INSTANCE_TREE (instance));
> +
> +  do
> +    {
> +      slp_tree node = worklist.pop ();
> +
> +      if (SLP_TREE_DEF_TYPE (node) == vect_external_def)
> +        {
> +          for (tree op : SLP_TREE_SCALAR_OPS (node))
> +            if (TREE_CODE (op) == SSA_NAME)
> +              scalar_use_map.put (op, 1);
> +        }
> +      else
> +        {
> +          for (slp_tree child : SLP_TREE_CHILDREN (node))
> +            if (child && !visited.add (child))
> +              worklist.safe_push (child);
> +        }
> +    } while (!worklist.is_empty ());
> +
> +  visited.empty ();
> +
> +  for (slp_instance instance : bb_vinfo->slp_instances)
> +    {
> +      vect_location = instance->location ();
> +      vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
> +                                   instance, &instance->cost_vec,
> +                                   scalar_use_map, svisited, visited);
> +    }
>  }
>
>  /* Determine whether we can vectorize the reduction epilogue for INSTANCE.  */
> @@ -6684,17 +6823,7 @@ vect_slp_analyze_operations (vec_info *vinfo)
>
>    /* Compute vectorizable live stmts.  */
>    if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
> -    {
> -      hash_set<stmt_vec_info> svisited;
> -      hash_set<slp_tree> visited;
> -      for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
> -        {
> -          vect_location = instance->location ();
> -          vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
> -                                       instance, &instance->cost_vec, svisited,
> -                                       visited);
> -        }
> -    }
> +    vect_bb_slp_mark_live_stmts (bb_vinfo);
>
>    return !vinfo->slp_instances.is_empty ();
>  }
> --
> 2.17.1
>
> ________________________________________
> From: Richard Biener <richard.guent...@gmail.com>
> Sent: Thursday, January 11, 2024 5:52 PM
> To: Feng Xue OS; Richard Sandiford
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH] Do not count unused scalar use when marking STMT_VINFO_LIVE_P [PR113091]
>
> On Thu, Jan 11, 2024 at 10:46 AM Richard Biener
> <richard.guent...@gmail.com> wrote:
> >
> > On Fri, Dec 29, 2023 at 11:29 AM Feng Xue OS
> > <f...@os.amperecomputing.com> wrote:
> > >
> > > This patch is meant to fix over-estimation about SLP vector-to-scalar cost
> > > for STMT_VINFO_LIVE_P statement.
> > > When pattern recognition is involved, a statement whose definition is
> > > consumed in some pattern, may not be included in the final replacement
> > > pattern statements, and would be skipped when building SLP graph.
> > >
> > > * Original
> > >    char a_c = *(char *) a;
> > >    char b_c = *(char *) b;
> > >    unsigned short a_s = (unsigned short) a_c;
> > >    int a_i = (int) a_s;
> > >    int b_i = (int) b_c;
> > >    int r_i = a_i - b_i;
> > >
> > > * After pattern replacement
> > >    a_s = (unsigned short) a_c;
> > >    a_i = (int) a_s;
> > >
> > >    patt_b_s = (unsigned short) b_c;    // b_i = (int) b_c
> > >    patt_b_i = (int) patt_b_s;          // b_i = (int) b_c
> > >
> > >    patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
> > >    patt_r_i = (int) patt_r_s;          // r_i = a_i - b_i
> > >
> > > The definitions of a_i(original statement) and b_i(pattern statement)
> > > are related to, but actually not part of widen_minus pattern.
> > > Vectorizing the pattern does not cause these definition statements to
> > > be marked as PURE_SLP.  For this case, we need to recursively check
> > > whether their uses are all absorbed into vectorized code.  But there
> > > is an exception that some use may participate in an vectorized
> > > operation via an external SLP node containing that use as an element.
> > >
> > > Feng
> > >
> > > ---
> > >  .../gcc.target/aarch64/bb-slp-pr113091.c |  22 ++
> > >  gcc/tree-vect-slp.cc                     | 189 ++++++++++++++----
> > >  2 files changed, 172 insertions(+), 39 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> > >
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> > > new file mode 100644
> > > index 00000000000..ff822e90b4a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> > > @@ -0,0 +1,22 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-O3 -fdump-tree-slp-details -ftree-slp-vectorize" } */
> > > +
> > > +int test(unsigned array[8]);
> > > +
> > > +int foo(char *a, char *b)
> > > +{
> > > +  unsigned array[8];
> > > +
> > > +  array[0] = (a[0] - b[0]);
> > > +  array[1] = (a[1] - b[1]);
> > > +  array[2] = (a[2] - b[2]);
> > > +  array[3] = (a[3] - b[3]);
> > > +  array[4] = (a[4] - b[4]);
> > > +  array[5] = (a[5] - b[5]);
> > > +  array[6] = (a[6] - b[6]);
> > > +  array[7] = (a[7] - b[7]);
> > > +
> > > +  return test(array);
> > > +}
> > > +
> > > +/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
> > > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> > > index a82fca45161..d36ff37114e 100644
> > > --- a/gcc/tree-vect-slp.cc
> > > +++ b/gcc/tree-vect-slp.cc
> > > @@ -6418,6 +6418,84 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
> > >    return res;
> > >  }
> > >
> > > +/* Given a definition DEF, analyze if it will have any live scalar use after
> > > +   performing SLP vectorization whose information is represented by BB_VINFO,
> > > +   and record result into hash map SCALAR_USE_MAP as cache for later fast
> > > +   check.  */
> > > +
> > > +static bool
> > > +vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
> > > +                        hash_map<tree, bool> &scalar_use_map)
> > > +{
> > > +  imm_use_iterator use_iter;
> > > +  gimple *use_stmt;
> > > +
> > > +  if (bool *res = scalar_use_map.get (def))
> > > +    return *res;
> > > +
> > > +  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
> > > +    {
> > > +      if (is_gimple_debug (use_stmt))
> > > +        continue;
> > > +
> > > +      stmt_vec_info use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
> > > +
> > > +      if (!use_stmt_info)
> > > +        break;
> > > +
> > > +      if (PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
> > > +        continue;
> > > +
> > > +      /* Do not step forward when encounter PHI statement, since it may
> > > +         involve cyclic reference and cause infinite recursive invocation.  */
> > > +      if (gimple_code (use_stmt) == GIMPLE_PHI)
> > > +        break;
> > > +
> > > +      /* When pattern recognition is involved, a statement whose definition is
> > > +         consumed in some pattern, may not be included in the final replacement
> > > +         pattern statements, so would be skipped when building SLP graph.
> > > +
> > > +         * Original
> > > +            char a_c = *(char *) a;
> > > +            char b_c = *(char *) b;
> > > +            unsigned short a_s = (unsigned short) a_c;
> > > +            int a_i = (int) a_s;
> > > +            int b_i = (int) b_c;
> > > +            int r_i = a_i - b_i;
> > > +
> > > +         * After pattern replacement
> > > +            a_s = (unsigned short) a_c;
> > > +            a_i = (int) a_s;
> > > +
> > > +            patt_b_s = (unsigned short) b_c;    // b_i = (int) b_c
> > > +            patt_b_i = (int) patt_b_s;          // b_i = (int) b_c
> > > +
> > > +            patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
> > > +            patt_r_i = (int) patt_r_s;          // r_i = a_i - b_i
> > > +
> > > +         The definitions of a_i(original statement) and b_i(pattern statement)
> > > +         are related to, but actually not part of widen_minus pattern.
> > > +         Vectorizing the pattern does not cause these definition statements to
> > > +         be marked as PURE_SLP.  For this case, we need to recursively check
> > > +         whether their uses are all absorbed into vectorized code.  But there
> > > +         is an exception that some use may participate in an vectorized
> > > +         operation via an external SLP node containing that use as an element.
> > > +         The parameter "scalar_use_map" tags such kind of SSA as having scalar
> > > +         use in advance.  */
> > > +      tree lhs = gimple_get_lhs (use_stmt);
> > > +
> > > +      if (!lhs || TREE_CODE (lhs) != SSA_NAME
> > > +          || vec_slp_has_scalar_use (bb_vinfo, lhs, scalar_use_map))
> >
> > This looks mostly good, the only worry I have is that this recursion
> > will - for unvectorized uses - eventually go all the way down to the end
> > of the vectorized region (aka the whole function).  Since it's really
> > patterns we are after it should be possible to limit the recursion to
> > use_stmt which are covered by a pattern, aka STMT_VINFO_IN_PATTERN_P?
>
> Hmm, in_pattern_p doesn't seem to be set for original stmts replaced
> by a pattern.
> In fact we don't seem to mark those in any way?  It should be possible to
> programmatically do this looking at the root stmt uses maybe.  Maybe we can
> instead simply limit recursion depth to one and only increase when we get
> testcases requiring more?  We should eventually revisit how we mark patterns
> in next stage1.
>
> > PHIs should be then magically covered since we have no patterns
> > for them (though explicitly handling is good for documentation purposes).
> >
> > Thanks,
> > Richard.
>
> > > +        break;
> > > +    }
> > > +
> > > +  bool found = !end_imm_use_stmt_p (&use_iter);
> > > +  bool added = scalar_use_map.put (def, found);
> > > +
> > > +  gcc_assert (!added);
> > > +  return found;
> > > +}
> > > +
> > >  /* Mark lanes of NODE that are live outside of the basic-block vectorized
> > >     region and that can be vectorized using vectorizable_live_operation
> > >     with STMT_VINFO_LIVE_P.  Not handled live operations will cause the
> > > @@ -6427,6 +6505,7 @@ static void
> > >  vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
> > >                               slp_instance instance,
> > >                               stmt_vector_for_cost *cost_vec,
> > > +                             hash_map<tree, bool> &scalar_use_map,
> > >                               hash_set<stmt_vec_info> &svisited,
> > >                               hash_set<slp_tree> &visited)
> > >  {
> > > @@ -6451,32 +6530,22 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
> > >        def_operand_p def_p;
> > >        FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
> > >          {
> > > -          imm_use_iterator use_iter;
> > > -          gimple *use_stmt;
> > > -          stmt_vec_info use_stmt_info;
> > > -          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
> > > -            if (!is_gimple_debug (use_stmt))
> > > -              {
> > > -                use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
> > > -                if (!use_stmt_info
> > > -                    || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
> > > -                  {
> > > -                    STMT_VINFO_LIVE_P (stmt_info) = true;
> > > -                    if (vectorizable_live_operation (bb_vinfo, stmt_info,
> > > -                                                     node, instance, i,
> > > -                                                     false, cost_vec))
> > > -                      /* ??? So we know we can vectorize the live stmt
> > > -                         from one SLP node.  If we cannot do so from all
> > > -                         or none consistently we'd have to record which
> > > -                         SLP node (and lane) we want to use for the live
> > > -                         operation.  So make sure we can code-generate
> > > -                         from all nodes.  */
> > > -                      mark_visited = false;
> > > -                    else
> > > -                      STMT_VINFO_LIVE_P (stmt_info) = false;
> > > -                    break;
> > > -                  }
> > > -              }
> > > +          if (vec_slp_has_scalar_use (bb_vinfo, DEF_FROM_PTR (def_p),
> > > +                                      scalar_use_map))
> > > +            {
> > > +              STMT_VINFO_LIVE_P (stmt_info) = true;
> > > +              if (vectorizable_live_operation (bb_vinfo, stmt_info, node,
> > > +                                               instance, i, false, cost_vec))
> > > +                /* ??? So we know we can vectorize the live stmt from one SLP
> > > +                   node.  If we cannot do so from all or none consistently
> > > +                   we'd have to record which SLP node (and lane) we want to
> > > +                   use for the live operation.  So make sure we can
> > > +                   code-generate from all nodes.  */
> > > +                mark_visited = false;
> > > +              else
> > > +                STMT_VINFO_LIVE_P (stmt_info) = false;
> > > +            }
> > > +
> > >            /* We have to verify whether we can insert the lane extract
> > >               before all uses.  The following is a conservative approximation.
> > >               We cannot put this into vectorizable_live_operation because
> > > @@ -6495,6 +6564,10 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
> > >               from the latest stmt in a node.  So we compensate for this
> > >               during code-generation, simply not replacing uses for those
> > >               hopefully rare cases.  */
> > > +          imm_use_iterator use_iter;
> > > +          gimple *use_stmt;
> > > +          stmt_vec_info use_stmt_info;
> > > +
> > >            if (STMT_VINFO_LIVE_P (stmt_info))
> > >              FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
> > >                if (!is_gimple_debug (use_stmt)
> > > @@ -6517,8 +6590,56 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
> > >    slp_tree child;
> > >    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > >      if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> > > -      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
> > > -                                   cost_vec, svisited, visited);
> > > +      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance, cost_vec,
> > > +                                   scalar_use_map, svisited, visited);
> > > +}
> > > +
> > > +/* Traverse all slp instances of BB_VINFO, and mark lanes of every node that
> > > +   are live outside of the basic-block vectorized region and that can be
> > > +   vectorized using vectorizable_live_operation with STMT_VINFO_LIVE_P.  */
> > > +
> > > +static void
> > > +vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo)
> > > +{
> > > +  if (bb_vinfo->slp_instances.is_empty ())
> > > +    return;
> > > +
> > > +  hash_set<stmt_vec_info> svisited;
> > > +  hash_set<slp_tree> visited;
> > > +  hash_map<tree, bool> scalar_use_map;
> > > +  auto_vec<slp_tree> worklist;
> > > +
> > > +  for (slp_instance instance : bb_vinfo->slp_instances)
> > > +    if (!visited.add (SLP_INSTANCE_TREE (instance)))
> > > +      worklist.safe_push (SLP_INSTANCE_TREE (instance));
> > > +
> > > +  do
> > > +    {
> > > +      slp_tree node = worklist.pop ();
> > > +
> > > +      if (SLP_TREE_DEF_TYPE (node) == vect_external_def)
> > > +        {
> > > +          for (tree op : SLP_TREE_SCALAR_OPS (node))
> > > +            if (TREE_CODE (op) == SSA_NAME)
> > > +              scalar_use_map.put (op, true);
> > > +        }
> > > +      else
> > > +        {
> > > +          for (slp_tree child : SLP_TREE_CHILDREN (node))
> > > +            if (child && !visited.add (child))
> > > +              worklist.safe_push (child);
> > > +        }
> > > +    } while (!worklist.is_empty ());
> > > +
> > > +  visited.empty ();
> > > +
> > > +  for (slp_instance instance : bb_vinfo->slp_instances)
> > > +    {
> > > +      vect_location = instance->location ();
> > > +      vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
> > > +                                   instance, &instance->cost_vec,
> > > +                                   scalar_use_map, svisited, visited);
> > > +    }
> > >  }
> > >
> > >  /* Determine whether we can vectorize the reduction epilogue for INSTANCE.  */
> > > @@ -6684,17 +6805,7 @@ vect_slp_analyze_operations (vec_info *vinfo)
> > >
> > >    /* Compute vectorizable live stmts.  */
> > >    if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
> > > -    {
> > > -      hash_set<stmt_vec_info> svisited;
> > > -      hash_set<slp_tree> visited;
> > > -      for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
> > > -        {
> > > -          vect_location = instance->location ();
> > > -          vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
> > > -                                       instance, &instance->cost_vec, svisited,
> > > -                                       visited);
> > > -        }
> > > -    }
> > > +    vect_bb_slp_mark_live_stmts (bb_vinfo);
> > >
> > >    return !vinfo->slp_instances.is_empty ();
> > >  }
> > > --
> > > 2.17.1
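
[Editorial note, not part of the thread] For readers following along without the GCC sources at hand, the core of the approved vec_slp_has_scalar_use above is a memoized, depth-limited recursive walk over SSA immediate uses.  Below is a minimal standalone C++ sketch of that scheme only.  It replaces GCC's tree/stmt_vec_info/immediate-use machinery with a hypothetical toy graph (toy_graph, use_stmt_t, has_scalar_use are made-up names), so it illustrates the caching and depth cut-off, not the actual vectorizer interfaces.

/* Standalone illustration only -- NOT GCC code.  Toy model of the
   memoization + depth-limit scheme in vec_slp_has_scalar_use.  */

#include <cstdio>
#include <unordered_map>
#include <vector>

struct use_stmt_t
{
  bool pure_slp;   /* Use is fully absorbed into vectorized code.  */
  bool is_phi;     /* PHI uses are handled conservatively (stop here).  */
  int lhs;         /* Id of the value this use defines, -1 if none.  */
};

struct toy_graph
{
  /* Map from a def id to the statements that use it.  */
  std::unordered_map<int, std::vector<use_stmt_t>> uses;
};

/* Return 0 if DEF has no remaining scalar use, 1 if it has one, and -1 if
   the depth limit stopped the analysis (conservatively treated as a use).
   Results are cached in SCALAR_USE_MAP; a -1 result is only cached for the
   root query, mirroring the patch.  */

static int
has_scalar_use (const toy_graph &g, int def,
                std::unordered_map<int, int> &scalar_use_map, int depth = 0)
{
  const int depth_limit = 2;

  auto cached = scalar_use_map.find (def);
  if (cached != scalar_use_map.end ())
    return cached->second;

  int scalar_use = 0;
  auto it = g.uses.find (def);
  if (it != g.uses.end ())
    for (const use_stmt_t &use : it->second)
      {
        if (use.pure_slp)
          continue;                 /* Absorbed into vectorized code.  */
        if (use.is_phi || use.lhs < 0)
          {
            scalar_use = 1;         /* Conservatively a scalar use.  */
            break;
          }
        if (depth >= depth_limit)
          {
            scalar_use = -1;        /* Give up, assume a scalar use.  */
            break;
          }
        if ((scalar_use = has_scalar_use (g, use.lhs, scalar_use_map,
                                          depth + 1)))
          break;
      }

  if (depth == 0 || scalar_use >= 0)
    scalar_use_map[def] = scalar_use;
  return scalar_use;
}

int
main ()
{
  /* Def 1 feeds a non-SLP statement defining def 2; def 2 is only used by a
     pure-SLP statement, so neither should count as having a scalar use.  */
  toy_graph g;
  g.uses[1] = { { false, false, 2 } };
  g.uses[2] = { { true, false, -1 } };

  std::unordered_map<int, int> cache;
  printf ("def 1 has scalar use: %d\n", has_scalar_use (g, 1, cache));
  return 0;
}

In the real patch the 0/1 result is derived from whether the immediate-use iterator ran to completion, and scalar_use_map is pre-seeded with SSA names that appear as elements of external SLP nodes by the worklist in the new vect_bb_slp_mark_live_stmts overload; the toy keeps only the recursion, the cache, and the depth cut-off.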