On Fri, Jan 12, 2024 at 7:15 AM Feng Xue OS <f...@os.amperecomputing.com> wrote:
>
> Add a depth parameter to limit recursion of vec_slp_has_scalar_use.

OK.

> Feng
> -------
>
>  .../gcc.target/aarch64/bb-slp-pr113091.c      |  22 ++
>  gcc/tree-vect-slp.cc                          | 207 ++++++++++++++----
>  2 files changed, 190 insertions(+), 39 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> new file mode 100644
> index 00000000000..ff822e90b4a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-O3 -fdump-tree-slp-details -ftree-slp-vectorize" } */
> +
> +int test(unsigned array[8]);
> +
> +int foo(char *a, char *b)
> +{
> +  unsigned array[8];
> +
> +  array[0] = (a[0] - b[0]);
> +  array[1] = (a[1] - b[1]);
> +  array[2] = (a[2] - b[2]);
> +  array[3] = (a[3] - b[3]);
> +  array[4] = (a[4] - b[4]);
> +  array[5] = (a[5] - b[5]);
> +  array[6] = (a[6] - b[6]);
> +  array[7] = (a[7] - b[7]);
> +
> +  return test(array);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index b6cce55ce90..086377a9ac0 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -6418,6 +6418,102 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
>    return res;
>  }
>
> +/* Given a definition DEF, analyze whether it will have any live scalar use
> +   after SLP vectorization, whose information is represented by BB_VINFO,
> +   and record the result in hash map SCALAR_USE_MAP as a cache for later
> +   fast checks.  If recursion DEPTH exceeds a limit, stop the analysis and
> +   make a conservative assumption.  Return 0 if there is no scalar use, 1 if
> +   there is one, and -1 if recursion was limited.  */
> +
> +static int
> +vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
> +                       hash_map<tree, int> &scalar_use_map,
> +                       int depth = 0)
> +{
> +  const int depth_limit = 2;
> +  imm_use_iterator use_iter;
> +  gimple *use_stmt;
> +
> +  if (int *res = scalar_use_map.get (def))
> +    return *res;
> +
> +  int scalar_use = 1;
> +
> +  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
> +    {
> +      if (is_gimple_debug (use_stmt))
> +       continue;
> +
> +      stmt_vec_info use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
> +
> +      if (!use_stmt_info)
> +       break;
> +
> +      if (PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
> +       continue;
> +
> +      /* Do not step forward when encountering a PHI statement, since it may
> +        involve a cyclic reference and cause infinite recursion.  */
> +      if (gimple_code (use_stmt) == GIMPLE_PHI)
> +       break;
> +
> +      /* When pattern recognition is involved, a statement whose definition
> +        is consumed in some pattern may not be included in the final
> +        replacement pattern statements, and so would be skipped when
> +        building the SLP graph.
> +
> +        * Original
> +         char a_c = *(char *) a;
> +         char b_c = *(char *) b;
> +         unsigned short a_s = (unsigned short) a_c;
> +         int a_i = (int) a_s;
> +         int b_i = (int) b_c;
> +         int r_i = a_i - b_i;
> +
> +        * After pattern replacement
> +         a_s = (unsigned short) a_c;
> +         a_i = (int) a_s;
> +
> +         patt_b_s = (unsigned short) b_c;    // b_i = (int) b_c
> +         patt_b_i = (int) patt_b_s;          // b_i = (int) b_c
> +
> +         patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
> +         patt_r_i = (int) patt_r_s;          // r_i = a_i - b_i
> +
> +        The definitions of a_i (original statement) and b_i (pattern
> +        statement) are related to, but not actually part of, the
> +        widen_minus pattern.  Vectorizing the pattern does not cause these
> +        definition statements to be marked as PURE_SLP.  For this case, we
> +        need to recursively check whether their uses are all absorbed into
> +        vectorized code.  But there is an exception: some use may
> +        participate in a vectorized operation via an external SLP node
> +        containing that use as an element.  The parameter "scalar_use_map"
> +        tags such SSA names as having a scalar use in advance.  */
> +      tree lhs = gimple_get_lhs (use_stmt);
> +
> +      if (!lhs || TREE_CODE (lhs) != SSA_NAME)
> +       break;
> +
> +      if (depth_limit && depth >= depth_limit)
> +       return -1;
> +
> +      if ((scalar_use = vec_slp_has_scalar_use (bb_vinfo, lhs, scalar_use_map,
> +                                               depth + 1)))
> +       break;
> +    }
> +
> +  if (end_imm_use_stmt_p (&use_iter))
> +    scalar_use = 0;
> +
> +  /* If recursion is limited, do not cache result for non-root defs.  */
> +  if (!depth || scalar_use >= 0)
> +    {
> +      bool existed = scalar_use_map.put (def, scalar_use);
> +      gcc_assert (!existed);
> +    }
> +
> +  return scalar_use;
> +}
> +
>  /* Mark lanes of NODE that are live outside of the basic-block vectorized
>     region and that can be vectorized using vectorizable_live_operation
>     with STMT_VINFO_LIVE_P.  Not handled live operations will cause the
> @@ -6427,6 +6523,7 @@ static void
>  vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
>                              slp_instance instance,
>                              stmt_vector_for_cost *cost_vec,
> +                            hash_map<tree, int> &scalar_use_map,
>                              hash_set<stmt_vec_info> &svisited,
>                              hash_set<slp_tree> &visited)
>  {
> @@ -6451,32 +6548,22 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
>        def_operand_p def_p;
>        FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
>         {
> -         imm_use_iterator use_iter;
> -         gimple *use_stmt;
> -         stmt_vec_info use_stmt_info;
> -         FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
> -           if (!is_gimple_debug (use_stmt))
> -             {
> -               use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
> -               if (!use_stmt_info
> -                   || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
> -                 {
> -                   STMT_VINFO_LIVE_P (stmt_info) = true;
> -                   if (vectorizable_live_operation (bb_vinfo, stmt_info,
> -                                                    node, instance, i,
> -                                                    false, cost_vec))
> -                     /* ???  So we know we can vectorize the live stmt
> -                        from one SLP node.  If we cannot do so from all
> -                        or none consistently we'd have to record which
> -                        SLP node (and lane) we want to use for the live
> -                        operation.  So make sure we can code-generate
> -                        from all nodes.  */
> -                     mark_visited = false;
> -                   else
> -                     STMT_VINFO_LIVE_P (stmt_info) = false;
> -                   break;
> -                 }
> -             }
> +         if (vec_slp_has_scalar_use (bb_vinfo, DEF_FROM_PTR (def_p),
> +                                     scalar_use_map))
> +           {
> +             STMT_VINFO_LIVE_P (stmt_info) = true;
> +             if (vectorizable_live_operation (bb_vinfo, stmt_info, node,
> +                                              instance, i, false, cost_vec))
> +               /* ???  So we know we can vectorize the live stmt from one SLP
> +                  node.  If we cannot do so from all or none consistently
> +                  we'd have to record which SLP node (and lane) we want to
> +                  use for the live operation.  So make sure we can
> +                  code-generate from all nodes.  */
> +               mark_visited = false;
> +             else
> +               STMT_VINFO_LIVE_P (stmt_info) = false;
> +           }
> +
>           /* We have to verify whether we can insert the lane extract
>              before all uses.  The following is a conservative approximation.
>              We cannot put this into vectorizable_live_operation because
> @@ -6495,6 +6582,10 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
>              from the latest stmt in a node.  So we compensate for this
>              during code-generation, simply not replacing uses for those
>              hopefully rare cases.  */
> +         imm_use_iterator use_iter;
> +         gimple *use_stmt;
> +         stmt_vec_info use_stmt_info;
> +
>           if (STMT_VINFO_LIVE_P (stmt_info))
>             FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
>               if (!is_gimple_debug (use_stmt)
> @@ -6517,8 +6608,56 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
>    slp_tree child;
>    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>      if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> -      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
> -                                  cost_vec, svisited, visited);
> +      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance, cost_vec,
> +                                  scalar_use_map, svisited, visited);
> +}
> +
> +/* Traverse all slp instances of BB_VINFO, and mark lanes of every node that
> +   are live outside of the basic-block vectorized region and that can be
> +   vectorized using vectorizable_live_operation with STMT_VINFO_LIVE_P.  */
> +
> +static void
> +vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo)
> +{
> +  if (bb_vinfo->slp_instances.is_empty ())
> +    return;
> +
> +  hash_set<stmt_vec_info> svisited;
> +  hash_set<slp_tree> visited;
> +  hash_map<tree, int> scalar_use_map;
> +  auto_vec<slp_tree> worklist;
> +
> +  for (slp_instance instance : bb_vinfo->slp_instances)
> +    if (!visited.add (SLP_INSTANCE_TREE (instance)))
> +      worklist.safe_push (SLP_INSTANCE_TREE (instance));
> +
> +  do
> +    {
> +      slp_tree node = worklist.pop ();
> +
> +      if (SLP_TREE_DEF_TYPE (node) == vect_external_def)
> +       {
> +         for (tree op : SLP_TREE_SCALAR_OPS (node))
> +           if (TREE_CODE (op) == SSA_NAME)
> +             scalar_use_map.put (op, 1);
> +       }
> +      else
> +       {
> +         for (slp_tree child : SLP_TREE_CHILDREN (node))
> +           if (child && !visited.add (child))
> +             worklist.safe_push (child);
> +       }
> +    } while (!worklist.is_empty ());
> +
> +  visited.empty ();
> +
> +  for (slp_instance instance : bb_vinfo->slp_instances)
> +    {
> +      vect_location = instance->location ();
> +      vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
> +                                  instance, &instance->cost_vec,
> +                                  scalar_use_map, svisited, visited);
> +    }
>  }
>
>  /* Determine whether we can vectorize the reduction epilogue for INSTANCE.  */
> @@ -6684,17 +6823,7 @@ vect_slp_analyze_operations (vec_info *vinfo)
>
>    /* Compute vectorizable live stmts.  */
>    if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
> -    {
> -      hash_set<stmt_vec_info> svisited;
> -      hash_set<slp_tree> visited;
> -      for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
> -       {
> -         vect_location = instance->location ();
> -         vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
> -                                      instance, &instance->cost_vec, svisited,
> -                                      visited);
> -       }
> -    }
> +    vect_bb_slp_mark_live_stmts (bb_vinfo);
>
>    return !vinfo->slp_instances.is_empty ();
>  }
> --
> 2.17.1
>
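The core of the patch is the depth-limited, memoized walk over immediate
uses.  Below is a minimal standalone sketch of that scheme, with std::map
standing in for GCC's hash_map and integer ids for SSA names; the types and
names are illustrative only, not the GCC API, and unanalyzable uses are
folded into the PHI-style stop for brevity:

  #include <map>
  #include <vector>

  struct use_info
  {
    bool pure_slp; /* Use statement is fully absorbed by vectorized code.  */
    bool is_phi;   /* PHIs stop the walk, since they may form cycles.  */
    int lhs;       /* Definition produced by the use statement.  */
  };

  /* Return 0 if DEF has no scalar use, 1 if it has one, and -1 if the
     depth limit cut the analysis short (treated as "has scalar use").  */

  static int
  has_scalar_use (int def, const std::map<int, std::vector<use_info>> &uses,
                  std::map<int, int> &cache, int depth = 0)
  {
    const int depth_limit = 2;

    if (auto it = cache.find (def); it != cache.end ())
      return it->second;

    int scalar_use = 0;
    for (const use_info &u : uses.at (def))
      {
        if (u.pure_slp)
          continue;              /* Absorbed into vectorized code.  */
        if (u.is_phi)
          {
            scalar_use = 1;      /* Conservatively treat PHIs as scalar.  */
            break;
          }
        if (depth >= depth_limit)
          return -1;             /* Limited: do not cache below the root.  */
        if ((scalar_use = has_scalar_use (u.lhs, uses, cache, depth + 1)))
          break;
      }

    /* Cache definitive answers everywhere; cache a limited (-1) answer
       only at the root, where repeated queries must stay cheap.  */
    if (depth == 0 || scalar_use >= 0)
      cache.emplace (def, scalar_use);

    return scalar_use;
  }

The caching asymmetry mirrors the patch: a -1 result depends on the depth
at which the walk started, so it is only safe to memoize at the root, where
every later query begins from depth 0.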
> ________________________________________
> From: Richard Biener <richard.guent...@gmail.com>
> Sent: Thursday, January 11, 2024 5:52 PM
> To: Feng Xue OS; Richard Sandiford
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH] Do not count unused scalar use when marking STMT_VINFO_LIVE_P [PR113091]
>
> On Thu, Jan 11, 2024 at 10:46 AM Richard Biener
> <richard.guent...@gmail.com> wrote:
> >
> > On Fri, Dec 29, 2023 at 11:29 AM Feng Xue OS
> > <f...@os.amperecomputing.com> wrote:
> > >
> > > This patch is meant to fix over-estimation of the SLP vector-to-scalar cost
> > > for STMT_VINFO_LIVE_P statements.  When pattern recognition is involved, a
> > > statement whose definition is consumed in some pattern may not be included
> > > in the final replacement pattern statements, and would be skipped when
> > > building the SLP graph.
> > >
> > >      * Original
> > >       char a_c = *(char *) a;
> > >       char b_c = *(char *) b;
> > >       unsigned short a_s = (unsigned short) a_c;
> > >       int a_i = (int) a_s;
> > >       int b_i = (int) b_c;
> > >       int r_i = a_i - b_i;
> > >
> > >      * After pattern replacement
> > >       a_s = (unsigned short) a_c;
> > >       a_i = (int) a_s;
> > >
> > >       patt_b_s = (unsigned short) b_c;    // b_i = (int) b_c
> > >       patt_b_i = (int) patt_b_s;          // b_i = (int) b_c
> > >
> > >       patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
> > >       patt_r_i = (int) patt_r_s;          // r_i = a_i - b_i
> > >
> > > The definitions of a_i (original statement) and b_i (pattern statement)
> > > are related to, but not actually part of, the widen_minus pattern.
> > > Vectorizing the pattern does not cause these definition statements to
> > > be marked as PURE_SLP.  For this case, we need to recursively check
> > > whether their uses are all absorbed into vectorized code.  But there
> > > is an exception: some use may participate in a vectorized
> > > operation via an external SLP node containing that use as an element.
> > >
> > > Feng
> > >
> > > ---
> > >  .../gcc.target/aarch64/bb-slp-pr113091.c      |  22 ++
> > >  gcc/tree-vect-slp.cc                          | 189 ++++++++++++++----
> > >  2 files changed, 172 insertions(+), 39 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> > >
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> > > new file mode 100644
> > > index 00000000000..ff822e90b4a
> > > --- /dev/null
> > > +++ b/gcc/testsuite/gcc.target/aarch64/bb-slp-pr113091.c
> > > @@ -0,0 +1,22 @@
> > > +/* { dg-do compile } */
> > > +/* { dg-additional-options "-O3 -fdump-tree-slp-details -ftree-slp-vectorize" } */
> > > +
> > > +int test(unsigned array[8]);
> > > +
> > > +int foo(char *a, char *b)
> > > +{
> > > +  unsigned array[8];
> > > +
> > > +  array[0] = (a[0] - b[0]);
> > > +  array[1] = (a[1] - b[1]);
> > > +  array[2] = (a[2] - b[2]);
> > > +  array[3] = (a[3] - b[3]);
> > > +  array[4] = (a[4] - b[4]);
> > > +  array[5] = (a[5] - b[5]);
> > > +  array[6] = (a[6] - b[6]);
> > > +  array[7] = (a[7] - b[7]);
> > > +
> > > +  return test(array);
> > > +}
> > > +
> > > +/* { dg-final { scan-tree-dump-times "Basic block will be vectorized using SLP" 1 "slp2" } } */
> > > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> > > index a82fca45161..d36ff37114e 100644
> > > --- a/gcc/tree-vect-slp.cc
> > > +++ b/gcc/tree-vect-slp.cc
> > > @@ -6418,6 +6418,84 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
> > >    return res;
> > >  }
> > >
> > > +/* Given a definition DEF, analyze if it will have any live scalar use after
> > > +   performing SLP vectorization whose information is represented by BB_VINFO,
> > > +   and record result into hash map SCALAR_USE_MAP as cache for later fast
> > > +   check.  */
> > > +
> > > +static bool
> > > +vec_slp_has_scalar_use (bb_vec_info bb_vinfo, tree def,
> > > +                       hash_map<tree, bool> &scalar_use_map)
> > > +{
> > > +  imm_use_iterator use_iter;
> > > +  gimple *use_stmt;
> > > +
> > > +  if (bool *res = scalar_use_map.get (def))
> > > +    return *res;
> > > +
> > > +  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
> > > +    {
> > > +      if (is_gimple_debug (use_stmt))
> > > +       continue;
> > > +
> > > +      stmt_vec_info use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
> > > +
> > > +      if (!use_stmt_info)
> > > +       break;
> > > +
> > > +      if (PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
> > > +       continue;
> > > +
> > > +      /* Do not step forward when encountering a PHI statement, since it may
> > > +        involve a cyclic reference and cause infinite recursion.  */
> > > +      if (gimple_code (use_stmt) == GIMPLE_PHI)
> > > +       break;
> > > +
> > > +      /* When pattern recognition is involved, a statement whose definition
> > > +        is consumed in some pattern may not be included in the final
> > > +        replacement pattern statements, and so would be skipped when
> > > +        building the SLP graph.
> > > +
> > > +        * Original
> > > +         char a_c = *(char *) a;
> > > +         char b_c = *(char *) b;
> > > +         unsigned short a_s = (unsigned short) a_c;
> > > +         int a_i = (int) a_s;
> > > +         int b_i = (int) b_c;
> > > +         int r_i = a_i - b_i;
> > > +
> > > +        * After pattern replacement
> > > +         a_s = (unsigned short) a_c;
> > > +         a_i = (int) a_s;
> > > +
> > > +         patt_b_s = (unsigned short) b_c;    // b_i = (int) b_c
> > > +         patt_b_i = (int) patt_b_s;          // b_i = (int) b_c
> > > +
> > > +         patt_r_s = widen_minus(a_c, b_c);   // r_i = a_i - b_i
> > > +         patt_r_i = (int) patt_r_s;          // r_i = a_i - b_i
> > > +
> > > +        The definitions of a_i (original statement) and b_i (pattern
> > > +        statement) are related to, but not actually part of, the
> > > +        widen_minus pattern.  Vectorizing the pattern does not cause these
> > > +        definition statements to be marked as PURE_SLP.  For this case, we
> > > +        need to recursively check whether their uses are all absorbed into
> > > +        vectorized code.  But there is an exception: some use may
> > > +        participate in a vectorized operation via an external SLP node
> > > +        containing that use as an element.  The parameter "scalar_use_map"
> > > +        tags such SSA names as having a scalar use in advance.  */
> > > +      tree lhs = gimple_get_lhs (use_stmt);
> > > +
> > > +      if (!lhs || TREE_CODE (lhs) != SSA_NAME
> > > +         || vec_slp_has_scalar_use (bb_vinfo, lhs, scalar_use_map))
> >
> > This looks mostly good, the only worry I have is that this recursion
> > will - for unvectorized uses - eventually go all the way down to the end
> > of the vectorized region (aka the whole function).  Since it's really
> > patterns we are after it should be possible to limit the recursion to
> > use_stmt which are covered by a pattern, aka STMT_VINFO_IN_PATTERN_P?
>
> Hmm, in_pattern_p doesn't seem to be set for original stmts replaced
> by a pattern.
> In fact we don't seem to mark those in any way?  It should be possible to
> programmatically do this looking at the root stmt uses maybe.  Maybe we can
> instead simply limit recursion depth to one and only increase when we get
> testcases requiring more?  We should eventually revisit how we mark patterns
> in next stage1.
>
> > PHIs should be then magically covered since we have no patterns
> > for them (though explicitly handling is good for documentation purposes).
> >
> > Thanks,
> > Richard.
> >
> > > +       break;
> > > +    }
> > > +
> > > +  bool found = !end_imm_use_stmt_p (&use_iter);
> > > +  bool added = scalar_use_map.put (def, found);
> > > +
> > > +  gcc_assert (!added);
> > > +  return found;
> > > +}
> > > +
> > >  /* Mark lanes of NODE that are live outside of the basic-block vectorized
> > >     region and that can be vectorized using vectorizable_live_operation
> > >     with STMT_VINFO_LIVE_P.  Not handled live operations will cause the
> > > @@ -6427,6 +6505,7 @@ static void
> > >  vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
> > >                              slp_instance instance,
> > >                              stmt_vector_for_cost *cost_vec,
> > > +                            hash_map<tree, bool> &scalar_use_map,
> > >                              hash_set<stmt_vec_info> &svisited,
> > >                              hash_set<slp_tree> &visited)
> > >  {
> > > @@ -6451,32 +6530,22 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
> > >        def_operand_p def_p;
> > >        FOR_EACH_PHI_OR_STMT_DEF (def_p, orig_stmt, op_iter, SSA_OP_DEF)
> > >         {
> > > -         imm_use_iterator use_iter;
> > > -         gimple *use_stmt;
> > > -         stmt_vec_info use_stmt_info;
> > > -         FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
> > > -           if (!is_gimple_debug (use_stmt))
> > > -             {
> > > -               use_stmt_info = bb_vinfo->lookup_stmt (use_stmt);
> > > -               if (!use_stmt_info
> > > -                   || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
> > > -                 {
> > > -                   STMT_VINFO_LIVE_P (stmt_info) = true;
> > > -                   if (vectorizable_live_operation (bb_vinfo, stmt_info,
> > > -                                                    node, instance, i,
> > > -                                                    false, cost_vec))
> > > -                     /* ???  So we know we can vectorize the live stmt
> > > -                        from one SLP node.  If we cannot do so from all
> > > -                        or none consistently we'd have to record which
> > > -                        SLP node (and lane) we want to use for the live
> > > -                        operation.  So make sure we can code-generate
> > > -                        from all nodes.  */
> > > -                     mark_visited = false;
> > > -                   else
> > > -                     STMT_VINFO_LIVE_P (stmt_info) = false;
> > > -                   break;
> > > -                 }
> > > -             }
> > > +         if (vec_slp_has_scalar_use (bb_vinfo, DEF_FROM_PTR (def_p),
> > > +                                     scalar_use_map))
> > > +           {
> > > +             STMT_VINFO_LIVE_P (stmt_info) = true;
> > > +             if (vectorizable_live_operation (bb_vinfo, stmt_info, node,
> > > +                                              instance, i, false, cost_vec))
> > > +               /* ???  So we know we can vectorize the live stmt from one SLP
> > > +                  node.  If we cannot do so from all or none consistently
> > > +                  we'd have to record which SLP node (and lane) we want to
> > > +                  use for the live operation.  So make sure we can
> > > +                  code-generate from all nodes.  */
> > > +               mark_visited = false;
> > > +             else
> > > +               STMT_VINFO_LIVE_P (stmt_info) = false;
> > > +           }
> > > +
> > >           /* We have to verify whether we can insert the lane extract
> > >              before all uses.  The following is a conservative approximation.
> > >              We cannot put this into vectorizable_live_operation because
> > > @@ -6495,6 +6564,10 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
> > >              from the latest stmt in a node.  So we compensate for this
> > >              during code-generation, simply not replacing uses for those
> > >              hopefully rare cases.  */
> > > +         imm_use_iterator use_iter;
> > > +         gimple *use_stmt;
> > > +         stmt_vec_info use_stmt_info;
> > > +
> > >           if (STMT_VINFO_LIVE_P (stmt_info))
> > >             FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
> > >               if (!is_gimple_debug (use_stmt)
> > > @@ -6517,8 +6590,56 @@ vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo, slp_tree node,
> > >    slp_tree child;
> > >    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > >      if (child && SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> > > -      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance,
> > > -                                  cost_vec, svisited, visited);
> > > +      vect_bb_slp_mark_live_stmts (bb_vinfo, child, instance, cost_vec,
> > > +                                  scalar_use_map, svisited, visited);
> > > +}
> > > +
> > > +/* Traverse all slp instances of BB_VINFO, and mark lanes of every node that
> > > +   are live outside of the basic-block vectorized region and that can be
> > > +   vectorized using vectorizable_live_operation with STMT_VINFO_LIVE_P.  */
> > > +
> > > +static void
> > > +vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo)
> > > +{
> > > +  if (bb_vinfo->slp_instances.is_empty ())
> > > +    return;
> > > +
> > > +  hash_set<stmt_vec_info> svisited;
> > > +  hash_set<slp_tree> visited;
> > > +  hash_map<tree, bool> scalar_use_map;
> > > +  auto_vec<slp_tree> worklist;
> > > +
> > > +  for (slp_instance instance : bb_vinfo->slp_instances)
> > > +    if (!visited.add (SLP_INSTANCE_TREE (instance)))
> > > +      worklist.safe_push (SLP_INSTANCE_TREE (instance));
> > > +
> > > +  do
> > > +    {
> > > +      slp_tree node = worklist.pop ();
> > > +
> > > +      if (SLP_TREE_DEF_TYPE (node) == vect_external_def)
> > > +       {
> > > +         for (tree op : SLP_TREE_SCALAR_OPS (node))
> > > +           if (TREE_CODE (op) == SSA_NAME)
> > > +             scalar_use_map.put (op, true);
> > > +       }
> > > +      else
> > > +       {
> > > +         for (slp_tree child : SLP_TREE_CHILDREN (node))
> > > +           if (child && !visited.add (child))
> > > +             worklist.safe_push (child);
> > > +       }
> > > +    } while (!worklist.is_empty ());
> > > +
> > > +  visited.empty ();
> > > +
> > > +  for (slp_instance instance : bb_vinfo->slp_instances)
> > > +    {
> > > +      vect_location = instance->location ();
> > > +      vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
> > > +                                  instance, &instance->cost_vec,
> > > +                                  scalar_use_map, svisited, visited);
> > > +    }
> > >  }
> > >
> > >  /* Determine whether we can vectorize the reduction epilogue for INSTANCE.  */
> > > @@ -6684,17 +6805,7 @@ vect_slp_analyze_operations (vec_info *vinfo)
> > >
> > >    /* Compute vectorizable live stmts.  */
> > >    if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo))
> > > -    {
> > > -      hash_set<stmt_vec_info> svisited;
> > > -      hash_set<slp_tree> visited;
> > > -      for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i)
> > > -       {
> > > -         vect_location = instance->location ();
> > > -         vect_bb_slp_mark_live_stmts (bb_vinfo, SLP_INSTANCE_TREE (instance),
> > > -                                      instance, &instance->cost_vec, svisited,
> > > -                                      visited);
> > > -       }
> > > -    }
> > > +    vect_bb_slp_mark_live_stmts (bb_vinfo);
> > >
> > >    return !vinfo->slp_instances.is_empty ();
> > >  }
> > > --
> > > 2.17.1

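For completeness, the pre-seeding pass in the new vect_bb_slp_mark_live_stmts
overload can be sketched the same way: before any recursive query runs, every
SSA name appearing as a scalar operand of an external SLP node is tagged as
having a scalar use, so the cache answers those lookups up front.  A
simplified model under the same stand-in types as the earlier sketch (not
GCC's slp_tree or hash_map):

  #include <map>
  #include <set>
  #include <vector>

  enum def_type { internal_def, external_def };

  struct slp_node
  {
    def_type type;
    std::vector<int> scalar_ops;       /* SSA ids if external.  */
    std::vector<slp_node *> children;  /* Child nodes if internal.  */
  };

  /* Walk every reachable node once; operands of external nodes become
     elements of vectorized operations, so tag them as scalar uses.  */

  static void
  seed_scalar_uses (const std::vector<slp_node *> &instance_roots,
                    std::map<int, int> &cache)
  {
    std::set<const slp_node *> visited;
    std::vector<const slp_node *> worklist;

    for (const slp_node *root : instance_roots)
      if (visited.insert (root).second)
        worklist.push_back (root);

    while (!worklist.empty ())
      {
        const slp_node *node = worklist.back ();
        worklist.pop_back ();

        if (node->type == external_def)
          for (int op : node->scalar_ops)
            cache[op] = 1;     /* Pre-tagged: has a scalar use.  */
        else
          for (const slp_node *child : node->children)
            if (child && visited.insert (child).second)
              worklist.push_back (child);
      }
  }

This is what lets the recursion treat the cache as authoritative: any SSA
name consumed as an element of an external node already maps to 1 before
the first recursive call is made.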