Richard Biener <rguent...@suse.de> writes:
> As discussed this splits the analysis loop into two, first settling
> on a vector mode used for the main loop and only then analyzing
> the epilogue of that for possible vectorization.  That makes it
> easier to put in support for unrolled main loops.
>
> On the way I've realized some cleanup opportunities, namely caching
> n_stmts in vec_info_shared (it's computed by dataref analysis), which
> avoids passing it around, and no longer setting/clearing loop->aux
> during analysis - try_vectorize_loop_1 will ultimately set it
> on those we vectorize.
>
> This also gets rid of the previously introduced callback in
> vect_analyze_loop_1 in favor of making that advance the mode iterator.
> I'm now pushing VOIDmode explicitly into the vector_modes array
> which makes the re-start on the epilogue side a bit more
> straight-forward.  Note that this will now use auto-detection of the
> vector mode in case the main loop used it and we want to try
> LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P and the first mode from
> the target array if not.  I've added a comment that says we may
> want to make sure we don't try vectorizing the epilogue with a
> bigger vector size than the main loop but the situation isn't
> very likely to appear in practice I guess (and it was also present
> before this change).
>
> In principle this change should not change vectorization decisions
> but the way we handled re-analyzing epilogues as main loops makes
> me only 99% sure that it doesn't.
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.

Comments inline.

>
> OK?
>
> Thanks,
> Richard.
>
> 2021-11-05  Richard Biener  <rguent...@suse.de>
>
>       * tree-vectorizer.h (vec_info_shared::n_stmts): Add.
>       (LOOP_VINFO_N_STMTS): Likewise.
>       (vec_info_for_bb): Remove unused function.
>       * tree-vectorizer.c (vec_info_shared::vec_info_shared):
>       Initialize n_stmts member.
>       * tree-vect-loop.c: Remove INCLUDE_FUNCTIONAL.
>       (vect_create_loop_vinfo): Do not set loop->aux.
>       (vect_analyze_loop_2): Do not get n_stmts as argument,
>       instead use LOOP_VINFO_N_STMTS.  Set LOOP_VINFO_VECTORIZABLE_P
>       here.
>       (vect_analyze_loop_1): Remove callback, get the mode iterator
>       and autodetected_vector_mode as argument, advancing the
>       iterator and initializing autodetected_vector_mode here.
>       (vect_analyze_loop): Split analysis loop into two, first
>       processing main loops only and then epilogues.
> ---
>  gcc/tree-vect-loop.c  | 415 +++++++++++++++++++++---------------------
>  gcc/tree-vectorizer.c |   3 +-
>  gcc/tree-vectorizer.h |  10 +-
>  3 files changed, 212 insertions(+), 216 deletions(-)
>
> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
> index 13a53436729..abf87f99d6d 100644
> --- a/gcc/tree-vect-loop.c
> +++ b/gcc/tree-vect-loop.c
> @@ -20,7 +20,6 @@ along with GCC; see the file COPYING3.  If not see
>  <http://www.gnu.org/licenses/>.  */
>  
>  #define INCLUDE_ALGORITHM
> -#define INCLUDE_FUNCTIONAL
>  #include "config.h"
>  #include "system.h"
>  #include "coretypes.h"
> @@ -1520,8 +1519,6 @@ vect_create_loop_vinfo (class loop *loop, 
> vec_info_shared *shared,
>         = wi::smin (nit, param_vect_inner_loop_cost_factor).to_uhwi ();
>      }
>  
> -  gcc_assert (!loop->aux);
> -  loop->aux = loop_vinfo;
>    return loop_vinfo;
>  }
>  
> @@ -2209,7 +2206,7 @@ vect_determine_partial_vectors_and_peeling 
> (loop_vec_info loop_vinfo,
>     for it.  The different analyses will record information in the
>     loop_vec_info struct.  */
>  static opt_result
> -vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned 
> *n_stmts)
> +vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
>  {
>    opt_result ok = opt_result::success ();
>    int res;
> @@ -2244,7 +2241,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
> &fatal, unsigned *n_stmts)
>        opt_result res
>       = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
>                                    &LOOP_VINFO_DATAREFS (loop_vinfo),
> -                                  n_stmts);
> +                                  &LOOP_VINFO_N_STMTS (loop_vinfo));
>        if (!res)
>       {
>         if (dump_enabled_p ())
> @@ -2341,7 +2338,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool 
> &fatal, unsigned *n_stmts)
>    poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR 
> (loop_vinfo);
>  
>    /* Check the SLP opportunities in the loop, analyze and build SLP trees.  
> */
> -  ok = vect_analyze_slp (loop_vinfo, *n_stmts);
> +  ok = vect_analyze_slp (loop_vinfo, LOOP_VINFO_N_STMTS (loop_vinfo));
>    if (!ok)
>      return ok;
>  
> @@ -2641,6 +2638,7 @@ start_over:
>                       LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
>  
>    /* Ok to vectorize!  */
> +  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
>    return opt_result::success ();
>  
>  again:
> @@ -2891,46 +2889,70 @@ vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
>    return true;
>  }
>  
> -/* Analyze LOOP with VECTOR_MODE and as epilogue if MAIN_LOOP_VINFO is
> -   not NULL.  Process the analyzed loop with PROCESS even if analysis
> -   failed.  Sets *N_STMTS and FATAL according to the analysis.
> +/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO 
> is
> +   not NULL.  Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
> +   MODE_I to the next mode useful to analyze.
>     Return the loop_vinfo on success and wrapped null on failure.  */
>  
>  static opt_loop_vec_info
>  vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
>                    const vect_loop_form_info *loop_form_info,
> -                  machine_mode vector_mode, loop_vec_info main_loop_vinfo,
> -                  unsigned int *n_stmts, bool &fatal,
> -                  std::function<void(loop_vec_info)> process = nullptr)
> +                  loop_vec_info main_loop_vinfo,
> +                  const vector_modes &vector_modes, unsigned &mode_i,
> +                  machine_mode &autodetected_vector_mode,
> +                  bool &fatal)
>  {
>    loop_vec_info loop_vinfo
>      = vect_create_loop_vinfo (loop, shared, loop_form_info);
> -  loop_vinfo->vector_mode = vector_mode;
> -
>    if (main_loop_vinfo)
>      LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_vinfo;
>  
> +  machine_mode vector_mode = vector_modes[mode_i];
> +  loop_vinfo->vector_mode = vector_mode;
> +
>    /* Run the main analysis.  */
> -  fatal = false;
> -  opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, n_stmts);
> -  loop->aux = NULL;
> +  opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal);
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                  "***** Analysis %s with vector mode %s\n",
> +                  res ? "succeeded" : " failed",
> +                  GET_MODE_NAME (loop_vinfo->vector_mode));
>  
> -  /* Process info before we destroy loop_vinfo upon analysis failure
> -     when there was no fatal failure.  */
> -  if (!fatal && process)
> -    process (loop_vinfo);
> +  /* Remember the autodetected vector mode.  */
> +  if (vector_mode == VOIDmode)
> +    autodetected_vector_mode = loop_vinfo->vector_mode;
>  
> -  if (dump_enabled_p ())
> +  /* Advance mode_i, first skipping modes that would result in the
> +     same analysis result.  */
> +  while (mode_i + 1 < vector_modes.length ()
> +      && vect_chooses_same_modes_p (loop_vinfo,
> +                                    vector_modes[mode_i + 1]))
>      {
> -      if (res)
> +      if (dump_enabled_p ())
>       dump_printf_loc (MSG_NOTE, vect_location,
> -                      "***** Analysis succeeded with vector mode %s\n",
> -                      GET_MODE_NAME (loop_vinfo->vector_mode));
> -      else
> +                      "***** The result for vector mode %s would"
> +                      " be the same\n",
> +                      GET_MODE_NAME (vector_modes[mode_i + 1]));
> +      mode_i += 1;
> +    }
> +  if (mode_i + 1 < vector_modes.length ()
> +      && VECTOR_MODE_P (autodetected_vector_mode)
> +      && (related_vector_mode (vector_modes[mode_i + 1],
> +                            GET_MODE_INNER (autodetected_vector_mode))
> +       == autodetected_vector_mode)
> +      && (related_vector_mode (autodetected_vector_mode,
> +                            GET_MODE_INNER (vector_modes[mode_i + 1]))
> +       == vector_modes[mode_i + 1]))
> +    {
> +      if (dump_enabled_p ())
>       dump_printf_loc (MSG_NOTE, vect_location,
> -                      "***** Analysis failed with vector mode %s\n",
> -                      GET_MODE_NAME (loop_vinfo->vector_mode));
> +                      "***** Skipping vector mode %s, which would"
> +                      " repeat the analysis for %s\n",
> +                      GET_MODE_NAME (vector_modes[mode_i + 1]),
> +                      GET_MODE_NAME (autodetected_vector_mode));
> +      mode_i += 1;
>      }
> +  mode_i++;
>  
>    if (!res)
>      {
> @@ -2940,7 +2962,6 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared 
> *shared,
>        return opt_loop_vec_info::propagate_failure (res);
>      }
>  
> -  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
>    return opt_loop_vec_info::success (loop_vinfo);
>  }
>  
> @@ -2952,14 +2973,6 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared 
> *shared,
>  opt_loop_vec_info
>  vect_analyze_loop (class loop *loop, vec_info_shared *shared)
>  {
> -  auto_vector_modes vector_modes;
> -
> -  /* Autodetect first vector size we try.  */
> -  unsigned int autovec_flags
> -    = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
> -                                                 loop->simdlen != 0);
> -  unsigned int mode_i = 0;
> -
>    DUMP_VECT_SCOPE ("analyze_loop_nest");
>  
>    if (loop_outer (loop)
> @@ -2985,70 +2998,59 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
> *shared)
>        return opt_loop_vec_info::propagate_failure (res);
>      }
>  
> -  unsigned n_stmts = 0;
> -  machine_mode autodetected_vector_mode = VOIDmode;
> -  opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
> -  machine_mode next_vector_mode = VOIDmode;
> -  poly_uint64 lowest_th = 0;
> -  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
> -                          && !unlimited_cost_model (loop));
> +  /* When pick_lowest_cost_p is true, we should in principle iterate
> +     over all the loop_vec_infos that LOOP_VINFO could replace and
> +     try to vectorize LOOP_VINFO under the same conditions.
> +     E.g. when trying to replace an epilogue loop, we should vectorize
> +     LOOP_VINFO as an epilogue loop with the same VF limit.  When trying
> +     to replace the main loop, we should vectorize LOOP_VINFO as a main
> +     loop too.
>  
> -  bool vect_epilogues = false;
> -  unsigned HOST_WIDE_INT simdlen = loop->simdlen;
> -  while (1)
> -    {
> -      /* When pick_lowest_cost_p is true, we should in principle iterate
> -      over all the loop_vec_infos that LOOP_VINFO could replace and
> -      try to vectorize LOOP_VINFO under the same conditions.
> -      E.g. when trying to replace an epilogue loop, we should vectorize
> -      LOOP_VINFO as an epilogue loop with the same VF limit.  When trying
> -      to replace the main loop, we should vectorize LOOP_VINFO as a main
> -      loop too.
> +     However, autovectorize_vector_modes is usually sorted as follows:
>  
> -      However, autovectorize_vector_modes is usually sorted as follows:
> +     - Modes that naturally produce lower VFs usually follow modes that
> +     naturally produce higher VFs.
>  
> -      - Modes that naturally produce lower VFs usually follow modes that
> -        naturally produce higher VFs.
> +     - When modes naturally produce the same VF, maskable modes
> +     usually follow unmaskable ones, so that the maskable mode
> +     can be used to vectorize the epilogue of the unmaskable mode.
>  
> -      - When modes naturally produce the same VF, maskable modes
> -        usually follow unmaskable ones, so that the maskable mode
> -        can be used to vectorize the epilogue of the unmaskable mode.
> +     This order is preferred because it leads to the maximum
> +     epilogue vectorization opportunities.  Targets should only use
> +     a different order if they want to make wide modes available while
> +     disparaging them relative to earlier, smaller modes.  The assumption
> +     in that case is that the wider modes are more expensive in some
> +     way that isn't reflected directly in the costs.
>  
> -      This order is preferred because it leads to the maximum
> -      epilogue vectorization opportunities.  Targets should only use
> -      a different order if they want to make wide modes available while
> -      disparaging them relative to earlier, smaller modes.  The assumption
> -      in that case is that the wider modes are more expensive in some
> -      way that isn't reflected directly in the costs.
> +     There should therefore be few interesting cases in which
> +     LOOP_VINFO fails when treated as an epilogue loop, succeeds when
> +     treated as a standalone loop, and ends up being genuinely cheaper
> +     than FIRST_LOOP_VINFO.  */

I think the patch obsoletes this big comment, which was trying to explain
why we *didn't* try to vectorise as a main loop separately from an
epilogue loop.  (It was already on shaky ground, as previously discussed.)

>  
> -      There should therefore be few interesting cases in which
> -      LOOP_VINFO fails when treated as an epilogue loop, succeeds when
> -      treated as a standalone loop, and ends up being genuinely cheaper
> -      than FIRST_LOOP_VINFO.  */
> +  auto_vector_modes vector_modes;
> +  /* Autodetect first vector size we try.  */
> +  vector_modes.safe_push (VOIDmode);
> +  unsigned int autovec_flags
> +    = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
> +                                                 loop->simdlen != 0);
> +  bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
> +                          && !unlimited_cost_model (loop));
> +  machine_mode autodetected_vector_mode = VOIDmode;
> +  opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
> +  unsigned int mode_i = 0;
> +  unsigned int first_loop_i = 0;
> +  unsigned int first_loop_next_i = 0;
> +  unsigned HOST_WIDE_INT simdlen = loop->simdlen;
>  
> +  /* First determine the main loop vectorization mode.  */
> +  while (1)
> +    {
> +      unsigned int loop_vinfo_i = mode_i;
>        bool fatal;
> -      auto cb = [&] (loop_vec_info loop_vinfo)
> -     {
> -       if (mode_i == 0)
> -         autodetected_vector_mode = loop_vinfo->vector_mode;
> -       while (mode_i < vector_modes.length ()
> -              && vect_chooses_same_modes_p (loop_vinfo,
> -                                            vector_modes[mode_i]))
> -         {
> -           if (dump_enabled_p ())
> -             dump_printf_loc (MSG_NOTE, vect_location,
> -                              "***** The result for vector mode %s would"
> -                              " be the same\n",
> -                              GET_MODE_NAME (vector_modes[mode_i]));
> -           mode_i += 1;
> -         }
> -     };
>        opt_loop_vec_info loop_vinfo
>       = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> -                            next_vector_mode,
> -                            vect_epilogues
> -                            ? (loop_vec_info)first_loop_vinfo : NULL,
> -                            &n_stmts, fatal, cb);
> +                            NULL, vector_modes, mode_i,
> +                            autodetected_vector_mode, fatal);
>        if (fatal)
>       break;
>  
> @@ -3061,10 +3063,107 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
> *shared)
>           {
>             delete first_loop_vinfo;
>             first_loop_vinfo = opt_loop_vec_info::success (NULL);
> -           LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
>             simdlen = 0;
>           }
>         else if (pick_lowest_cost_p && first_loop_vinfo)
> +         {
> +           /* Keep trying to roll back vectorization attempts while the
> +              loop_vec_infos they produced were worse than this one.  */
> +           if (vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
> +             {
> +               delete first_loop_vinfo;
> +               first_loop_vinfo = opt_loop_vec_info::success (NULL);
> +             }

The comment no longer really describes the code here.  We're just
making a straight comparison between two main loop vinfos (which is
a good thing).  I think it might be easier to follow if the joust condition
was part of the “else if”, so that it's more obviously a sibling of the
simdlen comparison.

> +         }
> +       if (first_loop_vinfo == NULL)
> +         {
> +           first_loop_vinfo = loop_vinfo;
> +           first_loop_i = loop_vinfo_i;
> +           first_loop_next_i = mode_i;
> +         }
> +       else
> +         {
> +           delete loop_vinfo;
> +           loop_vinfo = opt_loop_vec_info::success (NULL);
> +         }
> +
> +       /* Commit to first_loop_vinfo if we have no reason to try
> +          alternatives.  */
> +       if (!simdlen && !pick_lowest_cost_p)
> +         break;
> +     }
> +      if (mode_i == vector_modes.length ()
> +       || autodetected_vector_mode == VOIDmode)
> +     break;
> +
> +      /* Try the next biggest vector size.  */
> +      if (dump_enabled_p ())
> +     dump_printf_loc (MSG_NOTE, vect_location,
> +                      "***** Re-trying analysis with vector mode %s\n",
> +                      GET_MODE_NAME (vector_modes[mode_i]));
> +    }
> +  if (!first_loop_vinfo)
> +    return opt_loop_vec_info::propagate_failure (res);
> +
> +  if (dump_enabled_p ())
> +    dump_printf_loc (MSG_NOTE, vect_location,
> +                  "***** Choosing vector mode %s\n",
> +                  GET_MODE_NAME (first_loop_vinfo->vector_mode));
> +
> +  /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
> +     enabled, SIMDUID is not set, it is the innermost loop and we have
> +     either already found the loop's SIMDLEN or there was no SIMDLEN to
> +     begin with.
> +     TODO: Enable epilogue vectorization for loops with SIMDUID set.  */
> +  bool vect_epilogues = (!simdlen
> +                      && loop->inner == NULL
> +                      && param_vect_epilogues_nomask
> +                      && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
> +                      && !loop->simduid);
> +  if (!vect_epilogues)
> +    return first_loop_vinfo;
> +
> +  /* Now analyze first_loop_vinfo for epilogue vectorization.  */
> +  poly_uint64 lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
> +
> +  /* Handle the case that the original loop can use partial
> +     vectorization, but want to only adopt it for the epilogue.
> +     The retry should be in the same mode as original.  */
> +  if (LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (first_loop_vinfo))
> +    {
> +      gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (first_loop_vinfo)
> +               && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (first_loop_vinfo));
> +      if (dump_enabled_p ())
> +     dump_printf_loc (MSG_NOTE, vect_location,
> +                      "***** Re-trying analysis with same vector mode"
> +                      " %s for epilogue with partial vectors.\n",
> +                      GET_MODE_NAME (first_loop_vinfo->vector_mode));
> +      mode_i = first_loop_i;
> +    }
> +  else
> +    {
> +      mode_i = first_loop_next_i;
> +      if (mode_i == vector_modes.length ())
> +     return first_loop_vinfo;
> +    }

It's an interesting question whether we should continue doing this,
or whether we should consider all epilogue alternatives even for
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P.  Perhaps this reorg makes
LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P redundant.

Obviously doesn't affect this patch though.

Looks great to me otherwise FWIW.

Thanks,
Richard

> +
> +  /* ???  If first_loop_vinfo was using VOIDmode then we probably
> +     want to instead search for the corresponding mode in vector_modes[].  */
> +
> +  while (1)
> +    {
> +      bool fatal;
> +      opt_loop_vec_info loop_vinfo
> +     = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> +                            first_loop_vinfo,
> +                            vector_modes, mode_i,
> +                            autodetected_vector_mode, fatal);
> +      if (fatal)
> +     break;
> +
> +      if (loop_vinfo)
> +     {
> +       if (pick_lowest_cost_p)
>           {
>             /* Keep trying to roll back vectorization attempts while the
>                loop_vec_infos they produced were worse than this one.  */
> @@ -3075,59 +3174,9 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
> *shared)
>                 gcc_assert (vect_epilogues);
>                 delete vinfos.pop ();
>               }
> -           if (vinfos.is_empty ()
> -               && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo))
> -             {
> -               if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
> -                 {
> -                   delete first_loop_vinfo;
> -                   first_loop_vinfo = opt_loop_vec_info::success (NULL);
> -                 }
> -               else
> -                 {
> -                   if (dump_enabled_p ())
> -                     dump_printf_loc (MSG_NOTE, vect_location,
> -                                      "***** Reanalyzing as a main loop "
> -                                      "with vector mode %s\n",
> -                                      GET_MODE_NAME
> -                                        (loop_vinfo->vector_mode));
> -                   opt_loop_vec_info main_loop_vinfo
> -                     = vect_analyze_loop_1 (loop, shared, &loop_form_info,
> -                                            loop_vinfo->vector_mode,
> -                                            NULL, &n_stmts, fatal);
> -                   if (main_loop_vinfo
> -                       && vect_joust_loop_vinfos (main_loop_vinfo,
> -                                                  first_loop_vinfo))
> -                     {
> -                       delete first_loop_vinfo;
> -                       first_loop_vinfo = opt_loop_vec_info::success (NULL);
> -                       delete loop_vinfo;
> -                       loop_vinfo
> -                         = opt_loop_vec_info::success (main_loop_vinfo);
> -                     }
> -                   else
> -                     {
> -                       if (dump_enabled_p ())
> -                         dump_printf_loc (MSG_NOTE, vect_location,
> -                                          "***** No longer preferring vector"
> -                                          " mode %s after reanalyzing the "
> -                                          " loop as a main loop\n",
> -                                          GET_MODE_NAME
> -                                            (loop_vinfo->vector_mode));
> -                       delete main_loop_vinfo;
> -                     }
> -                 }
> -             }
>           }
> -
> -       if (first_loop_vinfo == NULL)
> -         {
> -           first_loop_vinfo = loop_vinfo;
> -           lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
> -         }
> -       else if (vect_epilogues
> -                /* For now only allow one epilogue loop.  */
> -                && first_loop_vinfo->epilogue_vinfos.is_empty ())
> +       /* For now only allow one epilogue loop.  */
> +       if (first_loop_vinfo->epilogue_vinfos.is_empty ())
>           {
>             first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
>             poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
> @@ -3144,86 +3193,34 @@ vect_analyze_loop (class loop *loop, vec_info_shared 
> *shared)
>             loop_vinfo = opt_loop_vec_info::success (NULL);
>           }
>  
> -       /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is
> -          enabled, SIMDUID is not set, it is the innermost loop and we have
> -          either already found the loop's SIMDLEN or there was no SIMDLEN to
> -          begin with.
> -          TODO: Enable epilogue vectorization for loops with SIMDUID set.  */
> -       vect_epilogues = (!simdlen
> -                         && loop->inner == NULL
> -                         && param_vect_epilogues_nomask
> -                         && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
> -                         && !loop->simduid
> -                         /* For now only allow one epilogue loop, but allow
> -                            pick_lowest_cost_p to replace it.  */
> -                         && (first_loop_vinfo->epilogue_vinfos.is_empty ()
> -                             || pick_lowest_cost_p));
> -
> -       /* Commit to first_loop_vinfo if we have no reason to try
> -          alternatives.  */
> -       if (!simdlen && !vect_epilogues && !pick_lowest_cost_p)
> +       /* For now only allow one epilogue loop, but allow
> +          pick_lowest_cost_p to replace it, so commit to the
> +          first epilogue if we have no reason to try alternatives.  */
> +       if (!pick_lowest_cost_p)
>           break;
>       }
>  
> -      /* Handle the case that the original loop can use partial
> -      vectorization, but want to only adopt it for the epilogue.
> -      The retry should be in the same mode as original.  */
> -      if (vect_epilogues
> -       && loop_vinfo
> -       && LOOP_VINFO_EPIL_USING_PARTIAL_VECTORS_P (loop_vinfo))
> -     {
> -       gcc_assert (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> -                   && !LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
> -       if (dump_enabled_p ())
> -         dump_printf_loc (MSG_NOTE, vect_location,
> -                          "***** Re-trying analysis with same vector mode"
> -                          " %s for epilogue with partial vectors.\n",
> -                          GET_MODE_NAME (loop_vinfo->vector_mode));
> -       continue;
> -     }
> -
> -      if (mode_i < vector_modes.length ()
> -       && VECTOR_MODE_P (autodetected_vector_mode)
> -       && (related_vector_mode (vector_modes[mode_i],
> -                                GET_MODE_INNER (autodetected_vector_mode))
> -           == autodetected_vector_mode)
> -       && (related_vector_mode (autodetected_vector_mode,
> -                                GET_MODE_INNER (vector_modes[mode_i]))
> -           == vector_modes[mode_i]))
> -     {
> -       if (dump_enabled_p ())
> -         dump_printf_loc (MSG_NOTE, vect_location,
> -                          "***** Skipping vector mode %s, which would"
> -                          " repeat the analysis for %s\n",
> -                          GET_MODE_NAME (vector_modes[mode_i]),
> -                          GET_MODE_NAME (autodetected_vector_mode));
> -       mode_i += 1;
> -     }
> -
> -      if (mode_i == vector_modes.length ()
> -       || autodetected_vector_mode == VOIDmode)
> +      if (mode_i == vector_modes.length ())
>       break;
>  
>        /* Try the next biggest vector size.  */
> -      next_vector_mode = vector_modes[mode_i++];
>        if (dump_enabled_p ())
>       dump_printf_loc (MSG_NOTE, vect_location,
> -                      "***** Re-trying analysis with vector mode %s\n",
> -                      GET_MODE_NAME (next_vector_mode));
> +                      "***** Re-trying epilogue analysis with vector "
> +                      "mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
>      }
>  
> -  if (first_loop_vinfo)
> +  if (!first_loop_vinfo->epilogue_vinfos.is_empty ())
>      {
> -      loop->aux = (loop_vec_info) first_loop_vinfo;
> +      LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
>        if (dump_enabled_p ())
>       dump_printf_loc (MSG_NOTE, vect_location,
> -                      "***** Choosing vector mode %s\n",
> -                      GET_MODE_NAME (first_loop_vinfo->vector_mode));
> -      LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
> -      return first_loop_vinfo;
> +                      "***** Choosing epilogue vector mode %s\n",
> +                      GET_MODE_NAME
> +                        (first_loop_vinfo->epilogue_vinfos[0]->vector_mode));
>      }
>  
> -  return opt_loop_vec_info::propagate_failure (res);
> +  return first_loop_vinfo;
>  }
>  
>  /* Return true if there is an in-order reduction function for CODE, storing
> diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
> index 4c9ab8124b5..a2e13acb6d2 100644
> --- a/gcc/tree-vectorizer.c
> +++ b/gcc/tree-vectorizer.c
> @@ -475,7 +475,8 @@ vec_info::~vec_info ()
>  }
>  
>  vec_info_shared::vec_info_shared ()
> -  : datarefs (vNULL),
> +  : n_stmts (0),
> +    datarefs (vNULL),
>      datarefs_copy (vNULL),
>      ddrs (vNULL)
>  {
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 3f069e71296..7d3d3935c95 100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -350,6 +350,9 @@ public:
>    void save_datarefs();
>    void check_datarefs();
>  
> +  /* The number of scalar stmts.  */
> +  unsigned n_stmts;
> +
>    /* All data references.  Freed by free_data_refs, so not an auto_vec.  */
>    vec<data_reference_p> datarefs;
>    vec<data_reference> datarefs_copy;
> @@ -822,6 +825,7 @@ public:
>  #define LOOP_VINFO_RGROUP_COMPARE_TYPE(L)  (L)->rgroup_compare_type
>  #define LOOP_VINFO_RGROUP_IV_TYPE(L)       (L)->rgroup_iv_type
>  #define LOOP_VINFO_PTR_MASK(L)             (L)->ptr_mask
> +#define LOOP_VINFO_N_STMTS(L)                   (L)->shared->n_stmts
>  #define LOOP_VINFO_LOOP_NEST(L)            (L)->shared->loop_nest
>  #define LOOP_VINFO_DATAREFS(L)             (L)->shared->datarefs
>  #define LOOP_VINFO_DDRS(L)                 (L)->shared->ddrs
> @@ -928,12 +932,6 @@ public:
>  #define BB_VINFO_DATAREFS(B)         (B)->shared->datarefs
>  #define BB_VINFO_DDRS(B)             (B)->shared->ddrs
>  
> -static inline bb_vec_info
> -vec_info_for_bb (basic_block bb)
> -{
> -  return (bb_vec_info) bb->aux;
> -}
> -
>  /*-----------------------------------------------------------------*/
>  /* Info on vectorized defs.                                        */
>  /*-----------------------------------------------------------------*/

Reply via email to