The following fixes the computation of supports_partial_vectors which
is used to prune the set of modes to iterate over for epilog
vectorization.  The partial_vectors_supported_p predicate used so far
only looks for while_ult, while we also support predication when
mask modes are integer modes, as for AVX512.

I've noticed this isn't very effective on x86_64 anyway since
if the main loop mode is autodetected we skip re-analyzing
mode_i == 0, but then mode_i == 1 is usually the very same
large mode.  This is fixed by the next patch.

The following simplifies the logic by simply re-using the
already computed LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P from
the main loop to decide whether we can possibly use partial
vectors for the epilogue (for the case of having the same VF).
We record the main loop analysis result before any suggested unroll
factor is applied, to avoid possible differences caused by unrolling.

Bootstrap and regtest ongoing on x86_64-unknown-linux-gnu.

        * tree-vect-loop.cc (vect_analyze_loop_1): New parameter
        to output whether the not unrolled loop can use partial
        vectors.
        (vect_analyze_loop): Use the main loop partial vector
        analysis result to decide if epilogues with the same VF
        can use partial vectors.
---
 gcc/tree-vect-loop.cc | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index c824b5abaaf..fa022dfad42 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3474,7 +3474,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
                     loop_vec_info orig_loop_vinfo,
                     const vector_modes &vector_modes, unsigned &mode_i,
                     machine_mode &autodetected_vector_mode,
-                    bool &fatal)
+                    bool &fatal,
+                    bool &loop_as_epilogue_supports_partial_vectors)
 {
   loop_vec_info loop_vinfo
     = vect_create_loop_vinfo (loop, shared, loop_form_info, orig_loop_vinfo);
@@ -3488,6 +3489,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
   opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal,
                                        &suggested_unroll_factor,
                                        slp_done_for_suggested_uf);
+  loop_as_epilogue_supports_partial_vectors
+    = LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo);
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                     "***** Analysis %s with vector mode %s\n",
@@ -3633,6 +3636,8 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
   for (unsigned i = 0; i < vector_modes.length (); ++i)
     cached_vf_per_mode.safe_push (0);
 
+  bool supports_partial_vectors = false;
+
   /* First determine the main loop vectorization mode, either the first
      one that works, starting with auto-detecting the vector mode and then
      following the targets order of preference, or the one with the
@@ -3644,10 +3649,12 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
       /* Set cached VF to -1 prior to analysis, which indicates a mode has
         failed.  */
       cached_vf_per_mode[last_mode_i] = -1;
+      bool loop_as_epilogue_supports_partial_vectors;
       opt_loop_vec_info loop_vinfo
        = vect_analyze_loop_1 (loop, shared, &loop_form_info,
                               NULL, vector_modes, mode_i,
-                              autodetected_vector_mode, fatal);
+                              autodetected_vector_mode, fatal,
+                              loop_as_epilogue_supports_partial_vectors);
       if (fatal)
        break;
 
@@ -3677,7 +3684,11 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
              first_loop_vinfo = opt_loop_vec_info::success (NULL);
            }
          if (first_loop_vinfo == NULL)
-           first_loop_vinfo = loop_vinfo;
+           {
+             first_loop_vinfo = loop_vinfo;
+             supports_partial_vectors
+               = loop_as_epilogue_supports_partial_vectors;
+           }
          else
            {
              delete loop_vinfo;
@@ -3742,8 +3753,7 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
     vector_modes[0] = autodetected_vector_mode;
   mode_i = 0;
 
-  bool supports_partial_vectors =
-    partial_vectors_supported_p () && param_vect_partial_vector_usage != 0;
+  supports_partial_vectors &= param_vect_partial_vector_usage != 0;
   poly_uint64 first_vinfo_vf = LOOP_VINFO_VECT_FACTOR (first_loop_vinfo);
 
   loop_vec_info orig_loop_vinfo = first_loop_vinfo;
@@ -3769,12 +3779,13 @@ vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
                             "***** Re-trying epilogue analysis with vector "
                             "mode %s\n", GET_MODE_NAME (vector_modes[mode_i]));
 
-         bool fatal;
+         bool fatal, loop_as_epilogue_supports_partial_vectors;
          opt_loop_vec_info loop_vinfo
            = vect_analyze_loop_1 (loop, shared, &loop_form_info,
                                   orig_loop_vinfo,
                                   vector_modes, mode_i,
-                                  autodetected_vector_mode, fatal);
+                                  autodetected_vector_mode, fatal,
+                                  loop_as_epilogue_supports_partial_vectors);
          if (fatal)
            break;
 
-- 
2.43.0

Reply via email to