On 2020/6/11 12:58 AM, Richard Sandiford wrote:
> "Kewen.Lin" <li...@linux.ibm.com> writes:
>> diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
>> index ca68d04a919..1fac5898525 100644
>> --- a/gcc/tree-vect-loop-manip.c
>> +++ b/gcc/tree-vect-loop-manip.c
>> @@ -420,8 +420,8 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
>>                               rgroup_controls *rgc, tree niters,
>>                               tree niters_skip, bool might_wrap_p)
>>  {
>> -  tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
>> -  tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
>> +  tree compare_type = LOOP_VINFO_COMPARE_TYPE (loop_vinfo);
>> +  tree iv_type = LOOP_VINFO_IV_TYPE (loop_vinfo);
> 
> How about s/MASK/RGROUP/ instead?  COMPARE_TYPE and IV_TYPE sound a bit
> too generic, and might give the impression that they're meaningful for
> classic full-vector vectorisation too.
> 
>> @@ -748,13 +748,12 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
>>  }
>>  
>>  /* Like vect_set_loop_condition, but handle the case in which there
>> -   are no loop masks.  */
>> +   are no partial vectorization loops.  */
> 
> Maybe:
> 
>   … in which the vector loop handles exactly VF scalars per iteration.
> 
>> diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
>> index 7ea75d6d095..b6e96f77f69 100644
>> --- a/gcc/tree-vect-loop.c
>> +++ b/gcc/tree-vect-loop.c
>> @@ -155,6 +155,7 @@ along with GCC; see the file COPYING3.  If not see
>>  static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
>>  static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
>>                                             bool *, bool *);
>> +static bool known_niters_smaller_than_vf (loop_vec_info);
> 
> Please instead define the function before its first caller.
> Adding “vect_” to the beginning of the name would probably be
> more consistent.
> 
>> [...]
>> @@ -959,14 +960,41 @@ vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
>>    return res;
>>  }
>>  
>> +/* Calculate the minimal bits necessary to represent the maximal iteration
>> +   count of loop with loop_vec_info LOOP_VINFO which is scaling with a given
>> +   factor FACTOR.  */
> 
> How about:
> 
> /* Calculate the minimum precision necessary to represent:
> 
>       MAX_NITERS * FACTOR
> 
>    as an unsigned integer, where MAX_NITERS is the maximum number of
>    loop header iterations for the original scalar form of LOOP_VINFO.  */
> 
>> +
>> +static unsigned
>> +min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
> 
> Here too I think a “vect_” prefix would be more consistent.
> 
>> +{
>> +  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
>> +
>> +  /* Get the maximum number of iterations that is representable
>> +     in the counter type.  */
>> +  tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
>> +  widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
>> +
>> +  /* Get a more refined estimate for the number of iterations.  */
>> +  widest_int max_back_edges;
>> +  if (max_loop_iterations (loop, &max_back_edges))
>> +    max_ni = wi::smin (max_ni, max_back_edges + 1);
>> +
>> +  /* Account for factor, in which each bit is replicated N times.  */
> 
> The “, in which each bit …” part no longer makes sense in this generic
> context.  Probably best just to drop the comment altogether and…
> 
>> +  max_ni *= factor;
>> +
>> +  /* Work out how many bits we need to represent the limit.  */
>> +  unsigned int min_ni_width = wi::min_precision (max_ni, UNSIGNED);
>> +
>> +  return min_ni_width;
> 
> …change this to:
> 
>   /* Work out how many bits we need to represent the limit.  */
>   return wi::min_precision (max_ni * factor, UNSIGNED);
> 
>> [...]
>> @@ -6813,8 +6820,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
>>      {
>>        if (dump_enabled_p ())
>>          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>> -                         "can't use a fully-masked loop because no"
>> -                         " conditional operation is available.\n");
>> +                         "can't use a partial vectorization loop because"
>> +                         " no conditional operation is available.\n");
> 
> Maybe “can't operate on partial vectors because…”.  Same for the
> later changes.
> 
>> @@ -9194,12 +9202,13 @@ optimize_mask_stores (class loop *loop)
>>  }
>>  
>>  /* Decide whether it is possible to use a zero-based induction variable
>> -   when vectorizing LOOP_VINFO with a fully-masked loop.  If it is,
>> -   return the value that the induction variable must be able to hold
>> -   in order to ensure that the loop ends with an all-false mask.
>> +   when vectorizing LOOP_VINFO with a partial vectorization loop.  If
> 
> Maybe “…with partial vectors”
> 
>> +   it is, return the value that the induction variable must be able to
>> +   hold in order to ensure that the loop ends with an all-false rgroup
>> +   control like mask.
>>     Return -1 otherwise.  */
> 
> This was originally meant to be a single paragraph, so I think it reads
> better if the “Return -1 otherwise.” is on the previous line.
> 
>> [...]
>> @@ -9234,3 +9243,23 @@ vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo)
>>    return iv_limit;
>>  }
>>  
>> +/* If we know the iteration count is smaller than vectorization factor, return
>> +   true, otherwise return false.  */
> 
> Maybe:
> 
> /* Return true if we know that the iteration count is smaller than the
>    vectorization factor.  Return false if it isn't, or if we can't be sure
>    either way.  */
> 
>> @@ -534,9 +534,10 @@ public:
>>       elements that should be false in the first mask).  */
>>    tree mask_skip_niters;
>>  
>> -  /* Type of the variables to use in the WHILE_ULT call for fully-masked
>> +  /* Type of the variables to use in the loop closing comparison for
>> +     partial vectorization, like WHILE_ULT call for fully-masked
>>       loops.  */
>> -  tree mask_compare_type;
>> +  tree compare_type;
> 
> Maybe:
> 
>   /* The type that the loop control IV should be converted to before
>      testing which of the VF scalars are active and inactive.
>      Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P.  */
> 
>> @@ -545,8 +546,8 @@ public:
>>       is false and vectorized loop otherwise.  */
>>    tree simd_if_cond;
>>  
>> -  /* Type of the IV to use in the WHILE_ULT call for fully-masked
>> -     loops.  */
>> +  /* Type of the IV to use in the loop closing comparison for partial
>> +     vectorization, like WHILE_ULT call for fully-masked loops.  */
>>    tree iv_type;
> 
> Maybe:
> 
>   /* The type that the vector loop control IV should have when
>      LOOP_VINFO_USING_PARTIAL_VECTORS_P is true.  */
> 
> Thanks,
> Richard
> 

Hi Richard,

Many thanks for the great comments on the whole series!

I've updated this patch to v2 per your comments; the updated patch is attached.

Does it look better now?
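
As a side note on the new vect_min_prec_for_max_niters helper: the
computation boils down to "how many bits does MAX_NITERS * FACTOR need
as an unsigned value" (as before, vect_verify_full_masking then looks
for a compare type of at least that precision for which WHILE_ULT is
supported).  A minimal standalone sketch of the arithmetic (plain C,
not part of the patch; unlike the widest_int version it assumes the
product fits in 64 bits):

  #include <stdio.h>

  /* Minimum number of bits needed to represent MAX_NITERS * FACTOR as
     an unsigned integer; a stand-in for the patch's
     wi::min_precision (max_ni * factor, UNSIGNED).  */
  static unsigned
  min_prec_for_max_niters (unsigned long long max_niters, unsigned factor)
  {
    unsigned long long limit = max_niters * factor;
    unsigned prec = 0;
    while (limit)
      {
        prec++;
        limit >>= 1;
      }
    return prec;
  }

  int
  main (void)
  {
    /* A loop running at most 1000 iterations with 4 scalars per
       iteration (factor 4) needs 12 bits, since 4000 < 2^12.  */
    printf ("%u\n", min_prec_for_max_niters (1000ULL, 4));
    return 0;
  }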

BR,
Kewen
---
gcc/ChangeLog:

        * tree-vect-loop-manip.c (vect_set_loop_controls_directly): Rename
        LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE.  Rename
        LOOP_VINFO_MASK_IV_TYPE to LOOP_VINFO_RGROUP_IV_TYPE.
        (vect_set_loop_condition_masked): Renamed to ...
        (vect_set_loop_condition_partial_vectors): ... this.  Rename
        LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE.  Rename
        vect_iv_limit_for_full_masking to vect_iv_limit_for_partial_vectors.
        (vect_set_loop_condition_unmasked): Renamed to ...
        (vect_set_loop_condition_normal): ... this.
        (vect_set_loop_condition): Rename vect_set_loop_condition_unmasked to
        vect_set_loop_condition_normal.  Rename vect_set_loop_condition_masked
        to vect_set_loop_condition_partial_vectors.
        (vect_prepare_for_masked_peels): Rename LOOP_VINFO_MASK_COMPARE_TYPE
        to LOOP_VINFO_RGROUP_COMPARE_TYPE.
        * tree-vect-loop.c (vect_known_niters_smaller_than_vf): New, factored
        out from ...
        (vect_analyze_loop_costing): ... this.
        (_loop_vec_info::_loop_vec_info): Rename mask_compare_type to
        rgroup_compare_type.
        (vect_min_prec_for_max_niters): New, factored out from ...
        (vect_verify_full_masking): ... this.  Rename
        vect_iv_limit_for_full_masking to vect_iv_limit_for_partial_vectors.
        Rename LOOP_VINFO_MASK_COMPARE_TYPE to LOOP_VINFO_RGROUP_COMPARE_TYPE.
        Rename LOOP_VINFO_MASK_IV_TYPE to LOOP_VINFO_RGROUP_IV_TYPE.
        (vectorizable_reduction): Update dump messages to mention partial
        vectors instead of a fully-masked loop.
        (vectorizable_live_operation): Likewise.
        (vect_iv_limit_for_full_masking): Renamed to ...
        (vect_iv_limit_for_partial_vectors): ... this.
        * tree-vect-stmts.c (check_load_store_masking): Renamed to ...
        (check_load_store_for_partial_vectors): ... this.  Update dump
        messages to mention partial vectors instead of a fully-masked loop.
        (vectorizable_store): Rename check_load_store_masking to
        check_load_store_for_partial_vectors.
        (vectorizable_load): Likewise.
        * tree-vectorizer.h (LOOP_VINFO_MASK_COMPARE_TYPE): Renamed to ...
        (LOOP_VINFO_RGROUP_COMPARE_TYPE): ... this.
        (LOOP_VINFO_MASK_IV_TYPE): Renamed to ...
        (LOOP_VINFO_RGROUP_IV_TYPE): ... this.
        (vect_iv_limit_for_full_masking): Renamed to ...
        (vect_iv_limit_for_partial_vectors): ... this.
        (_loop_vec_info): Rename mask_compare_type to rgroup_compare_type.
        Rename iv_type to rgroup_iv_type.
---
 gcc/tree-vect-loop-manip.c |  42 ++++++-------
 gcc/tree-vect-loop.c       | 118 +++++++++++++++++++++++--------------
 gcc/tree-vect-stmts.c      |  41 +++++++------
 gcc/tree-vectorizer.h      |  19 +++---
 4 files changed, 126 insertions(+), 94 deletions(-)

diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 806db84cb61..458a6675c47 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -420,8 +420,8 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
                                 rgroup_controls *rgc, tree niters,
                                 tree niters_skip, bool might_wrap_p)
 {
-  tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
-  tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
+  tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
+  tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
   tree ctrl_type = rgc->type;
   unsigned int nscalars_per_iter = rgc->max_nscalars_per_iter;
   poly_uint64 nscalars_per_ctrl = TYPE_VECTOR_SUBPARTS (ctrl_type);
@@ -644,15 +644,15 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
    final gcond.  */
 
 static gcond *
-vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
-                               tree niters, tree final_iv,
-                               bool niters_maybe_zero,
-                               gimple_stmt_iterator loop_cond_gsi)
+vect_set_loop_condition_partial_vectors (class loop *loop,
+                                        loop_vec_info loop_vinfo, tree niters,
+                                        tree final_iv, bool niters_maybe_zero,
+                                        gimple_stmt_iterator loop_cond_gsi)
 {
   gimple_seq preheader_seq = NULL;
   gimple_seq header_seq = NULL;
 
-  tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+  tree compare_type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
   unsigned int compare_precision = TYPE_PRECISION (compare_type);
   tree orig_niters = niters;
 
@@ -678,7 +678,7 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
   else
     niters = gimple_convert (&preheader_seq, compare_type, niters);
 
-  widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
+  widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
 
   /* Iterate over all the rgroups and fill in their controls.  We could use
      the first control from any rgroup for the loop condition; here we
@@ -748,14 +748,13 @@ vect_set_loop_condition_masked (class loop *loop, loop_vec_info loop_vinfo,
   return cond_stmt;
 }
 
-/* Like vect_set_loop_condition, but handle the case in which there
-   are no loop masks.  */
+/* Like vect_set_loop_condition, but handle the case in which the vector
+   loop handles exactly VF scalars per iteration.  */
 
 static gcond *
-vect_set_loop_condition_unmasked (class loop *loop, tree niters,
-                                 tree step, tree final_iv,
-                                 bool niters_maybe_zero,
-                                 gimple_stmt_iterator loop_cond_gsi)
+vect_set_loop_condition_normal (class loop *loop, tree niters, tree step,
+                               tree final_iv, bool niters_maybe_zero,
+                               gimple_stmt_iterator loop_cond_gsi)
 {
   tree indx_before_incr, indx_after_incr;
   gcond *cond_stmt;
@@ -914,13 +913,14 @@ vect_set_loop_condition (class loop *loop, loop_vec_info loop_vinfo,
   gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
 
   if (loop_vinfo && LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
-    cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
-                                               final_iv, niters_maybe_zero,
-                                               loop_cond_gsi);
+    cond_stmt = vect_set_loop_condition_partial_vectors (loop, loop_vinfo,
+                                                        niters, final_iv,
+                                                        niters_maybe_zero,
+                                                        loop_cond_gsi);
   else
-    cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
-                                                 final_iv, niters_maybe_zero,
-                                                 loop_cond_gsi);
+    cond_stmt = vect_set_loop_condition_normal (loop, niters, step, final_iv,
+                                               niters_maybe_zero,
+                                               loop_cond_gsi);
 
   /* Remove old loop exit test.  */
   stmt_vec_info orig_cond_info;
@@ -1775,7 +1775,7 @@ void
 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
 {
   tree misalign_in_elems;
-  tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
+  tree type = LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo);
 
   gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
 
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 5a3644a8d71..5ee21c87d7c 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -800,7 +800,7 @@ _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     vectorization_factor (0),
     max_vectorization_factor (0),
     mask_skip_niters (NULL_TREE),
-    mask_compare_type (NULL_TREE),
+    rgroup_compare_type (NULL_TREE),
     simd_if_cond (NULL_TREE),
     unaligned_dr (NULL),
     peeling_for_alignment (0),
@@ -959,14 +959,39 @@ vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
   return res;
 }
 
+/* Calculate the minimum precision necessary to represent:
+
+      MAX_NITERS * FACTOR
+
+   as an unsigned integer, where MAX_NITERS is the maximum number of
+   loop header iterations for the original scalar form of LOOP_VINFO.  */
+
+static unsigned
+vect_min_prec_for_max_niters (loop_vec_info loop_vinfo, unsigned int factor)
+{
+  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+
+  /* Get the maximum number of iterations that is representable
+     in the counter type.  */
+  tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
+  widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
+
+  /* Get a more refined estimate for the number of iterations.  */
+  widest_int max_back_edges;
+  if (max_loop_iterations (loop, &max_back_edges))
+    max_ni = wi::smin (max_ni, max_back_edges + 1);
+
+  /* Work out how many bits we need to represent the limit.  */
+  return wi::min_precision (max_ni * factor, UNSIGNED);
+}
+
 /* Each statement in LOOP_VINFO can be masked where necessary.  Check
    whether we can actually generate the masks required.  Return true if so,
-   storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE.  */
+   storing the type of the scalar IV in LOOP_VINFO_RGROUP_COMPARE_TYPE.  */
 
 static bool
 vect_verify_full_masking (loop_vec_info loop_vinfo)
 {
-  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   unsigned int min_ni_width;
   unsigned int max_nscalars_per_iter
     = vect_get_max_nscalars_per_iter (loop_vinfo);
@@ -977,27 +1002,15 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
   if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
     return false;
 
-  /* Get the maximum number of iterations that is representable
-     in the counter type.  */
-  tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
-  widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
-
-  /* Get a more refined estimate for the number of iterations.  */
-  widest_int max_back_edges;
-  if (max_loop_iterations (loop, &max_back_edges))
-    max_ni = wi::smin (max_ni, max_back_edges + 1);
-
-  /* Account for rgroup masks, in which each bit is replicated N times.  */
-  max_ni *= max_nscalars_per_iter;
-
   /* Work out how many bits we need to represent the limit.  */
-  min_ni_width = wi::min_precision (max_ni, UNSIGNED);
+  min_ni_width
+    = vect_min_prec_for_max_niters (loop_vinfo, max_nscalars_per_iter);
 
   /* Find a scalar mode for which WHILE_ULT is supported.  */
   opt_scalar_int_mode cmp_mode_iter;
   tree cmp_type = NULL_TREE;
   tree iv_type = NULL_TREE;
-  widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
+  widest_int iv_limit = vect_iv_limit_for_partial_vectors (loop_vinfo);
   unsigned int iv_precision = UINT_MAX;
 
   if (iv_limit != -1)
@@ -1050,8 +1063,8 @@ vect_verify_full_masking (loop_vec_info loop_vinfo)
   if (!cmp_type)
     return false;
 
-  LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type;
-  LOOP_VINFO_MASK_IV_TYPE (loop_vinfo) = iv_type;
+  LOOP_VINFO_RGROUP_COMPARE_TYPE (loop_vinfo) = cmp_type;
+  LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo) = iv_type;
   return true;
 }
 
@@ -1617,6 +1630,27 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
   return opt_result::success ();
 }
 
+/* Return true if we know that the iteration count is smaller than the
+   vectorization factor.  Return false if it isn't, or if we can't be sure
+   either way.  */
+
+static bool
+vect_known_niters_smaller_than_vf (loop_vec_info loop_vinfo)
+{
+  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
+
+  HOST_WIDE_INT max_niter;
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+    max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
+  else
+    max_niter = max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
+
+  if (max_niter != -1 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+    return true;
+
+  return false;
+}
+
 /* Analyze the cost of the loop described by LOOP_VINFO.  Decide if it
    is worthwhile to vectorize.  Return 1 if definitely yes, 0 if
    definitely no, or -1 if it's worth retrying.  */
@@ -1631,15 +1665,7 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo)
      counts less than the vectorization factor.  */
   if (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo))
     {
-      HOST_WIDE_INT max_niter;
-
-      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
-       max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
-      else
-       max_niter = max_stmt_executions_int (loop);
-
-      if (max_niter != -1
-         && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
+      if (vect_known_niters_smaller_than_vf (loop_vinfo))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -6802,8 +6828,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because no"
-                            " conditional operation is available.\n");
+                            "can't operate on partial vectors because"
+                            " no conditional operation is available.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else if (reduction_type == FOLD_LEFT_REDUCTION
@@ -6814,8 +6840,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because no"
-                            " conditional operation is available.\n");
+                            "can't operate on partial vectors because"
+                            " no conditional operation is available.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
        }
       else
@@ -8022,25 +8048,26 @@ vectorizable_live_operation (loop_vec_info loop_vinfo,
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "can't use a fully-masked loop because "
-                                "the target doesn't support extract last "
-                                "reduction.\n");
+                                "can't operate on partial vectors "
+                                "because the target doesn't support extract "
+                                "last reduction.\n");
              LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
          else if (slp_node)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "can't use a fully-masked loop because an "
-                                "SLP statement is live after the loop.\n");
+                                "can't operate on partial vectors "
+                                "because an SLP statement is live after "
+                                "the loop.\n");
              LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
          else if (ncopies > 1)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                "can't use a fully-masked loop because"
-                                " ncopies is greater than 1.\n");
+                                "can't operate on partial vectors "
+                                "because ncopies is greater than 1.\n");
              LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
            }
          else
@@ -9195,12 +9222,13 @@ optimize_mask_stores (class loop *loop)
 }
 
 /* Decide whether it is possible to use a zero-based induction variable
-   when vectorizing LOOP_VINFO with a fully-masked loop.  If it is,
-   return the value that the induction variable must be able to hold
-   in order to ensure that the loop ends with an all-false mask.
-   Return -1 otherwise.  */
+   when vectorizing LOOP_VINFO with partial vectors.  If it is, return
+   the value that the induction variable must be able to hold in order
+   to ensure that the loop ends with an all-false rgroup control such
+   as a mask.  Return -1 otherwise.  */
+
 widest_int
-vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo)
+vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo)
 {
   tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index fb82c8d940f..285cdb9efa9 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1771,9 +1771,9 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);
 
 /* Check whether a load or store statement in the loop described by
-   LOOP_VINFO is possible in a fully-masked loop.  This is testing
-   whether the vectorizer pass has the appropriate support, as well as
-   whether the target does.
+   LOOP_VINFO is possible in a loop using partial vectors.  This is
+   testing whether the vectorizer pass has the appropriate support,
+   as well as whether the target does.
 
    VLS_TYPE says whether the statement is a load or store and VECTYPE
    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
@@ -1783,14 +1783,15 @@ static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
    its arguments.  If the load or store is conditional, SCALAR_MASK is the
    condition under which it occurs.
 
-   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a fully-masked loop is not
-   supported, otherwise record the required mask types.  */
+   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
+   vectors is not supported, otherwise record the required rgroup control
+   types.  */
 
 static void
-check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
-                         vec_load_store_type vls_type, int group_size,
-                         vect_memory_access_type memory_access_type,
-                         gather_scatter_info *gs_info, tree scalar_mask)
+check_load_store_for_partial_vectors (
+  loop_vec_info loop_vinfo, tree vectype, vec_load_store_type vls_type,
+  int group_size, vect_memory_access_type memory_access_type,
+  gather_scatter_info *gs_info, tree scalar_mask)
 {
   /* Invariant loads need no special support.  */
   if (memory_access_type == VMAT_INVARIANT)
@@ -1807,8 +1808,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because the"
-                            " target doesn't have an appropriate masked"
+                            "can't operate on partial vectors because"
+                            " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
@@ -1830,8 +1831,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                            "can't use a fully-masked loop because the"
-                            " target doesn't have an appropriate masked"
+                            "can't operate on partial vectors because"
+                            " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
@@ -1848,8 +1849,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
         scalar loop.  We need more work to support other mappings.  */
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "can't use a fully-masked loop because an access"
-                        " isn't contiguous.\n");
+                        "can't operate on partial vectors because an"
+                        " access isn't contiguous.\n");
       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
       return;
     }
@@ -7529,8 +7530,9 @@ vectorizable_store (vec_info *vinfo,
 
       if (loop_vinfo
          && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-       check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
-                                 memory_access_type, &gs_info, mask);
+       check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
+                                             group_size, memory_access_type,
+                                             &gs_info, mask);
 
       if (slp_node
          && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
@@ -8836,8 +8838,9 @@ vectorizable_load (vec_info *vinfo,
 
       if (loop_vinfo
          && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-       check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
-                                 memory_access_type, &gs_info, mask);
+       check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
+                                             group_size, memory_access_type,
+                                             &gs_info, mask);
 
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index d2a6dc73b90..51bfc975a4a 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -534,9 +534,10 @@ public:
      elements that should be false in the first mask).  */
   tree mask_skip_niters;
 
-  /* Type of the variables to use in the WHILE_ULT call for fully-masked
-     loops.  */
-  tree mask_compare_type;
+  /* The type that the loop control IV should be converted to before
+     testing which of the VF scalars are active and inactive.
+     Only meaningful if LOOP_VINFO_USING_PARTIAL_VECTORS_P.  */
+  tree rgroup_compare_type;
 
   /* For #pragma omp simd if (x) loops the x expression.  If constant 0,
      the loop should not be vectorized, if constant non-zero, simd_if_cond
@@ -545,9 +546,9 @@ public:
      is false and vectorized loop otherwise.  */
   tree simd_if_cond;
 
-  /* Type of the IV to use in the WHILE_ULT call for fully-masked
-     loops.  */
-  tree iv_type;
+  /* The type that the vector loop control IV should have when
+     LOOP_VINFO_USING_PARTIAL_VECTORS_P is true.  */
+  tree rgroup_iv_type;
 
   /* Unknown DRs according to which loop was peeled.  */
   class dr_vec_info *unaligned_dr;
@@ -699,8 +700,8 @@ public:
 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
 #define LOOP_VINFO_MASKS(L)                (L)->masks
 #define LOOP_VINFO_MASK_SKIP_NITERS(L)     (L)->mask_skip_niters
-#define LOOP_VINFO_MASK_COMPARE_TYPE(L)    (L)->mask_compare_type
-#define LOOP_VINFO_MASK_IV_TYPE(L)         (L)->iv_type
+#define LOOP_VINFO_RGROUP_COMPARE_TYPE(L)  (L)->rgroup_compare_type
+#define LOOP_VINFO_RGROUP_IV_TYPE(L)       (L)->rgroup_iv_type
 #define LOOP_VINFO_PTR_MASK(L)             (L)->ptr_mask
 #define LOOP_VINFO_LOOP_NEST(L)            (L)->shared->loop_nest
 #define LOOP_VINFO_DATAREFS(L)             (L)->shared->datarefs
@@ -1834,7 +1835,7 @@ extern tree vect_create_addr_base_for_vector_ref (vec_info *,
                                                  tree, tree = NULL_TREE);
 
 /* In tree-vect-loop.c.  */
-extern widest_int vect_iv_limit_for_full_masking (loop_vec_info loop_vinfo);
+extern widest_int vect_iv_limit_for_partial_vectors (loop_vec_info loop_vinfo);
 /* Used in tree-vect-loop-manip.c */
 extern void determine_peel_for_niter (loop_vec_info);
 /* Used in gimple-loop-interchange.c and tree-parloops.c.  */
-- 
