https://gcc.gnu.org/g:57f8a2f67c1536be23231808ab00613ab69193ed

commit r15-578-g57f8a2f67c1536be23231808ab00613ab69193ed
Author: Pan Li <pan2...@intel.com>
Date:   Thu May 16 09:58:13 2024 +0800

    Vect: Support loop len in vectorizable early exit
    
    This patch adds early break auto-vectorization support for targets that
    use lengths for partial vectorization.  Consider the following example:
    
    unsigned vect_a[802];
    unsigned vect_b[802];
    
    void test (unsigned x, int n)
    {
      for (int i = 0; i < n; i++)
      {
        vect_b[i] = x + i;
    
        if (vect_a[i] > x)
          break;
    
        vect_a[i] = x;
      }
    }
    
    We use VCOND_MASK_LEN to simulate generating (mask && i < len + bias).
    The resulting IR for RVV then looks like below:
    
      ...
      _87 = .SELECT_VL (ivtmp_85, POLY_INT_CST [32, 32]);
      _55 = (int) _87;
      ...
      mask_patt_6.13_69 = vect_cst__62 < vect__3.12_67;
      vec_len_mask_72 = .VCOND_MASK_LEN (mask_patt_6.13_69, { -1, ... }, \
        {0, ... }, _87, 0);
      if (vec_len_mask_72 != { 0, ... })
        goto <bb 6>; [5.50%]
      else
        goto <bb 7>; [94.50%]
    
    The following tests passed with this patch:
    1. The riscv full regression tests.
    2. The x86 bootstrap tests.
    3. The x86 full regression tests.
    
    gcc/ChangeLog:
    
            * tree-vect-loop.cc (vect_gen_loop_len_mask): New func to gen
            the loop len mask.
            * tree-vect-stmts.cc (vectorizable_early_exit): Invoke the
            vect_gen_loop_len_mask for 1 or more stmt(s).
            * tree-vectorizer.h (vect_gen_loop_len_mask): New func decl
            for vect_gen_loop_len_mask.
    
    Signed-off-by: Pan Li <pan2...@intel.com>

Diff:
---
 gcc/tree-vect-loop.cc  | 27 +++++++++++++++++++++++++++
 gcc/tree-vect-stmts.cc | 17 +++++++++++++++--
 gcc/tree-vectorizer.h  |  4 ++++
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 361aec064884..83c0544b6aa5 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -11416,6 +11416,33 @@ vect_get_loop_len (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
   return loop_len;
 }
 
+/* Generate the tree for the loop len mask and return it.  Given the lens,
+   nvectors, vectype, index and factor to gen the len mask as below.
+
+   tree len_mask = VCOND_MASK_LEN (compare_mask, ones, zero, len, bias)
+*/
+tree
+vect_gen_loop_len_mask (loop_vec_info loop_vinfo, gimple_stmt_iterator *gsi,
+                       gimple_stmt_iterator *cond_gsi, vec_loop_lens *lens,
+                       unsigned int nvectors, tree vectype, tree stmt,
+                       unsigned int index, unsigned int factor)
+{
+  tree all_one_mask = build_all_ones_cst (vectype);
+  tree all_zero_mask = build_zero_cst (vectype);
+  tree len = vect_get_loop_len (loop_vinfo, gsi, lens, nvectors, vectype, index,
+                               factor);
+  tree bias = build_int_cst (intQI_type_node,
+                            LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo));
+  tree len_mask = make_temp_ssa_name (TREE_TYPE (stmt), NULL, "vec_len_mask");
+  gcall *call = gimple_build_call_internal (IFN_VCOND_MASK_LEN, 5, stmt,
+                                           all_one_mask, all_zero_mask, len,
+                                           bias);
+  gimple_call_set_lhs (call, len_mask);
+  gsi_insert_before (cond_gsi, call, GSI_SAME_STMT);
+
+  return len_mask;
+}
+
 /* Scale profiling counters by estimation for LOOP which is vectorized
    by factor VF.
    If FLAT is true, the loop we started with had unrealistically flat
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index b8a71605f1bc..672959501bb7 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -12895,7 +12895,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
     ncopies = vect_get_num_copies (loop_vinfo, vectype);
 
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
+  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
   bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
+  bool len_loop_p = LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
 
   /* Now build the new conditional.  Pattern gimple_conds get dropped during
      codegen so we must replace the original insn.  */
@@ -12959,12 +12961,11 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
        {
          if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
                                              OPTIMIZE_FOR_SPEED))
-           return false;
+           vect_record_loop_len (loop_vinfo, lens, ncopies, vectype, 1);
          else
            vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
        }
 
-
       return true;
     }
 
@@ -13017,6 +13018,15 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
                                  stmts[i], &cond_gsi);
            workset.quick_push (stmt_mask);
          }
+      else if (len_loop_p)
+       for (unsigned i = 0; i < stmts.length (); i++)
+         {
+           tree len_mask = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi,
+                                                   lens, ncopies, vectype,
+                                                   stmts[i], i, 1);
+
+           workset.quick_push (len_mask);
+         }
       else
        workset.splice (stmts);
 
@@ -13041,6 +13051,9 @@ vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
          new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
                                       new_temp, &cond_gsi);
        }
+      else if (len_loop_p)
+       new_temp = vect_gen_loop_len_mask (loop_vinfo, gsi, &cond_gsi, lens,
+                                          ncopies, vectype, new_temp, 0, 1);
     }
 
   gcc_assert (new_temp);
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index db44d730b702..93bc30ef6605 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2408,6 +2408,10 @@ extern void vect_record_loop_len (loop_vec_info, vec_loop_lens *, unsigned int,
 extern tree vect_get_loop_len (loop_vec_info, gimple_stmt_iterator *,
                               vec_loop_lens *, unsigned int, tree,
                               unsigned int, unsigned int);
+extern tree vect_gen_loop_len_mask (loop_vec_info, gimple_stmt_iterator *,
+                                   gimple_stmt_iterator *, vec_loop_lens *,
+                                   unsigned int, tree, tree, unsigned int,
+                                   unsigned int);
 extern gimple_seq vect_gen_len (tree, tree, tree, tree);
 extern stmt_vec_info info_for_reduction (vec_info *, stmt_vec_info);
 extern bool reduction_fn_for_scalar_code (code_helper, internal_fn *);

Reply via email to