Hi, Richard and Richi.

This patch adds MASK_LEN_{LOAD_LANES,STORE_LANES} support to the vectorizer.

Consider this simple case:

void __attribute__ ((noinline, noclone))
foo (int *__restrict a, int *__restrict b, int *__restrict c,
          int *__restrict d, int *__restrict e, int *__restrict f,
          int *__restrict g, int *__restrict h, int *__restrict j, int n)
{
  for (int i = 0; i < n; ++i)
    {
      a[i] = j[i * 8];
      b[i] = j[i * 8 + 1];
      c[i] = j[i * 8 + 2];
      d[i] = j[i * 8 + 3];
      e[i] = j[i * 8 + 4];
      f[i] = j[i * 8 + 5];
      g[i] = j[i * 8 + 6];
      h[i] = j[i * 8 + 7];
    }
}

RVV Gimple IR:

  _79 = .SELECT_VL (ivtmp_81, POLY_INT_CST [4, 4]);
  ivtmp_125 = _79 * 32;
  vect_array.8 = .MASK_LEN_LOAD_LANES (vectp_j.6_124, 32B, { -1, ... }, _79, 0);
  vect__8.9_122 = vect_array.8[0];
  vect__8.10_121 = vect_array.8[1];
  vect__8.11_120 = vect_array.8[2];
  vect__8.12_119 = vect_array.8[3];
  vect__8.13_118 = vect_array.8[4];
  vect__8.14_117 = vect_array.8[5];
  vect__8.15_116 = vect_array.8[6];
  vect__8.16_115 = vect_array.8[7];
  vect_array.8 ={v} {CLOBBER};
  ivtmp_114 = _79 * 4;
  .MASK_LEN_STORE (vectp_a.17_113, 32B, { -1, ... }, _79, 0, vect__8.9_122);
  .MASK_LEN_STORE (vectp_b.19_109, 32B, { -1, ... }, _79, 0, vect__8.10_121);
  .MASK_LEN_STORE (vectp_c.21_105, 32B, { -1, ... }, _79, 0, vect__8.11_120);
  .MASK_LEN_STORE (vectp_d.23_101, 32B, { -1, ... }, _79, 0, vect__8.12_119);
  .MASK_LEN_STORE (vectp_e.25_97, 32B, { -1, ... }, _79, 0, vect__8.13_118);
  .MASK_LEN_STORE (vectp_f.27_93, 32B, { -1, ... }, _79, 0, vect__8.14_117);
  .MASK_LEN_STORE (vectp_g.29_89, 32B, { -1, ... }, _79, 0, vect__8.15_116);
  .MASK_LEN_STORE (vectp_h.31_85, 32B, { -1, ... }, _79, 0, vect__8.16_115);

ASM:

foo:
        lw      t4,8(sp)
        ld      t5,0(sp)
        ble     t4,zero,.L5
.L3:
        vsetvli t1,t4,e8,mf4,ta,ma
        vlseg8e32.v     v8,(t5)
        slli    t3,t1,2
        slli    t6,t1,5
        vse32.v v8,0(a0)
        vse32.v v9,0(a1)
        vse32.v v10,0(a2)
        vse32.v v11,0(a3)
        vse32.v v12,0(a4)
        vse32.v v13,0(a5)
        vse32.v v14,0(a6)
        vse32.v v15,0(a7)
        sub     t4,t4,t1
        add     t5,t5,t6
        add     a0,a0,t3
        add     a1,a1,t3
        add     a2,a2,t3
        add     a3,a3,t3
        add     a4,a4,t3
        add     a5,a5,t3
        add     a6,a6,t3
        add     a7,a7,t3
        bne     t4,zero,.L3
.L5:
        ret

The details of the approach:

Step 1 - Modify the LANES LOAD/STORE support functions
(vect_load_lanes_supported/vect_store_lanes_supported):

+/* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT
+   vectors of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
 
-bool
+internal_fn
 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
                           bool masked_p)
 {
-  if (masked_p)
-    return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
-                                        vec_mask_load_lanes_optab,
-                                        vectype, count);
+  if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
+                                   vec_mask_len_load_lanes_optab,
+                                   vectype, count))
+    return IFN_MASK_LEN_LOAD_LANES;
+  else if (masked_p)
+    {
+      if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
+                                       vec_mask_load_lanes_optab,
+                                       vectype, count))
+       return IFN_MASK_LOAD_LANES;
+    }
   else
-    return vect_lanes_optab_supported_p ("vec_load_lanes",
-                                        vec_load_lanes_optab,
-                                        vectype, count);
+    {
+      if (vect_lanes_optab_supported_p ("vec_load_lanes",
+                                       vec_load_lanes_optab,
+                                       vectype, count))
+       return IFN_LOAD_LANES;
+    }
+  return IFN_LAST;
 }
 
Instead of returning TRUE or FALSE to say whether the target supports LANES
LOAD/STORE, the functions now return the internal_fn of the LANES LOAD/STORE
variant the target supports.  If the target doesn't support any of the LANES
LOAD/STORE optabs, they return IFN_LAST.
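
Callers that only need a yes/no answer now compare the result against
IFN_LAST.  A minimal sketch of the caller-side change (mirroring the
tree-vect-loop.cc/tree-vect-slp.cc hunks below):

  /* Before: the predicate returned a bool.  */
  if (vect_store_lanes_supported (vectype, group_size, false))
    ...

  /* After: it returns an internal_fn; IFN_LAST means "not supported".  */
  if (vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST)
    ...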

Step 2 - Compute the IFN for LANES LOAD/STORE once, in get_group_load_store_type:

      if (!STMT_VINFO_STRIDED_P (first_stmt_info)
          && (can_overrun_p || !would_overrun_p)
          && compare_step_with_zero (vinfo, stmt_info) > 0)
        {
          /* First cope with the degenerate case of a single-element
             vector.  */
          if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
            ;

          else
            {
              /* Otherwise try using LOAD/STORE_LANES.  */
              *lanes_ifn
                = vls_type == VLS_LOAD
                    ? vect_load_lanes_supported (vectype, group_size, masked_p)
                    : vect_store_lanes_supported (vectype, group_size,
                                                  masked_p);
              if (*lanes_ifn != IFN_LAST)
                {
                  *memory_access_type = VMAT_LOAD_STORE_LANES;
                  overrun_p = would_overrun_p;
                }

              /* If that fails, try using permuting loads.  */
              else if (vls_type == VLS_LOAD
                         ? vect_grouped_load_supported (vectype,
                                                        single_element_p,
                                                        group_size)
                         : vect_grouped_store_supported (vectype, group_size))
                {
                  *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
                  overrun_p = would_overrun_p;
                }
            }
        }
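
The computed IFN is handed back through a new out-parameter of
get_group_load_store_type/get_load_store_type, so that vectorizable_store and
vectorizable_load can pick the right internal function later.  The caller-side
plumbing (taken from the tree-vect-stmts.cc hunks below) looks like:

  internal_fn lanes_ifn;
  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
                            ncopies, &memory_access_type, &poffset,
                            &alignment_support_scheme, &misalignment, &gs_info,
                            &lanes_ifn))
    return false;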

Step 3 - Build MASK_LEN_{LOAD_LANES,STORE_LANES} Gimple IR:

+         if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
+           {
+             if (loop_lens)
+               final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                              ncopies, vectype, j, 1);
+             else
+               final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+             signed char biasval
+               = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+             bias = build_int_cst (intQI_type_node, biasval);
+             if (!final_mask)
+               {
+                 mask_vectype = truth_type_for (vectype);
+                 final_mask = build_minus_one_cst (mask_vectype);
+               }
+           }
+
          gcall *call;
-         if (final_mask)
+         if (final_len && final_mask)
+           {
+             /* Emit:
+                  MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
+                                        LEN, BIAS, VEC_ARRAY).  */
+             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
+             tree alias_ptr = build_int_cst (ref_type, align);
+             call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
+                                                dataref_ptr, alias_ptr,
+                                                final_mask, final_len, bias,
+                                                vec_array);
+           }
+         else if (final_mask)

The LEN and MASK handling is exactly the same as for the other MASK_LEN_*
loads/stores.
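
For reference, the emitted calls have the operand layout below, which is what
the internal_fn_{len,mask,stored_value}_index changes in internal-fn.cc encode
(mask at index 2, len at index 3, stored value at index 5):

  VEC_ARRAY = .MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, LEN, BIAS);
  .MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK, LEN, BIAS, VEC_ARRAY);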

gcc/ChangeLog:

        * internal-fn.cc (internal_load_fn_p): Add
        MASK_LEN_{LOAD_LANES,STORE_LANES} support.
        (internal_store_fn_p): Ditto.
        (internal_fn_len_index): Ditto.
        (internal_fn_mask_index): Ditto.
        (internal_fn_stored_value_index): Ditto.
        * tree-vect-data-refs.cc (vect_store_lanes_supported): Ditto.
        (vect_load_lanes_supported): Ditto.
        * tree-vect-loop.cc: Ditto.
        * tree-vect-slp.cc (vect_slp_prefer_store_lanes_p): Ditto.
        * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
        (get_group_load_store_type): Ditto.
        (vectorizable_store): Ditto.
        (vectorizable_load): Ditto.
        * tree-vectorizer.h (vect_store_lanes_supported): Ditto.
        (vect_load_lanes_supported): Ditto.

---
 gcc/internal-fn.cc         |   7 ++
 gcc/tree-vect-data-refs.cc |  61 ++++++++++------
 gcc/tree-vect-loop.cc      |  11 +--
 gcc/tree-vect-slp.cc       |   2 +-
 gcc/tree-vect-stmts.cc     | 141 ++++++++++++++++++++++++++++---------
 gcc/tree-vectorizer.h      |   4 +-
 6 files changed, 163 insertions(+), 63 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 4f2b20a79e5..cc1ede58799 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -4578,6 +4578,7 @@ internal_load_fn_p (internal_fn fn)
     case IFN_MASK_LOAD:
     case IFN_LOAD_LANES:
     case IFN_MASK_LOAD_LANES:
+    case IFN_MASK_LEN_LOAD_LANES:
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
     case IFN_MASK_LEN_GATHER_LOAD:
@@ -4600,6 +4601,7 @@ internal_store_fn_p (internal_fn fn)
     case IFN_MASK_STORE:
     case IFN_STORE_LANES:
     case IFN_MASK_STORE_LANES:
+    case IFN_MASK_LEN_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
     case IFN_MASK_LEN_SCATTER_STORE:
@@ -4672,6 +4674,8 @@ internal_fn_len_index (internal_fn fn)
     case IFN_COND_LEN_NEG:
     case IFN_MASK_LEN_LOAD:
     case IFN_MASK_LEN_STORE:
+    case IFN_MASK_LEN_LOAD_LANES:
+    case IFN_MASK_LEN_STORE_LANES:
       return 3;
 
     default:
@@ -4689,8 +4693,10 @@ internal_fn_mask_index (internal_fn fn)
     {
     case IFN_MASK_LOAD:
     case IFN_MASK_LOAD_LANES:
+    case IFN_MASK_LEN_LOAD_LANES:
     case IFN_MASK_STORE:
     case IFN_MASK_STORE_LANES:
+    case IFN_MASK_LEN_STORE_LANES:
     case IFN_MASK_LEN_LOAD:
     case IFN_MASK_LEN_STORE:
       return 2;
@@ -4726,6 +4732,7 @@ internal_fn_stored_value_index (internal_fn fn)
       return 4;
 
     case IFN_MASK_LEN_STORE:
+    case IFN_MASK_LEN_STORE_LANES:
       return 5;
 
     default:
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index a3570c45b52..3e9a284666c 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -5438,22 +5438,31 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
   return false;
 }
 
+/* Return FN if vec_{mask_,mask_len_}store_lanes is available for COUNT vectors
+   of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
 
-/* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
-   type VECTYPE.  MASKED_P says whether the masked form is needed.  */
-
-bool
+internal_fn
 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
                            bool masked_p)
 {
-  if (masked_p)
-    return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
-                                        vec_mask_store_lanes_optab,
-                                        vectype, count);
+  if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
+                                   vec_mask_len_store_lanes_optab, vectype,
+                                   count))
+    return IFN_MASK_LEN_STORE_LANES;
+  else if (masked_p)
+    {
+      if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
+                                       vec_mask_store_lanes_optab, vectype,
+                                       count))
+       return IFN_MASK_STORE_LANES;
+    }
   else
-    return vect_lanes_optab_supported_p ("vec_store_lanes",
-                                        vec_store_lanes_optab,
-                                        vectype, count);
+    {
+      if (vect_lanes_optab_supported_p ("vec_store_lanes",
+                                       vec_store_lanes_optab, vectype, count))
+       return IFN_STORE_LANES;
+    }
+  return IFN_LAST;
 }
 
 
@@ -6056,21 +6065,31 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
   return false;
 }
 
-/* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
-   type VECTYPE.  MASKED_P says whether the masked form is needed.  */
+/* Return FN if vec_{masked_,mask_len_}load_lanes is available for COUNT vectors
+   of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
 
-bool
+internal_fn
 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
                           bool masked_p)
 {
-  if (masked_p)
-    return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
-                                        vec_mask_load_lanes_optab,
-                                        vectype, count);
+  if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
+                                   vec_mask_len_load_lanes_optab, vectype,
+                                   count))
+    return IFN_MASK_LEN_LOAD_LANES;
+  else if (masked_p)
+    {
+      if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
+                                       vec_mask_load_lanes_optab, vectype,
+                                       count))
+       return IFN_MASK_LOAD_LANES;
+    }
   else
-    return vect_lanes_optab_supported_p ("vec_load_lanes",
-                                        vec_load_lanes_optab,
-                                        vectype, count);
+    {
+      if (vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
+                                       vectype, count))
+       return IFN_LOAD_LANES;
+    }
+  return IFN_LAST;
 }
 
 /* Function vect_permute_load_chain.
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index bc3063c3615..1fcd8d07ea1 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2839,7 +2839,8 @@ start_over:
             instructions record it and move on to the next instance.  */
          if (loads_permuted
              && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
-             && vect_store_lanes_supported (vectype, group_size, false))
+             && vect_store_lanes_supported (vectype, group_size, false)
+                  != IFN_LAST)
            {
              FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
                {
@@ -2848,9 +2849,9 @@ start_over:
                  /* Use SLP for strided accesses (or if we can't
                     load-lanes).  */
                  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
-                     || ! vect_load_lanes_supported
+                     || vect_load_lanes_supported
                            (STMT_VINFO_VECTYPE (stmt_vinfo),
-                            DR_GROUP_SIZE (stmt_vinfo), false))
+                            DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
                    break;
                }
 
@@ -3153,7 +3154,7 @@ again:
       vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
       unsigned int size = DR_GROUP_SIZE (vinfo);
       tree vectype = STMT_VINFO_VECTYPE (vinfo);
-      if (! vect_store_lanes_supported (vectype, size, false)
+      if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST
         && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
         && ! vect_grouped_store_supported (vectype, size))
        return opt_result::failure_at (vinfo->stmt,
@@ -3165,7 +3166,7 @@ again:
          bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
          size = DR_GROUP_SIZE (vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo);
-         if (! vect_load_lanes_supported (vectype, size, false)
+         if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
              && ! vect_grouped_load_supported (vectype, single_element_p,
                                                size))
            return opt_result::failure_at (vinfo->stmt,
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index cf91b21cf7d..9ad2634762e 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -3094,7 +3094,7 @@ vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
   if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype))
       || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype)))
     return false;
-  return vect_store_lanes_supported (vectype, group_size, false);
+  return vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST;
 }
 
 /* Analyze an SLP instance starting from a group of grouped stores.  Call
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 86d033aa60c..cd8e0a76374 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1610,9 +1610,15 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
   bool is_load = (vls_type == VLS_LOAD);
   if (memory_access_type == VMAT_LOAD_STORE_LANES)
     {
-      if (is_load
-         ? !vect_load_lanes_supported (vectype, group_size, true)
-         : !vect_store_lanes_supported (vectype, group_size, true))
+      internal_fn ifn
+       = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
+                  : vect_store_lanes_supported (vectype, group_size, true));
+      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
+       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
+       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
+                              scalar_mask);
+      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -1620,10 +1626,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                             " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
-         return;
        }
-      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
-                            scalar_mask);
       return;
     }
 
@@ -2074,7 +2077,8 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
                           poly_int64 *poffset,
                           dr_alignment_support *alignment_support_scheme,
                           int *misalignment,
-                          gather_scatter_info *gs_info)
+                          gather_scatter_info *gs_info,
+                          internal_fn *lanes_ifn)
 {
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
@@ -2272,24 +2276,30 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
          if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
            ;
 
-         /* Otherwise try using LOAD/STORE_LANES.  */
-         else if (vls_type == VLS_LOAD
-                  ? vect_load_lanes_supported (vectype, group_size, masked_p)
-                  : vect_store_lanes_supported (vectype, group_size,
-                                                masked_p))
+         else
            {
-             *memory_access_type = VMAT_LOAD_STORE_LANES;
-             overrun_p = would_overrun_p;
-           }
+             /* Otherwise try using LOAD/STORE_LANES.  */
+             *lanes_ifn
+               = vls_type == VLS_LOAD
+                   ? vect_load_lanes_supported (vectype, group_size, masked_p)
+                   : vect_store_lanes_supported (vectype, group_size,
+                                                 masked_p);
+             if (*lanes_ifn != IFN_LAST)
+               {
+                 *memory_access_type = VMAT_LOAD_STORE_LANES;
+                 overrun_p = would_overrun_p;
+               }
 
-         /* If that fails, try using permuting loads.  */
-         else if (vls_type == VLS_LOAD
-                  ? vect_grouped_load_supported (vectype, single_element_p,
-                                                 group_size)
-                  : vect_grouped_store_supported (vectype, group_size))
-           {
-             *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
-             overrun_p = would_overrun_p;
+             /* If that fails, try using permuting loads.  */
+             else if (vls_type == VLS_LOAD
+                        ? vect_grouped_load_supported (vectype,
+                                                       single_element_p,
+                                                       group_size)
+                        : vect_grouped_store_supported (vectype, group_size))
+               {
+                 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
+                 overrun_p = would_overrun_p;
+               }
            }
        }
 
@@ -2378,7 +2388,8 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
                     poly_int64 *poffset,
                     dr_alignment_support *alignment_support_scheme,
                     int *misalignment,
-                    gather_scatter_info *gs_info)
+                    gather_scatter_info *gs_info,
+                    internal_fn *lanes_ifn)
 {
   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@@ -2441,7 +2452,7 @@ get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
                                      masked_p,
                                      vls_type, memory_access_type, poffset,
                                      alignment_support_scheme,
-                                     misalignment, gs_info))
+                                     misalignment, gs_info, lanes_ifn))
        return false;
     }
   else if (STMT_VINFO_STRIDED_P (stmt_info))
@@ -3087,11 +3098,8 @@ vect_get_loop_variant_data_ptr_increment (
   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
   tree step = vect_dr_behavior (vinfo, dr_info)->step;
 
-  /* TODO: We don't support gather/scatter or load_lanes/store_lanes for pointer
-     IVs are updated by variable amount but we will support them in the future.
-   */
-  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
-             && memory_access_type != VMAT_LOAD_STORE_LANES);
+  /* gather/scatter never reach here.  */
+  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
 
   /* When we support SELECT_VL pattern, we dynamic adjust
      the memory address by .SELECT_VL result.
@@ -8094,9 +8102,11 @@ vectorizable_store (vec_info *vinfo,
   enum dr_alignment_support alignment_support_scheme;
   int misalignment;
   poly_int64 poffset;
+  internal_fn lanes_ifn;
   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
                            ncopies, &memory_access_type, &poffset,
-                           &alignment_support_scheme, &misalignment, &gs_info))
+                           &alignment_support_scheme, &misalignment, &gs_info,
+                           &lanes_ifn))
     return false;
 
   if (mask)
@@ -8885,6 +8895,8 @@ vectorizable_store (vec_info *vinfo,
            }
 
          tree final_mask = NULL;
+         tree final_len = NULL;
+         tree bias = NULL;
          if (loop_masks)
            final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
                                             ncopies, vectype, j);
@@ -8892,8 +8904,37 @@ vectorizable_store (vec_info *vinfo,
            final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
                                           final_mask, vec_mask, gsi);
 
+         if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
+           {
+             if (loop_lens)
+               final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                              ncopies, vectype, j, 1);
+             else
+               final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+             signed char biasval
+               = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+             bias = build_int_cst (intQI_type_node, biasval);
+             if (!final_mask)
+               {
+                 mask_vectype = truth_type_for (vectype);
+                 final_mask = build_minus_one_cst (mask_vectype);
+               }
+           }
+
          gcall *call;
-         if (final_mask)
+         if (final_len && final_mask)
+           {
+             /* Emit:
+                  MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
+                                        LEN, BIAS, VEC_ARRAY).  */
+             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
+             tree alias_ptr = build_int_cst (ref_type, align);
+             call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
+                                                dataref_ptr, alias_ptr,
+                                                final_mask, final_len, bias,
+                                                vec_array);
+           }
+         else if (final_mask)
            {
              /* Emit:
                   MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
@@ -9598,9 +9639,11 @@ vectorizable_load (vec_info *vinfo,
   enum dr_alignment_support alignment_support_scheme;
   int misalignment;
   poly_int64 poffset;
+  internal_fn lanes_ifn;
   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
                            ncopies, &memory_access_type, &poffset,
-                           &alignment_support_scheme, &misalignment, &gs_info))
+                           &alignment_support_scheme, &misalignment, &gs_info,
+                           &lanes_ifn))
     return false;
 
   if (mask)
@@ -10386,6 +10429,8 @@ vectorizable_load (vec_info *vinfo,
          tree vec_array = create_vector_array (vectype, vec_num);
 
          tree final_mask = NULL_TREE;
+         tree final_len = NULL_TREE;
+         tree bias = NULL_TREE;
          if (loop_masks)
            final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
                                             ncopies, vectype, j);
@@ -10393,8 +10438,36 @@ vectorizable_load (vec_info *vinfo,
            final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
                                           vec_mask, gsi);
 
+         if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
+           {
+             if (loop_lens)
+               final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                              ncopies, vectype, j, 1);
+             else
+               final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+             signed char biasval
+               = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+             bias = build_int_cst (intQI_type_node, biasval);
+             if (!final_mask)
+               {
+                 mask_vectype = truth_type_for (vectype);
+                 final_mask = build_minus_one_cst (mask_vectype);
+               }
+           }
+
          gcall *call;
-         if (final_mask)
+         if (final_len && final_mask)
+           {
+             /* Emit:
+                  VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
+                                                   VEC_MASK, LEN, BIAS).  */
+             unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
+             tree alias_ptr = build_int_cst (ref_type, align);
+             call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
+                                                dataref_ptr, alias_ptr,
+                                                final_mask, final_len, bias);
+           }
+         else if (final_mask)
            {
              /* Emit:
                   VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 1de144988c8..53a3d78d545 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -2297,9 +2297,9 @@ extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
 extern void vect_copy_ref_info (tree, tree);
 extern tree vect_create_destination_var (tree, tree);
 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
-extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
+extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
-extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
+extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
 extern void vect_permute_store_chain (vec_info *, vec<tree> &,
                                      unsigned int, stmt_vec_info,
                                      gimple_stmt_iterator *, vec<tree> *);
-- 
2.36.3
