From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai>

Targets like ARM SVE in GCC have an elegant way to handle both loop control
and flow control simultaneously:

loop_control_mask = WHILE_ULT
flow_control_mask = comparison
control_mask = loop_control_mask & flow_control_mask;
MASK_LOAD (control_mask)
MASK_STORE (control_mask)

However, targets like RVV (RISC-V Vector) cannot use this approach in
auto-vectorization since RVV uses a length in loop control.

This patch adds LEN_MASK_{LOAD,STORE} to support flow control for targets
like RISC-V that use a length in loop control.
It normalizes a load/store into a LEN_MASK_{LOAD,STORE} as long as either
the length or the mask is valid.  The length is the outcome of SELECT_VL or
MIN_EXPR.  The mask is the outcome of a comparison.

The LEN_MASK_{LOAD,STORE} format is defined as follows:
1). LEN_MASK_LOAD (ptr, align, length, mask).
2). LEN_MASK_STORE (ptr, align, length, mask, vec).

Consider the following 4 cases:

VLA: Variable-length auto-vectorization
VLS: Specific-length auto-vectorization

Case 1 (VLS): -mrvv-vector-bits=128   IR (Does not use LEN_MASK_*):
Code:                                   v1 = MEM (...)
  for (int i = 0; i < 4; i++)           v2 = MEM (...)
    a[i] = b[i] + c[i];                 v3 = v1 + v2 
                                        MEM[...] = v3

Case 2 (VLS): -mrvv-vector-bits=128   IR (LEN_MASK_* with length = VF, mask = 
comparison):
Code:                                   mask = comparison
  for (int i = 0; i < 4; i++)           v1 = LEN_MASK_LOAD (length = VF, mask)
    if (cond[i])                        v2 = LEN_MASK_LOAD (length = VF, mask) 
      a[i] = b[i] + c[i];               v3 = v1 + v2
                                        LEN_MASK_STORE (length = VF, mask, v3)
           
Case 3 (VLA):
Code:                                   loop_len = SELECT_VL or MIN
  for (int i = 0; i < n; i++)           v1 = LEN_MASK_LOAD (length = loop_len, 
mask = {-1,-1,...})
      a[i] = b[i] + c[i];               v2 = LEN_MASK_LOAD (length = loop_len, 
mask = {-1,-1,...})
                                        v3 = v1 + v2                            
                                        LEN_MASK_STORE (length = loop_len, mask 
= {-1,-1,...}, v3)

Case 4 (VLA):
Code:                                   loop_len = SELECT_VL or MIN
  for (int i = 0; i < n; i++)           mask = comparison
      if (cond[i])                      v1 = LEN_MASK_LOAD (length = loop_len, 
mask)
      a[i] = b[i] + c[i];               v2 = LEN_MASK_LOAD (length = loop_len, 
mask)
                                        v3 = v1 + v2                            
                                        LEN_MASK_STORE (length = loop_len, 
mask, v3)

More features:
1. Support gimple folding for LEN_MASK_{LOAD,STORE}:
   LEN_MASK_STORE (length = vf, mask = {-1,-1,...}, v) ===> MEM[...] = v
2. Allow DSE (dead store elimination) for LEN_MASK_{LOAD,STORE}.

Bootstrapped && regression tested on x86 with no surprising differences.

gcc/ChangeLog:

        * doc/md.texi: Add LEN_MASK_{LOAD,STORE}.
        * genopinit.cc (main): Ditto.
        (CMP_NAME): Ditto.
        * gimple-fold.cc (arith_overflowed_p): Ditto.
        (gimple_fold_partial_load_store_mem_ref): Ditto.
        (gimple_fold_partial_store): Ditto.
        (gimple_fold_call): Ditto.
        * internal-fn.cc (len_maskload_direct): Ditto.
        (len_maskstore_direct): Ditto.
        (expand_partial_load_optab_fn): Ditto.
        (expand_len_maskload_optab_fn): Ditto.
        (expand_partial_store_optab_fn): Ditto.
        (expand_len_maskstore_optab_fn): Ditto.
        (direct_len_maskload_optab_supported_p): Ditto.
        (direct_len_maskstore_optab_supported_p): Ditto.
        (internal_load_fn_p): Ditto.
        (internal_store_fn_p): Ditto.
        (internal_fn_mask_index): Ditto.
        (internal_fn_stored_value_index): Ditto.
        * internal-fn.def (LEN_MASK_LOAD): Ditto.
        (LEN_MASK_STORE): Ditto.
        * optabs-query.cc (can_vec_len_mask_load_store_p): Ditto.
        * optabs-query.h (can_vec_len_mask_load_store_p): Ditto.
        * optabs.def (OPTAB_CD): Ditto.
        * tree-data-ref.cc (get_references_in_stmt): Ditto.
        * tree-if-conv.cc (ifcvt_can_use_mask_load_store): Ditto.
        * tree-ssa-alias.cc (ref_maybe_used_by_call_p_1): Ditto.
        (call_may_clobber_ref_p_1): Ditto.
        * tree-ssa-dse.cc (initialize_ao_ref_for_dse): Ditto.
        (dse_optimize_stmt): Ditto.
        * tree-ssa-loop-ivopts.cc (get_mem_type_for_internal_fn): Ditto.
        (get_alias_ptr_type_for_ptr_address): Ditto.
        * tree-ssa-sccvn.cc (vn_reference_lookup_3): Ditto.
        * tree-vect-data-refs.cc (can_group_stmts_p): Ditto.
        (vect_find_stmt_data_reference): Ditto.
        (vect_supportable_dr_alignment): Ditto.
        * tree-vect-loop.cc (vect_verify_loop_lens): Ditto.
        (optimize_mask_stores): Ditto.
        * tree-vect-slp.cc (vect_get_operand_map): Ditto.
        (vect_build_slp_tree_2): Ditto.
        * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
        (vectorizable_store): Ditto.
        (vectorizable_load): Ditto.
        (vect_get_vector_types_for_stmt): Ditto.
        * tree-vectorizer.cc (try_vectorize_loop_1): Ditto.

---
 gcc/doc/md.texi             | 32 ++++++++++++
 gcc/genopinit.cc            |  6 ++-
 gcc/gimple-fold.cc          | 28 ++++++++---
 gcc/internal-fn.cc          | 37 +++++++++++++-
 gcc/internal-fn.def         |  4 ++
 gcc/optabs-query.cc         | 39 +++++++++++++++
 gcc/optabs-query.h          |  1 +
 gcc/optabs.def              |  2 +
 gcc/tree-data-ref.cc        |  4 ++
 gcc/tree-if-conv.cc         |  3 ++
 gcc/tree-ssa-alias.cc       |  3 ++
 gcc/tree-ssa-dse.cc         | 12 +++++
 gcc/tree-ssa-loop-ivopts.cc |  8 +++
 gcc/tree-ssa-sccvn.cc       |  6 +++
 gcc/tree-vect-data-refs.cc  | 20 +++++---
 gcc/tree-vect-loop.cc       | 52 +++++++++++--------
 gcc/tree-vect-slp.cc        |  5 ++
 gcc/tree-vect-stmts.cc      | 99 ++++++++++++++++++++++++++++++++++---
 gcc/tree-vectorizer.cc      |  2 +
 19 files changed, 320 insertions(+), 43 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 95f7fe1f802..fc99990465d 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -5136,6 +5136,38 @@ of @code{QI} elements.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{len_maskload@var{m}@var{n}} instruction pattern
+@item @samp{len_maskload@var{m}@var{n}}
+Perform a load of vector which is predicated by length and mask
+from memory operand 1 of mode @var{m} into register operand 0.
+Length is provided in operand 2 which has whichever
+integer mode the target prefers.
+Mask is provided in register operand 3 of mode @var{n}.
+
+operand 2 can be a variable or a constant amount. It can be vectorization
+factor which is the special constant value represents the maximum length.
+
+operand 3 can be a variable or a constant amount. It can be all 1
+which is the special constant value represents the full mask.
+
+This pattern is not allowed to @code{FAIL}.
+
+@cindex @code{len_maskstore@var{m}@var{n}} instruction pattern
+@item @samp{len_maskstore@var{m}@var{n}}
+Perform a store of vector which is predicated by length and mask
+from register operand 1 of mode @var{m} into memory operand 0.
+Length is provided in operand 2 which has whichever
+integer mode the target prefers.
+Mask is provided in register operand 3 of mode @var{n}.
+
+operand 2 can be a variable or a constant amount. It can be vectorization
+factor which is the special constant value represents the maximum length.
+
+operand 3 can be a variable or a constant amount. It can be all 1
+which is the special constant value represents the full mask.
+
+This pattern is not allowed to @code{FAIL}.
+
 @cindex @code{vec_perm@var{m}} instruction pattern
 @item @samp{vec_perm@var{m}}
 Output a (variable) vector permutation.  Operand 0 is the destination
diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc
index 0c1b6859ca0..9aeebd66724 100644
--- a/gcc/genopinit.cc
+++ b/gcc/genopinit.cc
@@ -376,7 +376,8 @@ main (int argc, const char **argv)
 
   fprintf (s_file,
           "/* Returns TRUE if the target supports any of the partial vector\n"
-          "   optabs: while_ult_optab, len_load_optab or len_store_optab,\n"
+          "   optabs: while_ult_optab, len_load_optab, len_store_optab,\n"
+          "   len_maskload_optab or len_maskstore_optab,\n"
           "   for any mode.  */\n"
           "bool\npartial_vectors_supported_p (void)\n{\n");
   bool any_match = false;
@@ -386,7 +387,8 @@ main (int argc, const char **argv)
     {
 #define CMP_NAME(N) !strncmp (p->name, (N), strlen ((N)))
       if (CMP_NAME("while_ult") || CMP_NAME ("len_load")
-         || CMP_NAME ("len_store"))
+         || CMP_NAME ("len_store") || CMP_NAME ("len_maskload")
+         || CMP_NAME ("len_maskstore"))
        {
          if (first)
            fprintf (s_file, " HAVE_%s", p->name);
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 581575b65ec..a2c2ad5bfe7 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -5370,8 +5370,8 @@ arith_overflowed_p (enum tree_code code, const_tree type,
   return wi::min_precision (wres, sign) > TYPE_PRECISION (type);
 }
 
-/* If IFN_{MASK,LEN}_LOAD/STORE call CALL is unconditional, return a MEM_REF
-   for the memory it references, otherwise return null.  VECTYPE is the
+/* If IFN_{MASK,LEN,LEN_MASK}_LOAD/STORE call CALL is unconditional, return a
+   MEM_REF for the memory it references, otherwise return null.  VECTYPE is the
    type of the memory vector.  MASK_P indicates it's for MASK if true,
    otherwise it's for LEN.  */
 
@@ -5383,7 +5383,20 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, 
tree vectype, bool mask_p)
   if (!tree_fits_uhwi_p (alias_align))
     return NULL_TREE;
 
-  if (mask_p)
+  if (gimple_call_internal_fn (call) == IFN_LEN_MASK_LOAD
+      || gimple_call_internal_fn (call) == IFN_LEN_MASK_STORE)
+    {
+      tree basic_len = gimple_call_arg (call, 2);
+      if (!poly_int_tree_p (basic_len))
+       return NULL_TREE;
+      if (maybe_ne (tree_to_poly_uint64 (basic_len),
+                   TYPE_VECTOR_SUBPARTS (vectype)))
+       return NULL_TREE;
+      tree mask = gimple_call_arg (call, 3);
+      if (!integer_all_onesp (mask))
+       return NULL_TREE;
+    }
+  else if (mask_p)
     {
       tree mask = gimple_call_arg (call, 2);
       if (!integer_all_onesp (mask))
@@ -5409,7 +5422,7 @@ gimple_fold_partial_load_store_mem_ref (gcall *call, tree 
vectype, bool mask_p)
   return fold_build2 (MEM_REF, vectype, ptr, offset);
 }
 
-/* Try to fold IFN_{MASK,LEN}_LOAD call CALL.  Return true on success.
+/* Try to fold IFN_{MASK,LEN,LEN_MASK}_LOAD call CALL.  Return true on success.
    MASK_P indicates it's for MASK if true, otherwise it's for LEN.  */
 
 static bool
@@ -5431,14 +5444,15 @@ gimple_fold_partial_load (gimple_stmt_iterator *gsi, 
gcall *call, bool mask_p)
   return false;
 }
 
-/* Try to fold IFN_{MASK,LEN}_STORE call CALL.  Return true on success.
+/* Try to fold IFN_{MASK,LEN,LEN_MASK}_STORE call CALL.  Return true on 
success.
    MASK_P indicates it's for MASK if true, otherwise it's for LEN.  */
 
 static bool
 gimple_fold_partial_store (gimple_stmt_iterator *gsi, gcall *call,
                           bool mask_p)
 {
-  tree rhs = gimple_call_arg (call, 3);
+  tree rhs = gimple_call_arg (
+    call, gimple_call_internal_fn (call) == IFN_LEN_MASK_STORE ? 4 : 3);
   if (tree lhs
       = gimple_fold_partial_load_store_mem_ref (call, TREE_TYPE (rhs), mask_p))
     {
@@ -5659,9 +5673,11 @@ gimple_fold_call (gimple_stmt_iterator *gsi, bool 
inplace)
          cplx_result = true;
          break;
        case IFN_MASK_LOAD:
+       case IFN_LEN_MASK_LOAD:
          changed |= gimple_fold_partial_load (gsi, stmt, true);
          break;
        case IFN_MASK_STORE:
+       case IFN_LEN_MASK_STORE:
          changed |= gimple_fold_partial_store (gsi, stmt, true);
          break;
        case IFN_LEN_LOAD:
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index da9b944dd5d..4a9fe388eed 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -165,6 +165,7 @@ init_internal_fns ()
 #define mask_load_lanes_direct { -1, -1, false }
 #define gather_load_direct { 3, 1, false }
 #define len_load_direct { -1, -1, false }
+#define len_maskload_direct { -1, 3, false }
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
@@ -172,6 +173,7 @@ init_internal_fns ()
 #define vec_cond_direct { 2, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define len_store_direct { 3, 3, false }
+#define len_maskstore_direct { 4, 3, false }
 #define vec_set_direct { 3, 3, false }
 #define unary_direct { 0, 0, true }
 #define unary_convert_direct { -1, 0, true }
@@ -2875,6 +2877,17 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
       create_input_operand (&ops[3], bias, QImode);
       expand_insn (icode, 4, ops);
     }
+  else if (optab == len_maskload_optab)
+    {
+      create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE 
(maskt)),
+                                  TYPE_UNSIGNED (TREE_TYPE (maskt)));
+      maskt = gimple_call_arg (stmt, 3);
+      mask = expand_normal (maskt);
+      create_input_operand (&ops[3], mask, TYPE_MODE (TREE_TYPE (maskt)));
+      icode = convert_optab_handler (optab, TYPE_MODE (type),
+                                    TYPE_MODE (TREE_TYPE (maskt)));
+      expand_insn (icode, 4, ops);
+    }
   else
     {
       create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
@@ -2888,6 +2901,7 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
 #define expand_mask_load_optab_fn expand_partial_load_optab_fn
 #define expand_mask_load_lanes_optab_fn expand_mask_load_optab_fn
 #define expand_len_load_optab_fn expand_partial_load_optab_fn
+#define expand_len_maskload_optab_fn expand_partial_load_optab_fn
 
 /* Expand MASK_STORE{,_LANES} or LEN_STORE call STMT using optab OPTAB.  */
 
@@ -2900,7 +2914,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
   insn_code icode;
 
   maskt = gimple_call_arg (stmt, 2);
-  rhs = gimple_call_arg (stmt, 3);
+  rhs = gimple_call_arg (stmt, optab == len_maskstore_optab ? 4 : 3);
   type = TREE_TYPE (rhs);
   lhs = expand_call_mem_ref (type, stmt, 0);
 
@@ -2927,6 +2941,16 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
       create_input_operand (&ops[3], bias, QImode);
       expand_insn (icode, 4, ops);
     }
+  else if (optab == len_maskstore_optab)
+    {
+      create_convert_operand_from (&ops[2], mask, TYPE_MODE (TREE_TYPE 
(maskt)),
+                                  TYPE_UNSIGNED (TREE_TYPE (maskt)));
+      maskt = gimple_call_arg (stmt, 3);
+      mask = expand_normal (maskt);
+      create_input_operand (&ops[3], mask, TYPE_MODE (TREE_TYPE (maskt)));
+      icode = convert_optab_handler (optab, TYPE_MODE (type), GET_MODE (mask));
+      expand_insn (icode, 4, ops);
+    }
   else
     {
       create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
@@ -2937,6 +2961,7 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
 #define expand_mask_store_optab_fn expand_partial_store_optab_fn
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 #define expand_len_store_optab_fn expand_partial_store_optab_fn
+#define expand_len_maskstore_optab_fn expand_partial_store_optab_fn
 
 /* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
    The expansion of STMT happens based on OPTAB table associated.  */
@@ -3890,6 +3915,7 @@ multi_vector_optab_supported_p (convert_optab optab, 
tree_pair types,
 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_gather_load_optab_supported_p convert_optab_supported_p
 #define direct_len_load_optab_supported_p direct_optab_supported_p
+#define direct_len_maskload_optab_supported_p convert_optab_supported_p
 #define direct_mask_store_optab_supported_p convert_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p 
multi_vector_optab_supported_p
@@ -3897,6 +3923,7 @@ multi_vector_optab_supported_p (convert_optab optab, 
tree_pair types,
 #define direct_vec_cond_optab_supported_p convert_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_len_store_optab_supported_p direct_optab_supported_p
+#define direct_len_maskstore_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
 #define direct_fold_left_optab_supported_p direct_optab_supported_p
@@ -4361,6 +4388,7 @@ internal_load_fn_p (internal_fn fn)
     case IFN_GATHER_LOAD:
     case IFN_MASK_GATHER_LOAD:
     case IFN_LEN_LOAD:
+    case IFN_LEN_MASK_LOAD:
       return true;
 
     default:
@@ -4381,6 +4409,7 @@ internal_store_fn_p (internal_fn fn)
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
     case IFN_LEN_STORE:
+    case IFN_LEN_MASK_STORE:
       return true;
 
     default:
@@ -4420,6 +4449,10 @@ internal_fn_mask_index (internal_fn fn)
     case IFN_MASK_STORE_LANES:
       return 2;
 
+    case IFN_LEN_MASK_LOAD:
+    case IFN_LEN_MASK_STORE:
+      return 3;
+
     case IFN_MASK_GATHER_LOAD:
     case IFN_MASK_SCATTER_STORE:
       return 4;
@@ -4444,6 +4477,8 @@ internal_fn_stored_value_index (internal_fn fn)
     case IFN_MASK_SCATTER_STORE:
     case IFN_LEN_STORE:
       return 3;
+    case IFN_LEN_MASK_STORE:
+      return 4;
 
     default:
       return -1;
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 5d638de6d06..cf0bcea5ac7 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -50,12 +50,14 @@ along with GCC; see the file COPYING3.  If not see
    - mask_load_lanes: currently just vec_mask_load_lanes
    - gather_load: used for {mask_,}gather_load
    - len_load: currently just len_load
+   - len_maskload: currently just len_maskload
 
    - mask_store: currently just maskstore
    - store_lanes: currently just vec_store_lanes
    - mask_store_lanes: currently just vec_mask_store_lanes
    - scatter_store: used for {mask_,}scatter_store
    - len_store: currently just len_store
+   - len_maskstore: currently just len_maskstore
 
    - unary: a normal unary optab, such as vec_reverse_<mode>
    - binary: a normal binary optab, such as vec_interleave_lo_<mode>
@@ -157,6 +159,7 @@ DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
                       mask_gather_load, gather_load)
 
 DEF_INTERNAL_OPTAB_FN (LEN_LOAD, ECF_PURE, len_load, len_load)
+DEF_INTERNAL_OPTAB_FN (LEN_MASK_LOAD, ECF_PURE, len_maskload, len_maskload)
 
 DEF_INTERNAL_OPTAB_FN (SCATTER_STORE, 0, scatter_store, scatter_store)
 DEF_INTERNAL_OPTAB_FN (MASK_SCATTER_STORE, 0,
@@ -175,6 +178,7 @@ DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, 
vec_cond_mask)
 DEF_INTERNAL_OPTAB_FN (VEC_SET, 0, vec_set, vec_set)
 
 DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store)
+DEF_INTERNAL_OPTAB_FN (LEN_MASK_STORE, 0, len_maskstore, len_maskstore)
 
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (SELECT_VL, ECF_CONST | ECF_NOTHROW, select_vl, binary)
diff --git a/gcc/optabs-query.cc b/gcc/optabs-query.cc
index 276f8408dd7..ec765e78088 100644
--- a/gcc/optabs-query.cc
+++ b/gcc/optabs-query.cc
@@ -624,6 +624,45 @@ get_len_load_store_mode (machine_mode mode, bool is_load)
   return opt_machine_mode ();
 }
 
+/* Return true if target supports vector length && masked load/store for mode.
+   Length is used on loop control and mask is used on flow control.  */
+
+bool
+can_vec_len_mask_load_store_p (machine_mode mode, bool is_load)
+{
+  optab op = is_load ? len_maskload_optab : len_maskstore_optab;
+  machine_mode vmode;
+  machine_mode mask_mode;
+
+  /* If mode is vector mode, check it directly.  */
+  if (VECTOR_MODE_P (mode))
+    return targetm.vectorize.get_mask_mode (mode).exists (&mask_mode)
+          && convert_optab_handler (op, mode, mask_mode) != CODE_FOR_nothing;
+
+  scalar_mode smode;
+  if (is_a<scalar_mode> (mode, &smode))
+    /* See if there is any chance the mask load or store might be
+       vectorized.  If not, punt.  */
+    vmode = targetm.vectorize.preferred_simd_mode (smode);
+  else
+    vmode = mode;
+
+  if (VECTOR_MODE_P (vmode)
+      && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
+      && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
+    return true;
+
+  auto_vector_modes vector_modes;
+  targetm.vectorize.autovectorize_vector_modes (&vector_modes, true);
+  for (machine_mode base_mode : vector_modes)
+    if (related_vector_mode (base_mode, smode).exists (&vmode)
+       && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode)
+       && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing)
+      return true;
+
+  return false;
+}
+
 /* Return true if there is a compare_and_swap pattern.  */
 
 bool
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index b266d2fe990..2b9c9b44af2 100644
--- a/gcc/optabs-query.h
+++ b/gcc/optabs-query.h
@@ -189,6 +189,7 @@ enum insn_code find_widening_optab_handler_and_mode (optab, 
machine_mode,
 int can_mult_highpart_p (machine_mode, bool);
 bool can_vec_mask_load_store_p (machine_mode, machine_mode, bool);
 opt_machine_mode get_len_load_store_mode (machine_mode, bool);
+bool can_vec_len_mask_load_store_p (machine_mode, bool);
 bool can_compare_and_swap_p (machine_mode, bool);
 bool can_atomic_exchange_p (machine_mode, bool);
 bool can_atomic_load_p (machine_mode);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index f31b69c5d85..f5401aea364 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -91,6 +91,8 @@ OPTAB_CD(vec_cmpu_optab, "vec_cmpu$a$b")
 OPTAB_CD(vec_cmpeq_optab, "vec_cmpeq$a$b")
 OPTAB_CD(maskload_optab, "maskload$a$b")
 OPTAB_CD(maskstore_optab, "maskstore$a$b")
+OPTAB_CD(len_maskload_optab, "len_maskload$a$b")
+OPTAB_CD(len_maskstore_optab, "len_maskstore$a$b")
 OPTAB_CD(gather_load_optab, "gather_load$a$b")
 OPTAB_CD(mask_gather_load_optab, "mask_gather_load$a$b")
 OPTAB_CD(scatter_store_optab, "scatter_store$a$b")
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index b576cce6db6..99aca44e6a5 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -5816,6 +5816,8 @@ get_references_in_stmt (gimple *stmt, vec<data_ref_loc, 
va_heap> *references)
            }
          case IFN_MASK_LOAD:
          case IFN_MASK_STORE:
+         case IFN_LEN_MASK_LOAD:
+         case IFN_LEN_MASK_STORE:
            break;
          default:
            clobbers_memory = true;
@@ -5861,11 +5863,13 @@ get_references_in_stmt (gimple *stmt, vec<data_ref_loc, 
va_heap> *references)
        switch (gimple_call_internal_fn (stmt))
          {
          case IFN_MASK_LOAD:
+         case IFN_LEN_MASK_LOAD:
            if (gimple_call_lhs (stmt) == NULL_TREE)
              break;
            ref.is_read = true;
            /* FALLTHRU */
          case IFN_MASK_STORE:
+         case IFN_LEN_MASK_STORE:
            ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
            align = tree_to_shwi (gimple_call_arg (stmt, 1));
            if (ref.is_read)
diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc
index 1393ce184e3..0f549fa528d 100644
--- a/gcc/tree-if-conv.cc
+++ b/gcc/tree-if-conv.cc
@@ -960,6 +960,9 @@ ifcvt_can_use_mask_load_store (gimple *stmt)
   if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
     return true;
 
+  if (can_vec_len_mask_load_store_p (mode, is_load))
+    return true;
+
   return false;
 }
 
diff --git a/gcc/tree-ssa-alias.cc b/gcc/tree-ssa-alias.cc
index 79ed956e300..100c4b2e7d9 100644
--- a/gcc/tree-ssa-alias.cc
+++ b/gcc/tree-ssa-alias.cc
@@ -2815,11 +2815,13 @@ ref_maybe_used_by_call_p_1 (gcall *call, ao_ref *ref, 
bool tbaa_p)
       case IFN_SCATTER_STORE:
       case IFN_MASK_SCATTER_STORE:
       case IFN_LEN_STORE:
+      case IFN_LEN_MASK_STORE:
        return false;
       case IFN_MASK_STORE_LANES:
        goto process_args;
       case IFN_MASK_LOAD:
       case IFN_LEN_LOAD:
+      case IFN_LEN_MASK_LOAD:
       case IFN_MASK_LOAD_LANES:
        {
          ao_ref rhs_ref;
@@ -3065,6 +3067,7 @@ call_may_clobber_ref_p_1 (gcall *call, ao_ref *ref, bool 
tbaa_p)
        return false;
       case IFN_MASK_STORE:
       case IFN_LEN_STORE:
+      case IFN_LEN_MASK_STORE:
       case IFN_MASK_STORE_LANES:
        {
          tree rhs = gimple_call_arg (call,
diff --git a/gcc/tree-ssa-dse.cc b/gcc/tree-ssa-dse.cc
index eabe8ba4522..acaf844b8ef 100644
--- a/gcc/tree-ssa-dse.cc
+++ b/gcc/tree-ssa-dse.cc
@@ -174,6 +174,17 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write, 
bool may_def_ok = false)
              return true;
            }
          break;
+       case IFN_LEN_MASK_STORE:
+         /* We cannot initialize a must-def ao_ref (in all cases) but we
+            can provide a may-def variant.  */
+         if (may_def_ok)
+           {
+             ao_ref_init_from_ptr_and_size
+                 (write, gimple_call_arg (stmt, 0),
+                  TYPE_SIZE_UNIT (TREE_TYPE (gimple_call_arg (stmt, 4))));
+             return true;
+           }
+         break;
        default:;
        }
     }
@@ -1483,6 +1494,7 @@ dse_optimize_stmt (function *fun, gimple_stmt_iterator 
*gsi, sbitmap live_bytes)
        {
        case IFN_LEN_STORE:
        case IFN_MASK_STORE:
+       case IFN_LEN_MASK_STORE:
          {
            enum dse_store_status store_status;
            store_status = dse_classify_store (&ref, stmt, false, live_bytes);
diff --git a/gcc/tree-ssa-loop-ivopts.cc b/gcc/tree-ssa-loop-ivopts.cc
index 6fbd2d59318..e8e9df1ab74 100644
--- a/gcc/tree-ssa-loop-ivopts.cc
+++ b/gcc/tree-ssa-loop-ivopts.cc
@@ -2439,6 +2439,7 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
     case IFN_MASK_LOAD:
     case IFN_MASK_LOAD_LANES:
     case IFN_LEN_LOAD:
+    case IFN_LEN_MASK_LOAD:
       if (op_p == gimple_call_arg_ptr (call, 0))
        return TREE_TYPE (gimple_call_lhs (call));
       return NULL_TREE;
@@ -2450,6 +2451,11 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
        return TREE_TYPE (gimple_call_arg (call, 3));
       return NULL_TREE;
 
+    case IFN_LEN_MASK_STORE:
+      if (op_p == gimple_call_arg_ptr (call, 0))
+       return TREE_TYPE (gimple_call_arg (call, 4));
+      return NULL_TREE;
+
     default:
       return NULL_TREE;
     }
@@ -7555,6 +7561,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
     case IFN_MASK_STORE_LANES:
     case IFN_LEN_LOAD:
     case IFN_LEN_STORE:
+    case IFN_LEN_MASK_LOAD:
+    case IFN_LEN_MASK_STORE:
       /* The second argument contains the correct alias type.  */
       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
       return TREE_TYPE (gimple_call_arg (call, 1));
diff --git a/gcc/tree-ssa-sccvn.cc b/gcc/tree-ssa-sccvn.cc
index 27c84e78fcf..02fbc4a2dfa 100644
--- a/gcc/tree-ssa-sccvn.cc
+++ b/gcc/tree-ssa-sccvn.cc
@@ -3304,6 +3304,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
          if (!tree_fits_uhwi_p (len) || !tree_fits_shwi_p (bias))
            return (void *)-1;
          break;
+       case IFN_LEN_MASK_STORE:
+         len = gimple_call_arg (call, 2);
+         mask = gimple_call_arg (call, internal_fn_mask_index (fn));
+         if (!tree_fits_uhwi_p (len) || TREE_CODE (mask) != VECTOR_CST)
+           return (void *)-1;
+         break;
        default:
          return (void *)-1;
        }
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index ebe93832b1e..fb83446519a 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3039,17 +3039,21 @@ can_group_stmts_p (stmt_vec_info stmt1_info, 
stmt_vec_info stmt2_info,
       if (!call2 || !gimple_call_internal_p (call2))
        return false;
       internal_fn ifn = gimple_call_internal_fn (call1);
-      if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
+      if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE
+         && ifn != IFN_LEN_MASK_LOAD && ifn != IFN_LEN_MASK_STORE)
        return false;
       if (ifn != gimple_call_internal_fn (call2))
        return false;
 
       /* Check that the masks are the same.  Cope with casts of masks,
         like those created by build_mask_conversion.  */
-      tree mask1 = gimple_call_arg (call1, 2);
-      tree mask2 = gimple_call_arg (call2, 2);
+      unsigned int mask_argno
+       = ifn == IFN_LEN_MASK_LOAD || ifn == IFN_LEN_MASK_STORE ? 3 : 2;
+      tree mask1 = gimple_call_arg (call1, mask_argno);
+      tree mask2 = gimple_call_arg (call2, mask_argno);
       if (!operand_equal_p (mask1, mask2, 0)
-          && (ifn == IFN_MASK_STORE || !allow_slp_p))
+         && (ifn == IFN_MASK_STORE || ifn == IFN_LEN_MASK_STORE
+             || !allow_slp_p))
        {
          mask1 = strip_conversion (mask1);
          if (!mask1)
@@ -4292,7 +4296,9 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
   if (gcall *call = dyn_cast <gcall *> (stmt))
     if (!gimple_call_internal_p (call)
        || (gimple_call_internal_fn (call) != IFN_MASK_LOAD
-           && gimple_call_internal_fn (call) != IFN_MASK_STORE))
+           && gimple_call_internal_fn (call) != IFN_MASK_STORE
+           && gimple_call_internal_fn (call) != IFN_LEN_MASK_LOAD
+           && gimple_call_internal_fn (call) != IFN_LEN_MASK_STORE))
       {
        free_data_ref (dr);
        return opt_result::failure_at (stmt,
@@ -6731,7 +6737,9 @@ vect_supportable_dr_alignment (vec_info *vinfo, 
dr_vec_info *dr_info,
   if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
     if (gimple_call_internal_p (stmt)
        && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
-           || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
+           || gimple_call_internal_fn (stmt) == IFN_MASK_STORE
+           || gimple_call_internal_fn (stmt) == IFN_LEN_MASK_LOAD
+           || gimple_call_internal_fn (stmt) == IFN_LEN_MASK_STORE))
       return dr_unaligned_supported;
 
   if (loop_vinfo)
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index ace9e759f5b..03de41d4988 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1296,30 +1296,33 @@ vect_verify_loop_lens (loop_vec_info loop_vinfo)
   if (LOOP_VINFO_LENS (loop_vinfo).is_empty ())
     return false;
 
-  machine_mode len_load_mode = get_len_load_store_mode
-    (loop_vinfo->vector_mode, true).require ();
-  machine_mode len_store_mode = get_len_load_store_mode
-    (loop_vinfo->vector_mode, false).require ();
+  if (!can_vec_len_mask_load_store_p (loop_vinfo->vector_mode, true)
+      && !can_vec_len_mask_load_store_p (loop_vinfo->vector_mode, false))
+    {
+      machine_mode len_load_mode
+       = get_len_load_store_mode (loop_vinfo->vector_mode, true).require ();
+      machine_mode len_store_mode
+       = get_len_load_store_mode (loop_vinfo->vector_mode, false).require ();
 
-  signed char partial_load_bias = internal_len_load_store_bias
-    (IFN_LEN_LOAD, len_load_mode);
+      signed char partial_load_bias
+       = internal_len_load_store_bias (IFN_LEN_LOAD, len_load_mode);
 
-  signed char partial_store_bias = internal_len_load_store_bias
-    (IFN_LEN_STORE, len_store_mode);
+      signed char partial_store_bias
+       = internal_len_load_store_bias (IFN_LEN_STORE, len_store_mode);
 
-  gcc_assert (partial_load_bias == partial_store_bias);
+      gcc_assert (partial_load_bias == partial_store_bias);
 
-  if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED)
-    return false;
+      if (partial_load_bias == VECT_PARTIAL_BIAS_UNSUPPORTED)
+       return false;
 
-  /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
-     len_loads with a length of zero.  In order to avoid that we prohibit
-     more than one loop length here.  */
-  if (partial_load_bias == -1
-      && LOOP_VINFO_LENS (loop_vinfo).length () > 1)
-    return false;
+      /* If the backend requires a bias of -1 for LEN_LOAD, we must not emit
+        len_loads with a length of zero.  In order to avoid that we prohibit
+        more than one loop length here.  */
+      if (partial_load_bias == -1 && LOOP_VINFO_LENS (loop_vinfo).length () > 
1)
+       return false;
 
-  LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) = partial_load_bias;
+      LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo) = partial_load_bias;
+    }
 
   unsigned int max_nitems_per_iter = 1;
   unsigned int i;
@@ -11317,7 +11320,8 @@ optimize_mask_stores (class loop *loop)
           gsi_next (&gsi))
        {
          stmt = gsi_stmt (gsi);
-         if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
+         if (gimple_call_internal_p (stmt, IFN_MASK_STORE)
+             || gimple_call_internal_p (stmt, IFN_LEN_MASK_STORE))
            worklist.safe_push (stmt);
        }
     }
@@ -11340,7 +11344,8 @@ optimize_mask_stores (class loop *loop)
       tree zero;
 
       last = worklist.pop ();
-      mask = gimple_call_arg (last, 2);
+      mask = gimple_call_arg (
+       last, gimple_call_internal_p (last, IFN_LEN_MASK_STORE) ? 3 : 2);
       bb = gimple_bb (last);
       /* Create then_bb and if-then structure in CFG, then_bb belongs to
         the same loop as if_bb.  It could be different to LOOP when two
@@ -11473,7 +11478,12 @@ optimize_mask_stores (class loop *loop)
            }
          /* Put other masked stores with the same mask to STORE_BB.  */
          if (worklist.is_empty ()
-             || gimple_call_arg (worklist.last (), 2) != mask
+             || gimple_call_arg (worklist.last (),
+                                 gimple_call_internal_p (worklist.last (),
+                                                         IFN_LEN_MASK_STORE)
+                                   ? 3
+                                   : 2)
+                  != mask
              || worklist.last () != stmt1)
            break;
          last = worklist.pop ();
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index ab89a82f1b3..937b5295df4 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -489,6 +489,7 @@ static const int cond_expr_maps[3][5] = {
 };
 static const int arg1_map[] = { 1, 1 };
 static const int arg2_map[] = { 1, 2 };
+static const int arg3_map[] = { 1, 3 };
 static const int arg1_arg4_map[] = { 2, 1, 4 };
 static const int op1_op0_map[] = { 2, 1, 0 };
 
@@ -524,6 +525,9 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0)
          case IFN_MASK_LOAD:
            return arg2_map;
 
+         case IFN_LEN_MASK_LOAD:
+           return arg3_map;
+
          case IFN_GATHER_LOAD:
            return arg1_map;
 
@@ -1779,6 +1783,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
     {
       if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
        gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)
+                   || gimple_call_internal_p (stmt, IFN_LEN_MASK_LOAD)
                    || gimple_call_internal_p (stmt, IFN_GATHER_LOAD)
                    || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD));
       else
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a7acc032d47..9b797c61c88 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -1837,6 +1837,15 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
       using_partial_vectors_p = true;
     }
 
+  if (can_vec_len_mask_load_store_p (vecmode, is_load))
+    {
+      nvectors = group_memory_nvectors (group_size * vf, nunits);
+      /* Length is used for loop control and mask for flow control.  */
+      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
+      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
+      using_partial_vectors_p = true;
+    }
+
   if (!using_partial_vectors_p)
     {
       if (dump_enabled_p ())
@@ -7978,8 +7987,9 @@ vectorizable_store (vec_info *vinfo,
       if (memory_access_type == VMAT_CONTIGUOUS)
        {
          if (!VECTOR_MODE_P (vec_mode)
-             || !can_vec_mask_load_store_p (vec_mode,
-                                            TYPE_MODE (mask_vectype), false))
+             || (!can_vec_mask_load_store_p (vec_mode,
+                                             TYPE_MODE (mask_vectype), false)
+                 && !can_vec_len_mask_load_store_p (vec_mode, false)))
            return false;
        }
       else if (memory_access_type != VMAT_LOAD_STORE_LANES
@@ -8942,7 +8952,38 @@ vectorizable_store (vec_info *vinfo,
                }
 
              /* Arguments are ready.  Create the new vector stmt.  */
-             if (final_mask)
+             if (can_vec_len_mask_load_store_p (TYPE_MODE (vectype), false)
+                 && (final_mask || loop_lens))
+               {
+                 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
+                 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+                 if (!final_mask)
+                   {
+                     machine_mode mask_mode
+                       = targetm.vectorize.get_mask_mode (TYPE_MODE (vectype))
+                           .require ();
+                     mask_vectype
+                       = build_truth_vector_type_for_mode (nunits, mask_mode);
+                     tree mask = build_int_cst (TREE_TYPE (mask_vectype), -1);
+                     final_mask = build_vector_from_val (mask_vectype, mask);
+                   }
+                 tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+                 tree final_len;
+                 if (loop_lens)
+                   final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                                  vec_num * ncopies, vectype,
+                                                  vec_num * j + i, 1);
+                 else
+                   final_len = build_int_cst (iv_type, nunits);
+                 gcall *call
+                   = gimple_build_call_internal (IFN_LEN_MASK_STORE, 5,
+                                                 dataref_ptr, ptr, final_len,
+                                                 final_mask, vec_oprnd);
+                 gimple_call_set_nothrow (call, true);
+                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
+                 new_stmt = call;
+               }
+             else if (final_mask)
                {
                  tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
                  gcall *call
@@ -9407,8 +9448,9 @@ vectorizable_load (vec_info *vinfo,
        {
          machine_mode vec_mode = TYPE_MODE (vectype);
          if (!VECTOR_MODE_P (vec_mode)
-             || !can_vec_mask_load_store_p (vec_mode,
-                                            TYPE_MODE (mask_vectype), true))
-             || !can_vec_mask_load_store_p (vec_mode,
-                                            TYPE_MODE (mask_vectype), true))
+             || (!can_vec_mask_load_store_p (vec_mode,
+                                             TYPE_MODE (mask_vectype), true)
+                 && !can_vec_len_mask_load_store_p (vec_mode, true)))
            return false;
        }
       else if (memory_access_type != VMAT_LOAD_STORE_LANES
@@ -10301,7 +10343,47 @@ vectorizable_load (vec_info *vinfo,
                                              align, misalign);
                    align = least_bit_hwi (misalign | align);
 
-                   if (final_mask)
+                   if (can_vec_len_mask_load_store_p (TYPE_MODE (vectype),
+                                                      true)
+                       && (final_mask || loop_lens)
+                       && memory_access_type != VMAT_INVARIANT)
+                     {
+                       tree ptr
+                         = build_int_cst (ref_type, align * BITS_PER_UNIT);
+                       poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
+                       if (!final_mask)
+                         {
+                           machine_mode mask_mode
+                             = targetm.vectorize
+                                 .get_mask_mode (TYPE_MODE (vectype))
+                                 .require ();
+                           mask_vectype
+                             = build_truth_vector_type_for_mode (nunits,
+                                                                 mask_mode);
+                           tree mask
+                             = build_int_cst (TREE_TYPE (mask_vectype), -1);
+                           final_mask
+                             = build_vector_from_val (mask_vectype, mask);
+                         }
+                       tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
+                       tree final_len;
+                       if (loop_lens)
+                         final_len
+                           = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+                                                vec_num * ncopies, vectype,
+                                                vec_num * j + i, 1);
+                       else
+                         final_len = build_int_cst (iv_type, nunits);
+
+                       gcall *call
+                         = gimple_build_call_internal (IFN_LEN_MASK_LOAD, 4,
+                                                       dataref_ptr, ptr,
+                                                       final_len, final_mask);
+                       gimple_call_set_nothrow (call, true);
+                       new_stmt = call;
+                       data_ref = NULL_TREE;
+                     }
+                   else if (final_mask)
                      {
                        tree ptr = build_int_cst (ref_type,
                                                  align * BITS_PER_UNIT);
@@ -13027,7 +13109,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
 
   if (gimple_get_lhs (stmt) == NULL_TREE
       /* MASK_STORE has no lhs, but is ok.  */
-      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
+      && !gimple_call_internal_p (stmt, IFN_MASK_STORE)
+      && !gimple_call_internal_p (stmt, IFN_LEN_MASK_STORE))
     {
       if (is_a <gcall *> (stmt))
        {
@@ -13071,6 +13154,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
        scalar_type = TREE_TYPE (DR_REF (dr));
       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
        scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
+      else if (gimple_call_internal_p (stmt, IFN_LEN_MASK_STORE))
+       scalar_type = TREE_TYPE (gimple_call_arg (stmt, 4));
       else
        scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
 
diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc
index a048e9d8917..19312404ac4 100644
--- a/gcc/tree-vectorizer.cc
+++ b/gcc/tree-vectorizer.cc
@@ -1101,6 +1101,8 @@ try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab,
                {
                  internal_fn ifn = gimple_call_internal_fn (call);
                  if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE
+                     || ifn == IFN_LEN_MASK_LOAD
+                     || ifn == IFN_LEN_MASK_STORE
                      /* Don't keep the if-converted parts when the ifn with
                         specifc type is not supported by the backend.  */
                      || (direct_internal_fn_p (ifn)
-- 
2.36.1


Reply via email to