On Tue, 15 Aug 2023, juzhe.zh...@rivai.ai wrote:

> Hi, Richi.
> 
> I realize this code performs the analysis for load/store:
> 
> +  internal_fn lanes_ifn;
>    if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
>                              ncopies, &memory_access_type, &poffset,
> -                           &alignment_support_scheme, &misalignment, &gs_info))
> +                           &alignment_support_scheme, &misalignment, &gs_info,
> +                           &lanes_ifn))
> 
> This function generates the gather/scatter info "gs_info" using the same
> approach.
> 
> I add "&lanes_ifn" here, which computes the IFN for lanes load/store.
> 
> Is that reasonable?

Ah, OK.  I guess re-computing it is OK then (once).

Richard.

> Thanks.
> 
> 
> juzhe.zh...@rivai.ai
>  
> From: Richard Biener
> Date: 2023-08-15 19:19
> To: juzhe.zh...@rivai.ai
> CC: gcc-patches; richard.sandiford
> Subject: Re: Re: [PATCH] VECT: Apply MASK_LEN_{LOAD_LANES,STORE_LANES} into vectorizer
> On Tue, 15 Aug 2023, juzhe.zh...@rivai.ai wrote:
>  
> > Hi, Richi.
> > 
> > > +   if (vect_store_lanes_supported (vectype, group_size, false)
> > > +       == IFN_MASK_LEN_STORE_LANES)
> > 
> > >> can you use the previously computed 'ifn' here please?
> > 
> > Do you mean rewriting the code as follows?
> > 
> > internal_fn lanes_ifn = vect_store_lanes_supported (vectype, group_size, false);
> > 
> > if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
>  
> The vect_store_lanes_supported check is performed during analysis already,
> and ideally we'd not re-do such a check, so please save the result in a
> variable at that point.
> 
> > >> I think the patch needs refreshing after r14-3214-ga74d0d36a3f337.
> > 
> > Yeah, working on it and I will test on both X86 and ARM.
> > 
> > Thanks.
> > 
> > 
> > juzhe.zh...@rivai.ai
> >  
> > From: Richard Biener
> > Date: 2023-08-15 17:40
> > To: Ju-Zhe Zhong
> > CC: gcc-patches; richard.sandiford
> > Subject: Re: [PATCH] VECT: Apply MASK_LEN_{LOAD_LANES,STORE_LANES} into vectorizer
> > On Mon, 14 Aug 2023, juzhe.zh...@rivai.ai wrote:
> >  
> > > From: Ju-Zhe Zhong <juzhe.zh...@rivai.ai>
> > > 
> > > Hi, Richard and Richi.
> > > 
> > > This patch adds MASK_LEN_{LOAD_LANES,STORE_LANES} support to the
> > > vectorizer.
> > > 
> > > Consider this simple case:
> > > 
> > > void __attribute__ ((noinline, noclone))
> > > foo (int *__restrict a, int *__restrict b, int *__restrict c,
> > >      int *__restrict d, int *__restrict e, int *__restrict f,
> > >      int *__restrict g, int *__restrict h, int *__restrict j, int n)
> > > {
> > >   for (int i = 0; i < n; ++i)
> > >     {
> > >       a[i] = j[i * 8];
> > >       b[i] = j[i * 8 + 1];
> > >       c[i] = j[i * 8 + 2];
> > >       d[i] = j[i * 8 + 3];
> > >       e[i] = j[i * 8 + 4];
> > >       f[i] = j[i * 8 + 5];
> > >       g[i] = j[i * 8 + 6];
> > >       h[i] = j[i * 8 + 7];
> > >     }
> > > }
> > > 
> > > RVV Gimple IR:
> > > 
> > >   _79 = .SELECT_VL (ivtmp_81, POLY_INT_CST [4, 4]);
> > >   ivtmp_125 = _79 * 32;
> > >   vect_array.8 = .MASK_LEN_LOAD_LANES (vectp_j.6_124, 32B, { -1, ... }, _79, 0);
> > >   vect__8.9_122 = vect_array.8[0];
> > >   vect__8.10_121 = vect_array.8[1];
> > >   vect__8.11_120 = vect_array.8[2];
> > >   vect__8.12_119 = vect_array.8[3];
> > >   vect__8.13_118 = vect_array.8[4];
> > >   vect__8.14_117 = vect_array.8[5];
> > >   vect__8.15_116 = vect_array.8[6];
> > >   vect__8.16_115 = vect_array.8[7];
> > >   vect_array.8 ={v} {CLOBBER};
> > >   ivtmp_114 = _79 * 4;
> > >   .MASK_LEN_STORE (vectp_a.17_113, 32B, { -1, ... }, _79, 0, vect__8.9_122);
> > >   .MASK_LEN_STORE (vectp_b.19_109, 32B, { -1, ... }, _79, 0, vect__8.10_121);
> > >   .MASK_LEN_STORE (vectp_c.21_105, 32B, { -1, ... }, _79, 0, vect__8.11_120);
> > >   .MASK_LEN_STORE (vectp_d.23_101, 32B, { -1, ... }, _79, 0, vect__8.12_119);
> > >   .MASK_LEN_STORE (vectp_e.25_97, 32B, { -1, ... }, _79, 0, vect__8.13_118);
> > >   .MASK_LEN_STORE (vectp_f.27_93, 32B, { -1, ... }, _79, 0, vect__8.14_117);
> > >   .MASK_LEN_STORE (vectp_g.29_89, 32B, { -1, ... }, _79, 0, vect__8.15_116);
> > >   .MASK_LEN_STORE (vectp_h.31_85, 32B, { -1, ... }, _79, 0, vect__8.16_115);
> > > 
> > > ASM:
> > > 
> > > foo:
> > > lw t4,8(sp)
> > > ld t5,0(sp)
> > > ble t4,zero,.L5
> > > .L3:
> > > vsetvli t1,t4,e8,mf4,ta,ma
> > > vlseg8e32.v v8,(t5)
> > > slli t3,t1,2
> > > slli t6,t1,5
> > > vse32.v v8,0(a0)
> > > vse32.v v9,0(a1)
> > > vse32.v v10,0(a2)
> > > vse32.v v11,0(a3)
> > > vse32.v v12,0(a4)
> > > vse32.v v13,0(a5)
> > > vse32.v v14,0(a6)
> > > vse32.v v15,0(a7)
> > > sub t4,t4,t1
> > > add t5,t5,t6
> > > add a0,a0,t3
> > > add a1,a1,t3
> > > add a2,a2,t3
> > > add a3,a3,t3
> > > add a4,a4,t3
> > > add a5,a5,t3
> > > add a6,a6,t3
> > > add a7,a7,t3
> > > bne t4,zero,.L3
> > > .L5:
> > > ret
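> > > 
> > > (Mapping the GIMPLE above onto this assembly: vsetvli computes the
> > > .SELECT_VL length into t1, vlseg8e32.v is the .MASK_LEN_LOAD_LANES, the
> > > eight vse32.v stores are the .MASK_LEN_STOREs executed under that vl,
> > > and the two slli instructions scale t1 by 32 and 4 for the pointer
> > > bumps, matching ivtmp_125 = _79 * 32 and ivtmp_114 = _79 * 4.)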
> > > 
> > > The details of the approach:
> > > 
> > > Step 1 - Modify the LANES LOAD/STORE support functions
> > > (vect_load_lanes_supported/vect_store_lanes_supported):
> > > 
> > > +/* Return FN if vec_{masked_,mask_len_,}load_lanes is available for COUNT
> > > +   vectors of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
> > >  
> > > -bool
> > > +internal_fn
> > >  vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
> > >                             bool masked_p)
> > >  {
> > > -  if (masked_p)
> > > -    return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> > > -                                         vec_mask_load_lanes_optab,
> > > -                                         vectype, count);
> > > +  if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
> > > +                                    vec_mask_len_load_lanes_optab,
> > > +                                    vectype, count))
> > > +    return IFN_MASK_LEN_LOAD_LANES;
> > > +  else if (masked_p)
> > > +    {
> > > +      if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> > > +                                        vec_mask_load_lanes_optab,
> > > +                                        vectype, count))
> > > +       return IFN_MASK_LOAD_LANES;
> > > +    }
> > >    else
> > > -    return vect_lanes_optab_supported_p ("vec_load_lanes",
> > > -                                         vec_load_lanes_optab,
> > > -                                         vectype, count);
> > > +    {
> > > +      if (vect_lanes_optab_supported_p ("vec_load_lanes",
> > > +                                        vec_load_lanes_optab,
> > > +                                        vectype, count))
> > > +       return IFN_LOAD_LANES;
> > > +    }
> > > +  return IFN_LAST;
> > >  }
> > >  
> > > Instead of returning TRUE or FALSE depending on whether the target
> > > supports LANES LOAD/STORE, the functions now return the internal_fn of
> > > the LANES LOAD/STORE variant that the target supports; if the target
> > > doesn't support any LANES LOAD/STORE optab, they return IFN_LAST.
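> > > 
> > > For callers that only need the old boolean answer, the check becomes a
> > > comparison against IFN_LAST; callers that care which variant is
> > > available compare against a specific IFN.  A small sketch of both uses:
> > > 
> > >   internal_fn ifn = vect_load_lanes_supported (vectype, count, masked_p);
> > >   if (ifn != IFN_LAST)
> > >     ;  /* Some lanes load is available (the old "return true" case).  */
> > >   if (ifn == IFN_MASK_LEN_LOAD_LANES)
> > >     ;  /* Specifically the len+mask variant.  */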
> > > 
> > > Step 2 - Build the MASK_LEN_{LOAD_LANES,STORE_LANES} Gimple IR:
> > > 
> > > +          if (vect_store_lanes_supported (vectype, group_size, false)
> > > +              == IFN_MASK_LEN_STORE_LANES)
> > > +            {
> > > +              if (loop_lens)
> > > +                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > > +                                               ncopies, vectype, j, 1);
> > > +              else
> > > +                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> > > +              signed char biasval
> > > +                = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > > +              bias = build_int_cst (intQI_type_node, biasval);
> > > +              if (!final_mask)
> > > +                {
> > > +                  mask_vectype = truth_type_for (vectype);
> > > +                  final_mask = build_minus_one_cst (mask_vectype);
> > > +                }
> > > +            }
> > > +
> > >            gcall *call;
> > > -          if (final_mask)
> > > +          if (final_len && final_mask)
> > > +            {
> > > +              /* Emit:
> > > +                   MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
> > > +                                         LEN, BIAS, VEC_ARRAY).  */
> > > +              unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> > > +              tree alias_ptr = build_int_cst (ref_type, align);
> > > +              call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
> > > +                                                 dataref_ptr, alias_ptr,
> > > +                                                 final_mask, final_len, bias,
> > > +                                                 vec_array);
> > > +            }
> > > +          else if (final_mask)
> > > 
> > > The LEN and MASK flow is the same as for the other MASK_LEN_* loads/stores.
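> > > 
> > > For illustration, with a loop length _79 from .SELECT_VL, a zero bias
> > > and an all-true mask, the emitted call would print as (hypothetical
> > > dump, by analogy with the .MASK_LEN_LOAD_LANES shown earlier):
> > > 
> > >   .MASK_LEN_STORE_LANES (vectp, 32B, { -1, ... }, _79, 0, vect_array.8);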
> > > 
> > > This patch passed bootstrap and regression testing on X86.
> > > 
> > > Fully tested on RISC-V.
> > > 
> > > OK for trunk?
> >  
> > I think the patch needs refreshing after r14-3214-ga74d0d36a3f337.
> > 
> > > gcc/ChangeLog:
> > > 
> > >         * internal-fn.cc (internal_load_fn_p): Apply
> > >         MASK_LEN_{LOAD_LANES,STORE_LANES} into vectorizer.
> > >         (internal_store_fn_p): Ditto.
> > >         (internal_fn_len_index): Ditto.
> > >         (internal_fn_mask_index): Ditto.
> > >         (internal_fn_stored_value_index): Ditto.
> > >         * tree-vect-data-refs.cc (vect_store_lanes_supported): Ditto.
> > >         (vect_load_lanes_supported): Ditto.
> > >         * tree-vect-loop.cc: Ditto.
> > >         * tree-vect-slp.cc (vect_slp_prefer_store_lanes_p): Ditto.
> > >         * tree-vect-stmts.cc (check_load_store_for_partial_vectors): Ditto.
> > >         (get_group_load_store_type): Ditto.
> > >         (vectorizable_store): Ditto.
> > >         (vectorizable_load): Ditto.
> > >         * tree-vectorizer.h (vect_store_lanes_supported): Ditto.
> > >         (vect_load_lanes_supported): Ditto.
> > > 
> > > ---
> > >  gcc/internal-fn.cc         |  7 +++
> > >  gcc/tree-vect-data-refs.cc | 61 +++++++++++++++++--------
> > >  gcc/tree-vect-loop.cc      | 11 +++--
> > >  gcc/tree-vect-slp.cc       |  2 +-
> > >  gcc/tree-vect-stmts.cc     | 93 ++++++++++++++++++++++++++++++++------
> > >  gcc/tree-vectorizer.h      |  4 +-
> > >  6 files changed, 137 insertions(+), 41 deletions(-)
> > > 
> > > diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> > > index 4f2b20a79e5..cc1ede58799 100644
> > > --- a/gcc/internal-fn.cc
> > > +++ b/gcc/internal-fn.cc
> > > @@ -4578,6 +4578,7 @@ internal_load_fn_p (internal_fn fn)
> > >      case IFN_MASK_LOAD:
> > >      case IFN_LOAD_LANES:
> > >      case IFN_MASK_LOAD_LANES:
> > > +    case IFN_MASK_LEN_LOAD_LANES:
> > >      case IFN_GATHER_LOAD:
> > >      case IFN_MASK_GATHER_LOAD:
> > >      case IFN_MASK_LEN_GATHER_LOAD:
> > > @@ -4600,6 +4601,7 @@ internal_store_fn_p (internal_fn fn)
> > >      case IFN_MASK_STORE:
> > >      case IFN_STORE_LANES:
> > >      case IFN_MASK_STORE_LANES:
> > > +    case IFN_MASK_LEN_STORE_LANES:
> > >      case IFN_SCATTER_STORE:
> > >      case IFN_MASK_SCATTER_STORE:
> > >      case IFN_MASK_LEN_SCATTER_STORE:
> > > @@ -4672,6 +4674,8 @@ internal_fn_len_index (internal_fn fn)
> > >      case IFN_COND_LEN_NEG:
> > >      case IFN_MASK_LEN_LOAD:
> > >      case IFN_MASK_LEN_STORE:
> > > +    case IFN_MASK_LEN_LOAD_LANES:
> > > +    case IFN_MASK_LEN_STORE_LANES:
> > >        return 3;
> > >  
> > >      default:
> > > @@ -4689,8 +4693,10 @@ internal_fn_mask_index (internal_fn fn)
> > >      {
> > >      case IFN_MASK_LOAD:
> > >      case IFN_MASK_LOAD_LANES:
> > > +    case IFN_MASK_LEN_LOAD_LANES:
> > >      case IFN_MASK_STORE:
> > >      case IFN_MASK_STORE_LANES:
> > > +    case IFN_MASK_LEN_STORE_LANES:
> > >      case IFN_MASK_LEN_LOAD:
> > >      case IFN_MASK_LEN_STORE:
> > >        return 2;
> > > @@ -4726,6 +4732,7 @@ internal_fn_stored_value_index (internal_fn fn)
> > >        return 4;
> > >  
> > >      case IFN_MASK_LEN_STORE:
> > > +    case IFN_MASK_LEN_STORE_LANES:
> > >        return 5;
> > >  
> > >      default:
> > > diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
> > > index a3570c45b52..232b91e8ed3 100644
> > > --- a/gcc/tree-vect-data-refs.cc
> > > +++ b/gcc/tree-vect-data-refs.cc
> > > @@ -5439,24 +5439,34 @@ vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
> > >  }
> > >  
> > >  
> > > -/* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
> > > +/* Return FN if vec_{mask_,mask_len_,}store_lanes is available for COUNT vectors of
> > >     type VECTYPE.  MASKED_P says whether the masked form is needed.  */
> > >  
> > > -bool
> > > +internal_fn
> > >  vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
> > >                              bool masked_p)
> > >  {
> > > -  if (masked_p)
> > > -    return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
> > > -                                         vec_mask_store_lanes_optab,
> > > -                                         vectype, count);
> > > +  if (vect_lanes_optab_supported_p ("vec_mask_len_store_lanes",
> > > +                                    vec_mask_len_store_lanes_optab,
> > > +                                    vectype, count))
> > > +    return IFN_MASK_LEN_STORE_LANES;
> > > +  else if (masked_p)
> > > +    {
> > > +      if (vect_lanes_optab_supported_p ("vec_mask_store_lanes",
> > > +                                        vec_mask_store_lanes_optab,
> > > +                                        vectype, count))
> > > +       return IFN_MASK_STORE_LANES;
> > > +    }
> > >    else
> > > -    return vect_lanes_optab_supported_p ("vec_store_lanes",
> > > -                                         vec_store_lanes_optab,
> > > -                                         vectype, count);
> > > +    {
> > > +      if (vect_lanes_optab_supported_p ("vec_store_lanes",
> > > +                                        vec_store_lanes_optab,
> > > +                                        vectype, count))
> > > +       return IFN_STORE_LANES;
> > > +    }
> > > +  return IFN_LAST;
> > >  }
> > >  
> > > -
> > >  /* Function vect_permute_store_chain.
> > >  
> > >     Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
> > > @@ -6056,21 +6066,32 @@ vect_grouped_load_supported (tree vectype, bool single_element_p,
> > >    return false;
> > >  }
> > >  
> > > -/* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
> > > -   type VECTYPE.  MASKED_P says whether the masked form is needed.  */
> > > +/* Return FN if vec_{masked_,mask_len_,}load_lanes is available for COUNT
> > > +   vectors of type VECTYPE.  MASKED_P says whether the masked form is needed.  */
> > >  
> > > -bool
> > > +internal_fn
> > >  vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
> > >                             bool masked_p)
> > >  {
> > > -  if (masked_p)
> > > -    return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> > > -                                         vec_mask_load_lanes_optab,
> > > -                                         vectype, count);
> > > +  if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
> > > +                                    vec_mask_len_load_lanes_optab,
> > > +                                    vectype, count))
> > > +    return IFN_MASK_LEN_LOAD_LANES;
> > > +  else if (masked_p)
> > > +    {
> > > +      if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
> > > +                                        vec_mask_load_lanes_optab,
> > > +                                        vectype, count))
> > > +       return IFN_MASK_LOAD_LANES;
> > > +    }
> > >    else
> > > -    return vect_lanes_optab_supported_p ("vec_load_lanes",
> > > -                                         vec_load_lanes_optab,
> > > -                                         vectype, count);
> > > +    {
> > > +      if (vect_lanes_optab_supported_p ("vec_load_lanes",
> > > +                                        vec_load_lanes_optab,
> > > +                                        vectype, count))
> > > +       return IFN_LOAD_LANES;
> > > +    }
> > > +  return IFN_LAST;
> > >  }
> > >  
> > >  /* Function vect_permute_load_chain.
> > > diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> > > index 07f3717ed9d..2a0cfa3e2e8 100644
> > > --- a/gcc/tree-vect-loop.cc
> > > +++ b/gcc/tree-vect-loop.cc
> > > @@ -2839,7 +2839,8 @@ start_over:
> > >       instructions record it and move on to the next instance.  */
> > >    if (loads_permuted
> > >        && SLP_INSTANCE_KIND (instance) == slp_inst_kind_store
> > > -       && vect_store_lanes_supported (vectype, group_size, false))
> > > +       && vect_store_lanes_supported (vectype, group_size, false)
> > > +            != IFN_LAST)
> > >      {
> > >        FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
> > >          {
> > > @@ -2848,9 +2849,9 @@ start_over:
> > >    /* Use SLP for strided accesses (or if we can't
> > >       load-lanes).  */
> > >    if (STMT_VINFO_STRIDED_P (stmt_vinfo)
> > > -       || ! vect_load_lanes_supported
> > > +       || vect_load_lanes_supported
> > >              (STMT_VINFO_VECTYPE (stmt_vinfo),
> > > -             DR_GROUP_SIZE (stmt_vinfo), false))
> > > +             DR_GROUP_SIZE (stmt_vinfo), false) == IFN_LAST)
> > >      break;
> > >  }
> > >  
> > > @@ -3153,7 +3154,7 @@ again:
> > >        vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
> > >        unsigned int size = DR_GROUP_SIZE (vinfo);
> > >        tree vectype = STMT_VINFO_VECTYPE (vinfo);
> > > -      if (! vect_store_lanes_supported (vectype, size, false)
> > > +      if (vect_store_lanes_supported (vectype, size, false) == IFN_LAST
> > >           && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
> > >           && ! vect_grouped_store_supported (vectype, size))
> > >         return opt_result::failure_at (vinfo->stmt,
> > > @@ -3165,7 +3166,7 @@ again:
> > >        bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
> > >        size = DR_GROUP_SIZE (vinfo);
> > >        vectype = STMT_VINFO_VECTYPE (vinfo);
> > > -      if (! vect_load_lanes_supported (vectype, size, false)
> > > +      if (vect_load_lanes_supported (vectype, size, false) == IFN_LAST
> > >           && ! vect_grouped_load_supported (vectype, single_element_p,
> > >                                             size))
> > >         return opt_result::failure_at (vinfo->stmt,
> > > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> > > index 41997d5a546..3adb06dfa18 100644
> > > --- a/gcc/tree-vect-slp.cc
> > > +++ b/gcc/tree-vect-slp.cc
> > > @@ -3094,7 +3094,7 @@ vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info,
> > >    if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype))
> > >        || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype)))
> > >      return false;
> > > -  return vect_store_lanes_supported (vectype, group_size, false);
> > > +  return vect_store_lanes_supported (vectype, group_size, false) != IFN_LAST;
> > >  }
> > >  
> > >  /* Analyze an SLP instance starting from a group of grouped stores.  Call
> > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > > index 89607a98f99..0f21315995e 100644
> > > --- a/gcc/tree-vect-stmts.cc
> > > +++ b/gcc/tree-vect-stmts.cc
> > > @@ -1610,9 +1610,15 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
> > >    bool is_load = (vls_type == VLS_LOAD);
> > >    if (memory_access_type == VMAT_LOAD_STORE_LANES)
> > >      {
> > > -      if (is_load
> > > -         ? !vect_load_lanes_supported (vectype, group_size, true)
> > > -         : !vect_store_lanes_supported (vectype, group_size, true))
> > > +      internal_fn ifn
> > > +       = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
> > > +                  : vect_store_lanes_supported (vectype, group_size, true));
> > > +      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
> > > +       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
> > > +      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
> > > +       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
> > > +                              scalar_mask);
> > > +      else
> > >         {
> > >           if (dump_enabled_p ())
> > >             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > > @@ -1620,10 +1626,7 @@ check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
> > >                              " the target doesn't have an appropriate"
> > >                              " load/store-lanes instruction.\n");
> > >           LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
> > > -         return;
> > >         }
> > > -      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
> > > -                             scalar_mask);
> > >        return;
> > >      }
> > >  
> > > @@ -2274,9 +2277,11 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
> > >  
> > >    /* Otherwise try using LOAD/STORE_LANES.  */
> > >    else if (vls_type == VLS_LOAD
> > > -          ? vect_load_lanes_supported (vectype, group_size, masked_p)
> > > -          : vect_store_lanes_supported (vectype, group_size,
> > > -                                        masked_p))
> > > +          ? vect_load_lanes_supported (vectype, group_size, masked_p)
> > > +              != IFN_LAST
> > > +          : vect_store_lanes_supported (vectype, group_size,
> > > +                                        masked_p)
> > > +              != IFN_LAST)
> > >      {
> > >        *memory_access_type = VMAT_LOAD_STORE_LANES;
> > >        overrun_p = would_overrun_p;
> > > @@ -3090,8 +3095,7 @@ vect_get_loop_variant_data_ptr_increment (
> > >    /* TODO: We don't support gather/scatter or load_lanes/store_lanes for pointer
> > >       IVs are updated by variable amount but we will support them in the future.
> > >     */
> > > -  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER
> > > -             && memory_access_type != VMAT_LOAD_STORE_LANES);
> > > +  gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
> > >  
> > >    /* When we support SELECT_VL pattern, we dynamic adjust
> > >       the memory address by .SELECT_VL result.
> > > @@ -8885,6 +8889,8 @@ vectorizable_store (vec_info *vinfo,
> > >      }
> > >  
> > >            tree final_mask = NULL;
> > > +          tree final_len = NULL;
> > > +          tree bias = NULL;
> > >            if (loop_masks)
> > >              final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
> > >                                               ncopies, vectype, j);
> > > @@ -8892,8 +8898,38 @@ vectorizable_store (vec_info *vinfo,
> > >              final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
> > >                                             final_mask, vec_mask, gsi);
> > >  
> > > +          if (vect_store_lanes_supported (vectype, group_size, false)
> > > +              == IFN_MASK_LEN_STORE_LANES)
> >  
> > can you use the previously computed 'ifn' here please?
> >  
> > Otherwise LGTM.
> >  
> > Thanks,
> > Richard.
> >  
> > > +            {
> > > +              if (loop_lens)
> > > +                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > > +                                               ncopies, vectype, j, 1);
> > > +              else
> > > +                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> > > +              signed char biasval
> > > +                = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > > +              bias = build_int_cst (intQI_type_node, biasval);
> > > +              if (!final_mask)
> > > +                {
> > > +                  mask_vectype = truth_type_for (vectype);
> > > +                  final_mask = build_minus_one_cst (mask_vectype);
> > > +                }
> > > +            }
> > > +
> > >            gcall *call;
> > > -          if (final_mask)
> > > +          if (final_len && final_mask)
> > > +            {
> > > +              /* Emit:
> > > +                   MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
> > > +                                         LEN, BIAS, VEC_ARRAY).  */
> > > +              unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> > > +              tree alias_ptr = build_int_cst (ref_type, align);
> > > +              call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
> > > +                                                 dataref_ptr, alias_ptr,
> > > +                                                 final_mask, final_len, bias,
> > > +                                                 vec_array);
> > > +            }
> > > +          else if (final_mask)
> > >              {
> > >                /* Emit:
> > >                     MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
> > > @@ -10445,6 +10481,8 @@ vectorizable_load (vec_info *vinfo,
> > >            vec_array = create_vector_array (vectype, vec_num);
> > >  
> > >            tree final_mask = NULL_TREE;
> > > +          tree final_len = NULL_TREE;
> > > +          tree bias = NULL_TREE;
> > >            if (loop_masks)
> > >              final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
> > >                                               ncopies, vectype, j);
> > > @@ -10452,8 +10490,37 @@ vectorizable_load (vec_info *vinfo,
> > >              final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
> > >                                             final_mask, vec_mask, gsi);
> > >  
> > > +          if (vect_load_lanes_supported (vectype, group_size, false)
> > > +              == IFN_MASK_LEN_LOAD_LANES)
> > > +            {
> > > +              if (loop_lens)
> > > +                final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
> > > +                                               ncopies, vectype, j, 1);
> > > +              else
> > > +                final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
> > > +              signed char biasval
> > > +                = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
> > > +              bias = build_int_cst (intQI_type_node, biasval);
> > > +              if (!final_mask)
> > > +                {
> > > +                  mask_vectype = truth_type_for (vectype);
> > > +                  final_mask = build_minus_one_cst (mask_vectype);
> > > +                }
> > > +            }
> > > +
> > >            gcall *call;
> > > -          if (final_mask)
> > > +          if (final_len && final_mask)
> > > +            {
> > > +              /* Emit:
> > > +                   VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
> > > +                                                    VEC_MASK, LEN, BIAS).  */
> > > +              unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
> > > +              tree alias_ptr = build_int_cst (ref_type, align);
> > > +              call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
> > > +                                                 dataref_ptr, alias_ptr,
> > > +                                                 final_mask, final_len, bias);
> > > +            }
> > > +          else if (final_mask)
> > >              {
> > >                /* Emit:
> > >                     VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
> > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > > index 5987a327332..6a2e55aa1fc 100644
> > > --- a/gcc/tree-vectorizer.h
> > > +++ b/gcc/tree-vectorizer.h
> > > @@ -2296,9 +2296,9 @@ extern tree bump_vector_ptr (vec_info *, tree, gimple *, gimple_stmt_iterator *,
> > >  extern void vect_copy_ref_info (tree, tree);
> > >  extern tree vect_create_destination_var (tree, tree);
> > >  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
> > > -extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
> > > +extern internal_fn vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
> > >  extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
> > > -extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
> > > +extern internal_fn vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
> > >  extern void vect_permute_store_chain (vec_info *, vec<tree> &,
> > >                                        unsigned int, stmt_vec_info,
> > >                                        gimple_stmt_iterator *, vec<tree> *);
> > > 
> >  
> > 
>  
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
