On Fri, Nov 14, 2025 at 3:03 PM Robin Dapp <[email protected]> wrote:
>
> Hi,
>
> Currently select_vl is a direct optab with its mode always Xmode/Pmode.
> This does not give us sufficient freedom to enable/disable vsetvl
> (=SELECT_VL) depending on the vector mode.
>
> This patch makes select_vl a convert optab and adjusts the associated IFN
> functions as well as the query/emit code in the vectorizer.
>
> With this patch nothing new is actually exercised yet.  This is going to
> happen in a separate riscv patch that enables "VLS" select_vl.
>
> Bootstrapped on x86 and power10. Regtested on riscv64 (with and without
> follow-up patches).

OK.

Thanks,
Richard.

> Regards
>  Robin
>
> gcc/ChangeLog:
>
>         * config/riscv/autovec.md (select_vl<mode>): Rename to...
>         (select_vl<V:mode><P:mode>): ...this.
>         * doc/md.texi: Document new behavior.
>         * internal-fn.cc (select_vl_direct): Define.
>         (expand_select_vl_optab_fn): Adjust for convert optab.
>         (direct_select_vl_optab_supported_p): Ditto.
>         * internal-fn.def (SELECT_VL): Ditto.
>         * optabs.def (OPTAB_CD): Add select_vl.
>         (OPTAB_D): Remove select_vl.
>         * tree-vect-loop-manip.cc (vect_set_loop_controls_directly):
>         Adjust for convert select_vl optab.
>         * tree-vect-loop.cc: Ditto.
> ---
>  gcc/config/riscv/autovec.md |  5 +++--
>  gcc/doc/md.texi             | 17 +++++++++--------
>  gcc/internal-fn.cc          |  5 +++++
>  gcc/internal-fn.def         |  2 +-
>  gcc/optabs.def              |  2 +-
>  gcc/tree-vect-loop-manip.cc |  4 +++-
>  gcc/tree-vect-loop.cc       | 15 ++++++++++++---
>  7 files changed, 34 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index 60beab98e03..bb66afad30b 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -1334,10 +1334,11 @@ (define_insn_and_split "fnms<mode>4"
>  ;; == SELECT_VL
>  ;; =========================================================================
>
> -(define_expand "select_vl<mode>"
> +(define_expand "select_vl<V:mode><P:mode>"
>    [(match_operand:P 0 "register_operand")
>     (match_operand:P 1 "vector_length_operand")
> -   (match_operand:P 2 "immediate_operand")]
> +   (match_operand:P 2 "immediate_operand")
> +   (match_operand:V 3)]
>    "TARGET_VECTOR"
>  {
>    riscv_vector::expand_select_vl (operands);
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index ae5d709bd47..6dedca225ae 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -5273,13 +5273,14 @@ for (i = 1; i < operand3; i++)
>    operand0[i] = operand0[i - 1] && (operand1 + i < operand2);
>  @end smallexample
>
> -@cindex @code{select_vl@var{m}} instruction pattern
> -@item @code{select_vl@var{m}}
> -Set operand 0 to the number of scalar iterations that should be handled
> -by one iteration of a vector loop.  Operand 1 is the total number of
> -scalar iterations that the loop needs to process and operand 2 is a
> -maximum bound on the result (also known as the maximum ``vectorization
> -factor'').
> +@cindex @code{select_vl@var{m}@var{n}} instruction pattern
> +@item @code{select_vl@var{m}@var{n}}
> +Set operand 0 (of mode @var{n}) to the number of scalar iterations that
> +should be handled by one iteration of a vector loop.  Operand 1 is the
> +total number of scalar iterations that the loop needs to process and
> +operand 2 is a maximum bound on the result (also known as the
> +maximum ``vectorization factor'').  Operand 3 (of mode @var{m}) is
> +a dummy parameter to pass the vector mode to be used.
>
>  The maximum value of operand 0 is given by:
>  @smallexample
> @@ -5293,7 +5294,7 @@ this, it is generally not useful to define this instruction if it will
>  always calculate the maximum value.
>
>  This optab is only useful on targets that implement @samp{len_load_@var{m}}
> -and/or @samp{len_store_@var{m}}.
> +and/or @samp{len_store_@var{m}} or the associated @samp{_len} variants.
>
>  @cindex @code{check_raw_ptrs@var{m}} instruction pattern
>  @item @samp{check_raw_ptrs@var{m}}
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index 514fe98f40d..13fbd2ce788 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -195,6 +195,7 @@ init_internal_fns ()
>  #define check_ptrs_direct { 0, 0, false }
>  #define crc_direct { 1, -1, true }
>  #define reduc_sbool_direct { 0, 0, true }
> +#define select_vl_direct { 2, 0, false }
>
>  const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
>  #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
> @@ -4183,6 +4184,9 @@ expand_reduc_sbool_optab_fn (internal_fn fn, gcall *stmt, direct_optab optab)
>  #define expand_check_ptrs_optab_fn(FN, STMT, OPTAB) \
>    expand_direct_optab_fn (FN, STMT, OPTAB, 4)
>
> +#define expand_select_vl_optab_fn(FN, STMT, OPTAB) \
> +  expand_convert_optab_fn (FN, STMT, OPTAB, 3)
> +
>  /* Expanders for optabs that can use expand_convert_optab_fn.  */
>
>  #define expand_unary_convert_optab_fn(FN, STMT, OPTAB) \
> @@ -4299,6 +4303,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
>  #define direct_vec_set_optab_supported_p direct_optab_supported_p
>  #define direct_vec_extract_optab_supported_p convert_optab_supported_p
>  #define direct_reduc_sbool_optab_supported_p direct_optab_supported_p
> +#define direct_select_vl_optab_supported_p convert_optab_supported_p
>
>  /* Return the optab used by internal function FN.  */
>
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 7874fcfb3df..4e83a59880e 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -250,7 +250,7 @@ DEF_INTERNAL_OPTAB_FN (LEN_STORE, 0, len_store, len_store)
>  DEF_INTERNAL_OPTAB_FN (MASK_LEN_STORE, 0, mask_len_store, mask_len_store)
>
>  DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
> -DEF_INTERNAL_OPTAB_FN (SELECT_VL, ECF_CONST | ECF_NOTHROW, select_vl, binary)
> +DEF_INTERNAL_OPTAB_FN (SELECT_VL, ECF_CONST | ECF_NOTHROW, select_vl, select_vl)
>  DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
>                        check_raw_ptrs, check_ptrs)
>  DEF_INTERNAL_OPTAB_FN (CHECK_WAR_PTRS, ECF_CONST | ECF_NOTHROW,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index b6f290a9513..5218b6d6ec0 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -116,6 +116,7 @@ OPTAB_CD (udot_prod_optab, "udot_prod$I$a$b")
>  OPTAB_CD (usdot_prod_optab, "usdot_prod$I$a$b")
>
>  OPTAB_CD (while_ult_optab, "while_ult$a$b")
> +OPTAB_CD (select_vl_optab, "select_vl$a$b")
>
>  OPTAB_NL(add_optab, "add$P$a3", PLUS, "add", '3', gen_int_fp_fixed_libfunc)
>  OPTAB_NX(add_optab, "add$F$a3")
> @@ -553,6 +554,5 @@ OPTAB_D (len_load_optab, "len_load_$a")
>  OPTAB_D (len_store_optab, "len_store_$a")
>  OPTAB_D (mask_len_strided_load_optab, "mask_len_strided_load_$a")
>  OPTAB_D (mask_len_strided_store_optab, "mask_len_strided_store_$a")
> -OPTAB_D (select_vl_optab, "select_vl$a")
>  OPTAB_D (andn_optab, "andn$a3")
>  OPTAB_D (iorn_optab, "iorn$a3")
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 3e121a96ddf..9e5a00f3fc6 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -574,8 +574,10 @@ vect_set_loop_controls_directly (class loop *loop, loop_vec_info loop_vinfo,
>         {
>           create_iv (nitems_total, MINUS_EXPR, step, NULL_TREE, loop, &incr_gsi,
>                      insert_after, &index_before_incr, &index_after_incr);
> +         tree vectype = build_zero_cst (rgc->type);
>           tree len = gimple_build (header_seq, IFN_SELECT_VL, iv_type,
> -                                  index_before_incr, nitems_step);
> +                                  index_before_incr, nitems_step,
> +                                  vectype);
>           gimple_seq_add_stmt (header_seq, gimple_build_assign (step, len));
>         }
>        else
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 576a69c82d2..65cf0d42995 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -2428,14 +2428,23 @@ start_over:
>    if (LOOP_VINFO_USING_DECREMENTING_IV_P (loop_vinfo))
>      {
>        tree iv_type = LOOP_VINFO_RGROUP_IV_TYPE (loop_vinfo);
> -      if (direct_internal_fn_supported_p (IFN_SELECT_VL, iv_type,
> -                                         OPTIMIZE_FOR_SPEED)
> -         && LOOP_VINFO_LENS (loop_vinfo).length () == 1
> +      if (LOOP_VINFO_LENS (loop_vinfo).length () == 1
>           && LOOP_VINFO_LENS (loop_vinfo)[0].factor == 1
>           && (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
>               || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ()))
>         LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = true;
>
> +      if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
> +       for (auto rgc : LOOP_VINFO_LENS (loop_vinfo))
> +         if (rgc.type
> +             && !direct_internal_fn_supported_p (IFN_SELECT_VL,
> +                                                 rgc.type, iv_type,
> +                                                 OPTIMIZE_FOR_SPEED))
> +           {
> +             LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo) = false;
> +             break;
> +           }
> +
>        /* If any of the SLP instances cover more than a single lane
>          we cannot use .SELECT_VL at the moment, even if the number
>          of lanes is uniform throughout the SLP graph.  */
> --
> 2.51.0
>

Reply via email to