On Fri, 18 Oct 2024, Robin Dapp wrote:
> This patch adds else-operand handling to the internal functions.
>
> gcc/ChangeLog:
>
> * internal-fn.cc (add_mask_and_len_args): Rename...
> (add_mask_else_and_len_args): ...to this and add else handling.
> (expand_partial_load_optab_fn): Use adjusted function.
> (expand_partial_store_optab_fn): Ditto.
> (expand_scatter_store_optab_fn): Ditto.
> (expand_gather_load_optab_fn): Ditto.
> (internal_fn_len_index): Add else handling.
> (internal_fn_else_index): Ditto.
> (internal_fn_mask_index): Ditto.
> (get_supported_else_vals): New function.
> (supported_else_val_p): New function.
> (internal_gather_scatter_fn_supported_p): Add else operand.
> * internal-fn.h (internal_gather_scatter_fn_supported_p): Define
> else constants.
> (MASK_LOAD_ELSE_ZERO): Ditto.
> (MASK_LOAD_ELSE_M1): Ditto.
> (MASK_LOAD_ELSE_UNDEFINED): Ditto.
> (get_supported_else_vals): Declare.
> (supported_else_val_p): Ditto.
> ---
> gcc/internal-fn.cc | 131 +++++++++++++++++++++++++++++++++++++++------
> gcc/internal-fn.h | 15 +++++-
> 2 files changed, 129 insertions(+), 17 deletions(-)
>
> diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
> index d89a04fe412..b6049cec91e 100644
> --- a/gcc/internal-fn.cc
> +++ b/gcc/internal-fn.cc
> @@ -331,17 +331,18 @@ get_multi_vector_move (tree array_type, convert_optab
> optab)
> return convert_optab_handler (optab, imode, vmode);
> }
>
> -/* Add mask and len arguments according to the STMT. */
> +/* Add mask, else, and len arguments according to the STMT. */
>
> static unsigned int
> -add_mask_and_len_args (expand_operand *ops, unsigned int opno, gcall *stmt)
> +add_mask_else_and_len_args (expand_operand *ops, unsigned int opno, gcall
> *stmt)
> {
> internal_fn ifn = gimple_call_internal_fn (stmt);
> int len_index = internal_fn_len_index (ifn);
> /* BIAS is always consecutive next of LEN. */
> int bias_index = len_index + 1;
> int mask_index = internal_fn_mask_index (ifn);
> - /* The order of arguments are always {len,bias,mask}. */
> +
> + /* The order of arguments is always {mask, else, len, bias}. */
> if (mask_index >= 0)
> {
> tree mask = gimple_call_arg (stmt, mask_index);
> @@ -362,6 +363,23 @@ add_mask_and_len_args (expand_operand *ops, unsigned int
> opno, gcall *stmt)
>
> create_input_operand (&ops[opno++], mask_rtx,
> TYPE_MODE (TREE_TYPE (mask)));
> +
> + }
> +
> + int els_index = internal_fn_else_index (ifn);
> + if (els_index >= 0)
> + {
> + tree els = gimple_call_arg (stmt, els_index);
> + tree els_type = TREE_TYPE (els);
> + if (TREE_CODE (els) == SSA_NAME
> + && SSA_NAME_IS_DEFAULT_DEF (els)
> + && VAR_P (SSA_NAME_VAR (els)))
> + create_undefined_input_operand (&ops[opno++], TYPE_MODE (els_type));
> + else
> + {
> + rtx els_rtx = expand_normal (els);
> + create_input_operand (&ops[opno++], els_rtx, TYPE_MODE (els_type));
> + }
> }
> if (len_index >= 0)
> {
> @@ -3014,7 +3032,7 @@ static void
> expand_partial_load_optab_fn (internal_fn ifn, gcall *stmt, convert_optab
> optab)
> {
> int i = 0;
> - class expand_operand ops[5];
> + class expand_operand ops[6];
> tree type, lhs, rhs, maskt;
> rtx mem, target;
> insn_code icode;
> @@ -3044,7 +3062,7 @@ expand_partial_load_optab_fn (internal_fn ifn, gcall
> *stmt, convert_optab optab)
> target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
> create_call_lhs_operand (&ops[i++], target, TYPE_MODE (type));
> create_fixed_operand (&ops[i++], mem);
> - i = add_mask_and_len_args (ops, i, stmt);
> + i = add_mask_else_and_len_args (ops, i, stmt);
> expand_insn (icode, i, ops);
>
> assign_call_lhs (lhs, target, &ops[0]);
> @@ -3090,7 +3108,7 @@ expand_partial_store_optab_fn (internal_fn ifn, gcall
> *stmt, convert_optab optab
> reg = expand_normal (rhs);
> create_fixed_operand (&ops[i++], mem);
> create_input_operand (&ops[i++], reg, TYPE_MODE (type));
> - i = add_mask_and_len_args (ops, i, stmt);
> + i = add_mask_else_and_len_args (ops, i, stmt);
> expand_insn (icode, i, ops);
> }
>
> @@ -3676,7 +3694,7 @@ expand_scatter_store_optab_fn (internal_fn, gcall
> *stmt, direct_optab optab)
> create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
> create_integer_operand (&ops[i++], scale_int);
> create_input_operand (&ops[i++], rhs_rtx, TYPE_MODE (TREE_TYPE (rhs)));
> - i = add_mask_and_len_args (ops, i, stmt);
> + i = add_mask_else_and_len_args (ops, i, stmt);
>
> insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE
> (rhs)),
> TYPE_MODE (TREE_TYPE (offset)));
> @@ -3705,7 +3723,7 @@ expand_gather_load_optab_fn (internal_fn, gcall *stmt,
> direct_optab optab)
> create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE
> (offset)));
> create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
> create_integer_operand (&ops[i++], scale_int);
> - i = add_mask_and_len_args (ops, i, stmt);
> + i = add_mask_else_and_len_args (ops, i, stmt);
> insn_code icode = convert_optab_handler (optab, TYPE_MODE (TREE_TYPE
> (lhs)),
> TYPE_MODE (TREE_TYPE (offset)));
> expand_insn (icode, i, ops);
> @@ -4600,6 +4618,18 @@ get_len_internal_fn (internal_fn fn)
> case IFN_COND_##NAME:
> \
> return IFN_COND_LEN_##NAME;
> #include "internal-fn.def"
> + default:
> + break;
> + }
> +
> + switch (fn)
> + {
> + case IFN_MASK_LOAD:
> + return IFN_MASK_LEN_LOAD;
> + case IFN_MASK_LOAD_LANES:
> + return IFN_MASK_LEN_LOAD_LANES;
> + case IFN_MASK_GATHER_LOAD:
> + return IFN_MASK_LEN_GATHER_LOAD;
> default:
> return IFN_LAST;
> }
> @@ -4785,8 +4815,12 @@ internal_fn_len_index (internal_fn fn)
> case IFN_LEN_STORE:
> return 2;
>
> - case IFN_MASK_LEN_GATHER_LOAD:
> case IFN_MASK_LEN_SCATTER_STORE:
> + return 5;
> +
> + case IFN_MASK_LEN_GATHER_LOAD:
> + return 6;
> +
> case IFN_COND_LEN_FMA:
> case IFN_COND_LEN_FMS:
> case IFN_COND_LEN_FNMA:
> @@ -4811,13 +4845,15 @@ internal_fn_len_index (internal_fn fn)
> return 4;
>
> case IFN_COND_LEN_NEG:
> - case IFN_MASK_LEN_LOAD:
> case IFN_MASK_LEN_STORE:
> - case IFN_MASK_LEN_LOAD_LANES:
> case IFN_MASK_LEN_STORE_LANES:
> case IFN_VCOND_MASK_LEN:
> return 3;
>
> + case IFN_MASK_LEN_LOAD:
> + case IFN_MASK_LEN_LOAD_LANES:
> + return 4;
> +
> default:
> return -1;
> }
> @@ -4867,6 +4903,12 @@ internal_fn_else_index (internal_fn fn)
> case IFN_COND_LEN_SHR:
> return 3;
>
> + case IFN_MASK_LOAD:
> + case IFN_MASK_LEN_LOAD:
> + case IFN_MASK_LOAD_LANES:
> + case IFN_MASK_LEN_LOAD_LANES:
> + return 3;
> +
> case IFN_COND_FMA:
> case IFN_COND_FMS:
> case IFN_COND_FNMA:
> @@ -4877,6 +4919,10 @@ internal_fn_else_index (internal_fn fn)
> case IFN_COND_LEN_FNMS:
> return 4;
>
> + case IFN_MASK_GATHER_LOAD:
> + case IFN_MASK_LEN_GATHER_LOAD:
> + return 5;
> +
> default:
> return -1;
> }
> @@ -4908,6 +4954,7 @@ internal_fn_mask_index (internal_fn fn)
> case IFN_MASK_LEN_SCATTER_STORE:
> return 4;
>
> + case IFN_VCOND_MASK:
> case IFN_VCOND_MASK_LEN:
> return 0;
>
> @@ -4944,6 +4991,50 @@ internal_fn_stored_value_index (internal_fn fn)
> }
> }
>
> +
> +/* Push all supported else values for the optab referred to by ICODE
> + into ELSE_VALS. The index of the else operand must be specified in
> + ELSE_INDEX. */
> +
> +void
> +get_supported_else_vals (enum insn_code icode, unsigned else_index,
> + auto_vec<int> &else_vals)
Do not pass auto_vec by reference; instead, use a vec<int> &else_vals
argument.
> +{
> + const struct insn_data_d *data = &insn_data[icode];
> + if ((char)else_index >= data->n_operands)
> + return;
> +
> + machine_mode else_mode = data->operand[else_index].mode;
> +
> + /* For now we only support else values of 0, -1, and "undefined". */
> + if (insn_operand_matches (icode, else_index, CONST0_RTX (else_mode)))
> + else_vals.safe_push (MASK_LOAD_ELSE_ZERO);
> +
> + if (insn_operand_matches (icode, else_index, gen_rtx_SCRATCH (else_mode)))
> + else_vals.safe_push (MASK_LOAD_ELSE_UNDEFINED);
> +
> + if (GET_MODE_CLASS (else_mode) == MODE_VECTOR_INT
> + && insn_operand_matches (icode, else_index, CONSTM1_RTX (else_mode)))
> + else_vals.safe_push (MASK_LOAD_ELSE_M1);
> +}
> +
> +/* Return true if the else value ELSE_VAL (one of MASK_LOAD_ELSE_ZERO,
> MASK_LOAD_ELSE_M1, and MASK_LOAD_ELSE_UNDEFINED) is valid for the optab
> + referred to by ICODE. The index of the else operand must be specified
> + in ELSE_INDEX. */
> +
> +bool
> +supported_else_val_p (enum insn_code icode, unsigned else_index, int
> else_val)
> +{
> + if (else_val != MASK_LOAD_ELSE_ZERO && else_val != MASK_LOAD_ELSE_M1
> + && else_val != MASK_LOAD_ELSE_UNDEFINED)
> + __builtin_unreachable ();
> +
> + auto_vec<int> else_vals;
> + get_supported_else_vals (icode, else_index, else_vals);
> + return else_vals.contains (else_val);
> +}
> +
> /* Return true if the target supports gather load or scatter store function
> IFN. For loads, VECTOR_TYPE is the vector type of the load result,
> while for stores it is the vector type of the stored data argument.
> @@ -4951,12 +5042,15 @@ internal_fn_stored_value_index (internal_fn fn)
> or stored. OFFSET_VECTOR_TYPE is the vector type that holds the
> offset from the shared base address of each loaded or stored element.
> SCALE is the amount by which these offsets should be multiplied
> - *after* they have been extended to address width. */
> + *after* they have been extended to address width.
> + If the target supports the gather load the supported else values
> + will be added to the vector ELSVAL points to if it is nonzero. */
>
> bool
> internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
> tree memory_element_type,
> - tree offset_vector_type, int scale)
> + tree offset_vector_type, int scale,
> + auto_vec<int> *elsvals)
> {
> if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
> TYPE_SIZE (memory_element_type)))
> @@ -4969,9 +5063,14 @@ internal_gather_scatter_fn_supported_p (internal_fn
> ifn, tree vector_type,
> TYPE_MODE (offset_vector_type));
> int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
> bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (offset_vector_type));
> - return (icode != CODE_FOR_nothing
> - && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
> - && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
> + bool ok = icode != CODE_FOR_nothing
> + && insn_operand_matches (icode, 2 + output_ops, GEN_INT (unsigned_p))
> + && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale));
> +
> + if (ok && elsvals)
> + get_supported_else_vals (icode, MASK_LOAD_GATHER_ELSE_IDX, *elsvals);
> +
> + return ok;
> }
>
> /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 2785a5a95a2..11bad4e5ed9 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -240,9 +240,22 @@ extern int internal_fn_len_index (internal_fn);
> extern int internal_fn_else_index (internal_fn);
> extern int internal_fn_stored_value_index (internal_fn);
> extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
> - tree, tree, int);
> + tree, tree, int,
> + auto_vec<int> * = nullptr);
> extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
> poly_uint64, unsigned int);
> +
> +/* Integer constants representing which else value is supported for masked
> load
> + functions. */
> +#define MASK_LOAD_ELSE_ZERO -1
> +#define MASK_LOAD_ELSE_M1 -2
> +#define MASK_LOAD_ELSE_UNDEFINED -3
> +
> +#define MASK_LOAD_GATHER_ELSE_IDX 6
Why is this define needed?
Otherwise looks OK.
Richard.
> +extern void get_supported_else_vals (enum insn_code, unsigned,
> + auto_vec<int> &);
> +extern bool supported_else_val_p (enum insn_code, unsigned, int);
> +
> #define VECT_PARTIAL_BIAS_UNSUPPORTED 127
>
> extern signed char internal_len_load_store_bias (internal_fn ifn,
>