Claudio Bantaloukas <[email protected]> writes:
> This patch adds support for the following intrinsics:
> - svdot[_f32_mf8]_fpm
> - svdot_lane[_f32_mf8]_fpm
> - svdot[_f16_mf8]_fpm
> - svdot_lane[_f16_mf8]_fpm
>
> The first two are available under a combination of the FP8DOT4 and SVE2
> features.
> Alternatively, they are available under the SSVE_FP8DOT4 feature in
> streaming mode.
> The final two are available under a combination of the FP8DOT2 and SVE2
> features.
> Alternatively, they are available under the SSVE_FP8DOT2 feature in
> streaming mode.
Some of the comments from the previous patches apply here too
(e.g. the boilerplate at the start of the tests, and testing the
highest in-range index).
It looks like the patch is missing a change to doc/invoke.texi.
Otherwise it's just banal trivia, sorry:
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index 022163f0726..65df48a3e65 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -835,21 +835,28 @@ public:
> rtx
> expand (function_expander &e) const override
> {
> - /* In the optab, the multiplication operands come before the accumulator
> - operand. The optab is keyed off the multiplication mode. */
> - e.rotate_inputs_left (0, 3);
> insn_code icode;
> - if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES)
> - icode = e.convert_optab_handler_for_sign (sdot_prod_optab,
> - udot_prod_optab,
> - 0, e.result_mode (),
> - GET_MODE (e.args[0]));
> + if (e.fpm_mode == aarch64_sve::FPM_set)
> + {
> + icode = code_for_aarch64_sve_dot (e.result_mode ());
> + }
Formatting nit, but: no braces around single statements, with the body
then being indented by 2 spaces relative to the "if".
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> index 09f343e7118..9f79f6e28c7 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
> @@ -3994,6 +3994,34 @@ struct ternary_bfloat_def
> };
> SHAPE (ternary_bfloat)
>
> +/* sv<t0>_t svfoo[_t0](sv<t0>_t, svmfloat8_t, svmfloat8_t). */
> +struct ternary_mfloat8_def
> + : public ternary_resize2_base<8, TYPE_mfloat, TYPE_mfloat>
> +{
> + void
> + build (function_builder &b, const function_group_info &group) const
> override
> + {
> + gcc_assert (group.fpm_mode == FPM_set);
> + b.add_overloaded_functions (group, MODE_none);
> + build_all (b, "v0,v0,vM,vM", group, MODE_none);
> + }
> +
> + tree
> + resolve (function_resolver &r) const override
> + {
> + type_suffix_index type;
> + if (!r.check_num_arguments (4)
> + || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES
> + || !r.require_vector_type (1, VECTOR_TYPE_svmfloat8_t)
> + || !r.require_vector_type (2, VECTOR_TYPE_svmfloat8_t)
> + || !r.require_scalar_type (3, "int64_t"))
This should be "uint64_t" rather than "int64_t".
> + return error_mark_node;
> +
> + return r.resolve_to (r.mode_suffix_id, type, TYPE_SUFFIX_mf8,
> GROUP_none);
> + }
> +};
> +SHAPE (ternary_mfloat8)
> +
> /* sv<t0>_t svfoo[_t0](sv<t0>_t, svbfloat16_t, svbfloat16_t, uint64_t)
>
> where the final argument is an integer constant expression in the range
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> index c84c153e913..7d90e3b5e20 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
> @@ -363,3 +363,15 @@ DEF_SVE_FUNCTION_GS_FPM (svmlallbb_lane,
> ternary_mfloat8_lane, s_float_mf8, none
> DEF_SVE_FUNCTION_GS_FPM (svmlallbt_lane, ternary_mfloat8_lane, s_float_mf8,
> none, none, set)
> DEF_SVE_FUNCTION_GS_FPM (svmlalltb_lane, ternary_mfloat8_lane, s_float_mf8,
> none, none, set)
> #undef REQUIRED_EXTENSIONS
> +
> +#define REQUIRED_EXTENSIONS \
> + streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT4,
> AARCH64_FL_SSVE_FP8DOT4)
Elsewhere we've been putting the non-streaming and streaming requirements
on separate lines if the whole thing doesn't fit on one line:
#define REQUIRED_EXTENSIONS \
  streaming_compatible (AARCH64_FL_SVE2 | AARCH64_FL_FP8DOT4, \
                        AARCH64_FL_SSVE_FP8DOT4)
Same below.
Looks good to me otherwise, thanks.
Richard