On Fri, May 23, 2025 at 2:31 PM Alexander Monakov <[email protected]> wrote:
>
> In PR 105965 we accepted a request to form FMA instructions when the
> source code is using a narrow generic vector that contains just one
> element, corresponding to V1SF or V1DF mode, while the backend does not
> expand fma patterns for such modes.
>
> For this to work under -ffp-contract=on, we either need to modify
> backends, or emulate such degenerate-vector FMA via scalar FMA in
> tree-vect-generic. Do the latter.
Can you instead apply the lowering during gimplification? I ask because
having an unsupported internal function in the IL, one that the user could
not have emitted directly, is somewhat bad. I thought the vector lowering
could be generalized for more single-argument internal functions, but then
no such unsupported calls should exist in the first place.
Richard.
> gcc/c-family/ChangeLog:
>
> * c-gimplify.cc (fma_supported_p): Allow forming single-element
> vector FMA when scalar FMA is available.
> (c_gimplify_expr): Allow vector types.
>
> gcc/ChangeLog:
>
> * tree-vect-generic.cc (expand_vec1_fma): New helper. Use it...
> (expand_vector_operations_1): ... here to handle IFN_FMA.
> ---
> gcc/c-family/c-gimplify.cc | 10 ++++++--
> gcc/tree-vect-generic.cc | 48 ++++++++++++++++++++++++++++++++++++--
> 2 files changed, 54 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc
> index c6fb764656..1942d5019e 100644
> --- a/gcc/c-family/c-gimplify.cc
> +++ b/gcc/c-family/c-gimplify.cc
> @@ -875,7 +875,13 @@ c_build_bind_expr (location_t loc, tree block, tree body)
> static bool
> fma_supported_p (enum internal_fn fn, tree type)
> {
> - return direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH);
> + return (direct_internal_fn_supported_p (fn, type, OPTIMIZE_FOR_BOTH)
> + /* Accept single-element vector FMA (see PR 105965) when the
> + backend handles the scalar but not the vector mode. */
> + || (VECTOR_TYPE_P (type)
> + && known_eq (TYPE_VECTOR_SUBPARTS (type), 1U)
> + && direct_internal_fn_supported_p (fn, TREE_TYPE (type),
> + OPTIMIZE_FOR_BOTH)));
> }
>
> /* Gimplification of expression trees. */
> @@ -939,7 +945,7 @@ c_gimplify_expr (tree *expr_p, gimple_seq *pre_p ATTRIBUTE_UNUSED,
> /* For -ffp-contract=on we need to attempt FMA contraction only
> during initial gimplification. Late contraction across statement
> boundaries would violate language semantics. */
> - if (SCALAR_FLOAT_TYPE_P (type)
> + if ((SCALAR_FLOAT_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))
> && flag_fp_contract_mode == FP_CONTRACT_ON
> && cfun && !(cfun->curr_properties & PROP_gimple_any)
> && fma_supported_p (IFN_FMA, type))
> diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
> index 3c68361870..954b84edce 100644
> --- a/gcc/tree-vect-generic.cc
> +++ b/gcc/tree-vect-generic.cc
> @@ -1983,6 +1983,36 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
> gsi_replace (gsi, g, false);
> }
>
> +/* Expand IFN_FMA, assuming vector contains just one scalar.
> + c_gimplify_expr can introduce it when performing FMA contraction. */
> +
> +static void
> +expand_vec1_fma (gimple_stmt_iterator *gsi)
> +{
> + gcall *call = as_a <gcall *> (gsi_stmt (*gsi));
> + tree type = TREE_TYPE (gimple_call_arg (call, 0));
> + if (!VECTOR_TYPE_P (type))
> + return;
> + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (type), 1U));
> +
> + for (int i = 0; i < 3; i++)
> + {
> + tree arg = gimple_call_arg (call, i);
> + arg = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (type), arg);
> + gimple_call_set_arg (call, i, arg);
> + }
> + tree lhs = gimple_call_lhs (call);
> + if (lhs)
> + {
> + tree new_lhs = make_ssa_name (TREE_TYPE (type));
> + gimple_call_set_lhs (call, new_lhs);
> + tree ctor = build_constructor_single (type, 0, new_lhs);
> + gimple *g = gimple_build_assign (lhs, CONSTRUCTOR, ctor);
> + gsi_insert_after (gsi, g, GSI_NEW_STMT);
> + }
> + update_stmt (call);
> +}
> +
> /* Process one statement. If we identify a vector operation, expand it. */
>
> static void
> @@ -1998,8 +2028,22 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
> gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
> if (!stmt)
> {
> - if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
> - expand_vector_conversion (gsi);
> + gcall *call = dyn_cast <gcall *> (gsi_stmt (*gsi));
> + if (!call || !gimple_call_internal_p (call))
> + return;
> + switch (gimple_call_internal_fn (call))
> + {
> + case IFN_VEC_CONVERT:
> + return expand_vector_conversion (gsi);
> + case IFN_FMA:
> + case IFN_FMS:
> + case IFN_FNMA:
> + case IFN_FNMS:
> + if (!direct_internal_fn_supported_p (call, OPTIMIZE_FOR_BOTH))
> + return expand_vec1_fma (gsi);
> + default:
> + break;
> + }
> return;
> }
>
> --
> 2.49.0
>