https://gcc.gnu.org/g:f788324c724240b6280243119a6fa30f3d37b467
commit r16-1778-gf788324c724240b6280243119a6fa30f3d37b467
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Jun 30 11:23:49 2025 +0200

    Handle SLP build operand swapping for ternaries and calls

    The following adds SLP build operand swapping for .FMA which is a
    ternary operator and a call. The current code only handles binary
    operators in assignments, thus the patch extends this to handle both
    calls and assignments as well as binary and ternary operators.

            * tree-vect-slp.cc (vect_build_slp_2): Handle ternary and call
            operators when swapping operands.

            * gcc.target/i386/vect-pr82426.c: Pass explicit -ffp-contract=fast.
            * gcc.target/i386/vect-pr82426-2.c: New testcase variant with
            -ffp-contract=on.

Diff:
---
 gcc/testsuite/gcc.target/i386/vect-pr82426-2.c | 31 +++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-pr82426.c   |  2 +-
 gcc/tree-vect-slp.cc                           | 37 +++++++++++++++++++-------
 3 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c b/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c
new file mode 100644
index 000000000000..525940866ad7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c
@@ -0,0 +1,31 @@
+/* i?86 does not have V2SF, x32 does though. */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -mavx -mfma -ffp-contract=on" } */
+
+struct Matrix
+{
+  float m11;
+  float m12;
+  float m21;
+  float m22;
+  float dx;
+  float dy;
+};
+
+struct Matrix multiply(const struct Matrix *a, const struct Matrix *b)
+{
+  struct Matrix out;
+  out.m11 = a->m11*b->m11 + a->m12*b->m21;
+  out.m12 = a->m11*b->m12 + a->m12*b->m22;
+  out.m21 = a->m21*b->m11 + a->m22*b->m21;
+  out.m22 = a->m21*b->m12 + a->m22*b->m22;
+
+  out.dx = a->dx*b->m11 + a->dy*b->m21 + b->dx;
+  out.dy = a->dx*b->m12 + a->dy*b->m22 + b->dy;
+  return out;
+}
+
+/* The whole kernel should be vectorized with V4SF and V2SF operations. */
+/* { dg-final { scan-assembler-times "vadd" 1 } } */
+/* { dg-final { scan-assembler-times "vmul" 2 } } */
+/* { dg-final { scan-assembler-times "vfma" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-pr82426.c b/gcc/testsuite/gcc.target/i386/vect-pr82426.c
index 03b10adff9b6..8ce8fe78a91b 100644
--- a/gcc/testsuite/gcc.target/i386/vect-pr82426.c
+++ b/gcc/testsuite/gcc.target/i386/vect-pr82426.c
@@ -1,6 +1,6 @@
 /* i?86 does not have V2SF, x32 does though. */
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O3 -mavx -mfma" } */
+/* { dg-options "-O3 -mavx -mfma -ffp-contract=fast" } */
 
 struct Matrix
 {
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 1a703a9bae4a..7a828cadbd53 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2849,9 +2849,10 @@ out:
           && matches[0]
           /* ??? For COND_EXPRs we can swap the comparison operands
              as well as the arms under some constraints. */
-          && nops == 2
+          && (nops == 2 || nops == 3)
           && oprnds_info[1]->first_dt == vect_internal_def
-          && is_gimple_assign (stmt_info->stmt)
+          && (is_gimple_assign (stmt_info->stmt)
+              || is_gimple_call (stmt_info->stmt))
           /* Swapping operands for reductions breaks assumptions later on. */
           && STMT_VINFO_REDUC_IDX (stmt_info) == -1)
         {
@@ -2866,14 +2867,32 @@ out:
                 continue;
               stmt_vec_info stmt_info = stmts[j];
               /* Verify if we can swap operands of this stmt. */
-              gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
-              if (!stmt
-                  || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
+              if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
                 {
-                  if (!swap_not_matching)
-                    goto fail;
-                  swap_not_matching = false;
-                  break;
+                  tree_code code = gimple_assign_rhs_code (stmt);
+                  if (! commutative_tree_code (code)
+                      && ! commutative_ternary_tree_code (code))
+                    {
+                      if (!swap_not_matching)
+                        goto fail;
+                      swap_not_matching = false;
+                      break;
+                    }
+                }
+              else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
+                {
+                  internal_fn fn = (gimple_call_internal_p (call)
+                                    ? gimple_call_internal_fn (call)
+                                    : IFN_LAST);
+                  if ((! commutative_binary_fn_p (fn)
+                       && ! commutative_ternary_fn_p (fn))
+                      || first_commutative_argument (fn) != 0)
+                    {
+                      if (!swap_not_matching)
+                        goto fail;
+                      swap_not_matching = false;
+                      break;
+                    }
                 }
             }
         }