https://gcc.gnu.org/g:f788324c724240b6280243119a6fa30f3d37b467

commit r16-1778-gf788324c724240b6280243119a6fa30f3d37b467
Author: Richard Biener <rguent...@suse.de>
Date:   Mon Jun 30 11:23:49 2025 +0200

    Handle SLP build operand swapping for ternaries and calls
    
    The following adds SLP build operand swapping for .FMA which is
    a ternary operator and a call.  The current code only handles
    binary operators in assignments, thus the patch extends this to
    handle both calls and assignments as well as binary and ternary
    operators.
    
            * tree-vect-slp.cc (vect_build_slp_2): Handle ternary
            and call operators when swapping operands.
    
            * gcc.target/i386/vect-pr82426.c: Pass explicit -ffp-contract=fast.
            * gcc.target/i386/vect-pr82426-2.c: New testcase variant with
            -ffp-contract=on.

Diff:
---
 gcc/testsuite/gcc.target/i386/vect-pr82426-2.c | 31 +++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/vect-pr82426.c   |  2 +-
 gcc/tree-vect-slp.cc                           | 37 +++++++++++++++++++-------
 3 files changed, 60 insertions(+), 10 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c b/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c
new file mode 100644
index 000000000000..525940866ad7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vect-pr82426-2.c
@@ -0,0 +1,31 @@
+/* i?86 does not have V2SF, x32 does though.  */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O3 -mavx -mfma -ffp-contract=on" } */
+
+struct Matrix
+{
+  float m11;
+  float m12;
+  float m21;
+  float m22;
+  float dx;
+  float dy;
+};
+
+struct Matrix multiply(const struct Matrix *a, const struct Matrix *b)
+{
+  struct Matrix out;
+  out.m11 = a->m11*b->m11 + a->m12*b->m21;
+  out.m12 = a->m11*b->m12 + a->m12*b->m22;
+  out.m21 = a->m21*b->m11 + a->m22*b->m21;
+  out.m22 = a->m21*b->m12 + a->m22*b->m22;
+
+  out.dx = a->dx*b->m11  + a->dy*b->m21 + b->dx;
+  out.dy = a->dx*b->m12  + a->dy*b->m22 + b->dy;
+  return out;
+}
+
+/* The whole kernel should be vectorized with V4SF and V2SF operations.  */
+/* { dg-final { scan-assembler-times "vadd" 1 } } */
+/* { dg-final { scan-assembler-times "vmul" 2 } } */
+/* { dg-final { scan-assembler-times "vfma" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/vect-pr82426.c b/gcc/testsuite/gcc.target/i386/vect-pr82426.c
index 03b10adff9b6..8ce8fe78a91b 100644
--- a/gcc/testsuite/gcc.target/i386/vect-pr82426.c
+++ b/gcc/testsuite/gcc.target/i386/vect-pr82426.c
@@ -1,6 +1,6 @@
 /* i?86 does not have V2SF, x32 does though.  */
 /* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O3 -mavx -mfma" } */
+/* { dg-options "-O3 -mavx -mfma -ffp-contract=fast" } */
 
 struct Matrix
 {
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 1a703a9bae4a..7a828cadbd53 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -2849,9 +2849,10 @@ out:
          && matches[0]
          /* ???  For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
-         && nops == 2
+         && (nops == 2 || nops == 3)
          && oprnds_info[1]->first_dt == vect_internal_def
-         && is_gimple_assign (stmt_info->stmt)
+         && (is_gimple_assign (stmt_info->stmt)
+             || is_gimple_call (stmt_info->stmt))
          /* Swapping operands for reductions breaks assumptions later on.  */
          && STMT_VINFO_REDUC_IDX (stmt_info) == -1)
        {
@@ -2866,14 +2867,32 @@ out:
                    continue;
                  stmt_vec_info stmt_info = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
-                 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
-                 if (!stmt
-                     || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
+                 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
                    {
-                     if (!swap_not_matching)
-                       goto fail;
-                     swap_not_matching = false;
-                     break;
+                     tree_code code = gimple_assign_rhs_code (stmt);
+                     if (! commutative_tree_code (code)
+                         && ! commutative_ternary_tree_code (code))
+                       {
+                         if (!swap_not_matching)
+                           goto fail;
+                         swap_not_matching = false;
+                         break;
+                       }
+                   }
+                 else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
+                   {
+                     internal_fn fn = (gimple_call_internal_p (call)
+                                       ? gimple_call_internal_fn (call)
+                                       : IFN_LAST);
+                     if ((! commutative_binary_fn_p (fn)
+                          && ! commutative_ternary_fn_p (fn))
+                         || first_commutative_argument (fn) != 0)
+                       {
+                         if (!swap_not_matching)
+                           goto fail;
+                         swap_not_matching = false;
+                         break;
+                       }
                    }
                }
            }

Reply via email to