When we are unlucky, operand canonicalization can end up presenting
us with a different operand order, making a possible SLP reduction
group not match up.  The following allows swapping operands in this
case.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

        * tree-vect-slp.cc (vect_get_operand_map): Handle commutative
        operands when swapping is requested.
        (vect_build_slp_tree_1): Allow STMT_VINFO_REDUC_IDX differences
        when operand swapping makes them match and request swapping.
        (vect_build_slp_instance): Indicate we have successfully
        discovered a SLP reduction group.

        * gcc.dg/vect/slp-reduc-13.c: New testcase.

Co-authored-by: Eric Botcazou <[email protected]>
---
 gcc/testsuite/gcc.dg/vect/slp-reduc-13.c | 66 ++++++++++++++++++++++++
 gcc/tree-vect-slp.cc                     | 23 ++++++++-
 2 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/slp-reduc-13.c

diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-13.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-13.c
new file mode 100644
index 00000000000..00e91fc6251
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-13.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-additional-options "-fgimple" } */
+
+int q[2];
+
+void __GIMPLE (ssa,guessed_local(16535624),startwith("loop"))
+foo (int * r)
+{
+  int i;
+  int sum2;
+  int sum1;
+  int _1;
+  long unsigned int _2;
+  long unsigned int _3;
+  int * _4;
+  int _24;
+  __SIZETYPE__ _6;
+  __SIZETYPE__ _7;
+  int * _8;
+  int _9;
+  int _13;
+  unsigned int _30;
+  unsigned int _31;
+
+  __BB(2,guessed_local(16535624)):
+  goto __BB3(precise(134217728));
+
+  __BB(3,loop_header(1),guessed_local(1057206200)):
+  sum1_5 = __PHI (__BB5: sum1_18, __BB2: 0);
+  sum2_26 = __PHI (__BB5: sum2_19, __BB2: 0);
+  i_28 = __PHI (__BB5: i_20, __BB2: 0);
+  _31 = __PHI (__BB5: _30, __BB2: 64u);
+  _1 = i_28 * 2;
+  _2 = (long unsigned int) _1;
+  _3 = _2 * 4ul;
+  _4 = r_17(D) + _3;
+  _24 = __MEM <int> (_4);
+  /* Deliberately have swapped operands here */
+  sum1_18 = sum1_5 + _24;
+  _13 = _1 + 1;
+  _6 = (__SIZETYPE__) _13;
+  _7 = _6 * 4ul;
+  _8 = r_17(D) + _7;
+  _9 = __MEM <int> (_8);
+  /* versus here.  */
+  sum2_19 = _9 + sum2_26;
+  i_20 = i_28 + 1;
+  _30 = _31 - 1u;
+  if (_30 != 0u)
+    goto __BB5(guessed(132118446));
+  else
+    goto __BB4(guessed(2099282));
+
+  __BB(5,guessed_local(1040670576)):
+  goto __BB3(precise(134217728));
+
+  __BB(4,guessed_local(16535624)):
+  sum1_33 = __PHI (__BB3: sum1_18);
+  sum2_32 = __PHI (__BB3: sum2_19);
+  q[0] = sum1_33;
+  q[1] = sum2_32;
+  return;
+}
+
+/* { dg-final { scan-tree-dump "SLP discovery of size 2 reduction group succeeded" "vect" } } */
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index f64f874abff..aa6c3e2e041 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -558,7 +558,8 @@ vect_get_operand_map (const gimple *stmt, bool gather_scatter_p = false,
       if (gimple_assign_rhs_code (assign) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (assign)))
        gcc_unreachable ();
-      if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison
+      if ((TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison
+          || commutative_tree_code (gimple_assign_rhs_code (assign)))
          && swap)
        return op1_op0_map;
       if (gather_scatter_p)
@@ -1352,7 +1353,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                 uniform but only that of the first stmt matters.  */
              && !(first_reduc_idx != -1
                   && STMT_VINFO_REDUC_IDX (stmt_info) != -1
-                  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
+                  && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
+             && !(first_reduc_idx != -1
+                  && STMT_VINFO_REDUC_IDX (stmt_info) != -1
+                  && rhs_code.is_tree_code ()
+                  && commutative_tree_code (tree_code (rhs_code))
+                  && first_reduc_idx == 1 - STMT_VINFO_REDUC_IDX (stmt_info)))
            {
              if (dump_enabled_p ())
                {
@@ -1617,6 +1623,15 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
              && (swap_tree_comparison ((tree_code)first_stmt_code)
                  == (tree_code)rhs_code))
            swap[i] = 1;
+
+         if (i != 0
+             && first_reduc_idx != STMT_VINFO_REDUC_IDX (stmt_info)
+             && first_reduc_idx != -1
+             && STMT_VINFO_REDUC_IDX (stmt_info) != -1
+             && rhs_code.is_tree_code ()
+             && commutative_tree_code (tree_code (rhs_code))
+             && first_reduc_idx == 1 - STMT_VINFO_REDUC_IDX (stmt_info))
+           swap[i] = 1;
        }
 
       matches[i] = true;
@@ -4164,6 +4179,10 @@ vect_build_slp_instance (vec_info *vinfo,
 
          if (dump_enabled_p ())
            {
+             if (kind == slp_inst_kind_reduc_group)
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "SLP discovery of size %d reduction group "
+                                "succeeded\n", group_size);
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Final SLP tree for instance %p:\n",
                               (void *) new_instance);
-- 
2.51.0

Reply via email to