From: Andrei Nichita Tirziu <[email protected]>

The MATCH pattern previously allowed for a maximum number of invariants,
given by the maximum number of elements that could fit in a segment
of the chosen vector mode. For example, if we had elements
that were 16-bits wide, and the chosen mode for vectorization
was VNx8HI, than we could fit at most 8 elements in a segment.

This change allows for any number of invariants, which can be split
across multiple MATCH / NMATCH instructions. These instructions
can then be accumulated by adding extra OR / AND instructions to
obtain the final result.

This would be equivalent to the following:
    x1 = IFN_MATCH_EQ (variant, invariant_1);
    x2 = IFN_MATCH_EQ (variant, invariant_1);
    x3 = IFN_MATCH_EQ (variant, invariant_3);
    final_result = x1 || x2 || x3;
(similar for the NMATCH case).

gcc/ChangeLog:

        * tree-vect-slp-patterns.cc: Changes to the MATCH
                                     SLP pattern.

Change-Id: I67755646bb7ecf76079b203641f78e47d308f79b
---
 gcc/tree-vect-slp-patterns.cc | 265 ++++++++++++++++++++++++++--------
 1 file changed, 204 insertions(+), 61 deletions(-)

diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc
index 1d04f70658ee..e417867d5523 100644
--- a/gcc/tree-vect-slp-patterns.cc
+++ b/gcc/tree-vect-slp-patterns.cc
@@ -1892,95 +1892,238 @@ void match_pattern::build (vec_info *vinfo)
     return;
   }
 
-  // Create a new SLP node to hold all invariants as scalar operands.
-  slp_tree invariant_node = vect_create_new_slp_node (0, CONSTRUCTOR);
-  SLP_TREE_DEF_TYPE (invariant_node) = vect_external_def;
-  SLP_TREE_VECTYPE (invariant_node) = SLP_TREE_VECTYPE (variant_operand);
-  SLP_TREE_REPRESENTATIVE (invariant_node) = nullptr;
-
   /* At this point, we know that we have a vector mode and we are
      interested in how many elements we can pack in a segment
      (usually 128 bits long).  */
   const unsigned int lanes_in_segment =
        GET_MODE_NUNITS (vinfo->vector_mode).coeffs[0];
 
-  // We can't fit more invariants than the number of lanes in a segment.
-  if (lanes_in_segment < invariant_operands.size ())
+  /* We can fit at most `lanes_in_segment` invariants in a node, so we might
+     need multiple nodes.  */
+  unsigned int total_invariant_nodes =
+       invariant_operands.size () / lanes_in_segment;
+  if (invariant_operands.size () % lanes_in_segment != 0)
+    total_invariant_nodes++;
+
+  auto_vec<gcall*> new_ifn_calls {};
+  auto_vec<slp_tree> new_invariant_nodes {};
+  auto_vec<stmt_vec_info> new_vec_infos {};
+
+  for (unsigned int i = 0; i < total_invariant_nodes; i++)
   {
-    if (dump_enabled_p ())
-      dump_printf_loc (MSG_NOTE, vect_location,
-                      "SLP MATCH pattern: Too many invariants (got %d "
-                      "invariants, maximum for the current data type was %d)."
-                      "Aborting transformation\n",
-                      invariant_operands.size (),
-                      lanes_in_segment);
+    /* The invariants we want to pack on this iteration are in the range
+       [i * lanes_in_segment ... ((i + 1) * lanes_in_segment - 1)] or
+       [i * lanes_in_segment ... invariant_operands.size()].  */
+    const unsigned int index_of_first_invariant = i * lanes_in_segment;
+
+    // Create a new SLP node to hold all invariants as scalar operands.
+    slp_tree invariant_node = vect_create_new_slp_node (0, CONSTRUCTOR);
+    SLP_TREE_DEF_TYPE (invariant_node) = vect_external_def;
+    SLP_TREE_VECTYPE (invariant_node) = SLP_TREE_VECTYPE (variant_operand);
+    SLP_TREE_REPRESENTATIVE (invariant_node) = nullptr;
+
+    /* We have some remaining invariants, but not enough to fill the
+       entire node. This value is only useful in the last iteration.  */
+    const std::size_t remaining_invariants =
+       invariant_operands.size () - index_of_first_invariant;
+
+    /* The indices of the invariants to insert in this node depend on the
+       iteration we are in.
+       At the last iteration, we have to fill the lanes in a circular way. The
+       lanes can't just be left empty, since we would then match with zero,
+       which isn't necessarily among our invariants.  */
+    for (unsigned int j = 0; j < lanes_in_segment; j++)
+    {
+      const unsigned int index_of_invariant_to_insert =
+       (i == total_invariant_nodes - 1) ?
+            (index_of_first_invariant + j % remaining_invariants) :
+            (index_of_first_invariant + j);
 
-    return;
+      const tree scalar_invariant =
+       SLP_TREE_SCALAR_OPS 
(invariant_operands[index_of_invariant_to_insert])[0];
+
+      SLP_TREE_SCALAR_OPS (invariant_node).safe_push (scalar_invariant);
+    }
+
+    /* We need a value here, that ensures that the
+       scalar operands end up vectorized.  */
+    SLP_TREE_LANES (invariant_node) = 1;
+
+    /* Build a dummy IFN (the IFN will get its proper arguments
+       in vectorizable_call).  */
+    auto_vec<tree> ifn_args {};
+    tree dummy_arg_type = TREE_TYPE (gimple_get_lhs (current_stmt));
+    tree dummy_arg = fold_convert (dummy_arg_type, integer_zero_node);
+
+    ifn_args.safe_push (dummy_arg);
+    ifn_args.safe_push (dummy_arg);
+
+    gcall *ifn_call = gimple_build_call_internal_vec (m_ifn, ifn_args);
+
+    // Create an SSA LHS of the vector mask type and attach it to the call.
+    const tree lhs = make_temp_ssa_name (SLP_TREE_VECTYPE (*m_node),
+                                        ifn_call,
+                                        "match_temp_ifn");
+    gimple_call_set_lhs (ifn_call, lhs);
+    gimple_set_location (ifn_call, gimple_location (current_stmt));
+    gimple_call_set_nothrow (ifn_call, true);
+    gimple_set_bb (ifn_call, gimple_bb (current_stmt));
+
+    // Create new vector info for the newly created IFN call.
+    stmt_vec_info new_vec_info = vinfo->add_pattern_stmt (ifn_call,
+                                                         current_vec_info);
+    STMT_VINFO_RELEVANT (new_vec_info) = vect_used_in_scope;
+    STMT_SLP_TYPE (new_vec_info) = pure_slp;
+    STMT_VINFO_VECTYPE (new_vec_info) = SLP_TREE_VECTYPE (*m_node);
+    STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_vec_info) = true;
+
+    new_ifn_calls.safe_push (ifn_call);
+    new_invariant_nodes.safe_push (invariant_node);
+    new_vec_infos.safe_push (new_vec_info);
   }
 
-  const std::size_t number_invariants = invariant_operands.size ();
+  /* At this point we have multiple nodes of invariants, let's say N of them
+     For each such node, we have an IFN that takes the form
+       result_i = IFN_MATCH (variant_operand, new_invariant_nodes[i]);
+     with i between [0, N - 1].
 
-  /* Most likely, the number of invariants won't match the number of lanes,
-     so we have to fill the lanes in a circular way.
-     The lanes can't just be left empty, since we would then match with zero,
-     which isn't necessarily among our invariants.  */
-  for (unsigned int i = 0; i < lanes_in_segment; i++)
+     If we only have one invariant node, then we can directly attach it,
+     together with the variant operand, as children of the root node.
+     Otherwise, we need to to a few extra steps.
+  */
+  if (total_invariant_nodes == 1)
   {
-    const tree scalar_invariant =
-       SLP_TREE_SCALAR_OPS (invariant_operands[i % number_invariants])[0];
-    SLP_TREE_SCALAR_OPS (invariant_node).safe_push (scalar_invariant);
+    SLP_TREE_CHILDREN (*m_node).release ();
+
+    SLP_TREE_CHILDREN (*m_node).safe_push (variant_operand);
+    SLP_TREE_REF_COUNT (variant_operand)++;
+
+    SLP_TREE_CHILDREN (*m_node).safe_push (new_invariant_nodes[0]);
+    SLP_TREE_REF_COUNT (new_invariant_nodes[0])++;
+
+    // Attach the new vec info, and designate the root node as a CALL 
expression.
+    SLP_TREE_REPRESENTATIVE (*m_node) = new_vec_infos[0];
+    SLP_TREE_CODE (*m_node) = CALL_EXPR;
   }
 
-  /* We need a value here, that ensures that the
-     scalar operands end up vectorized.  */
-  SLP_TREE_LANES (invariant_node) = 1;
+  auto_vec<slp_tree> new_match_nodes {};
+
+  if (total_invariant_nodes > 1)
+  {
+    /* Create new "match nodes". Each newly created IFN will form a new
+       internal node, whose children are the variant operand and the IFN's
+       corresponding invariant node.  */ 
+    for (unsigned int i = 0; i < total_invariant_nodes; i++)
+    {
+      slp_tree new_match_node = vect_create_new_slp_node (0, CONSTRUCTOR);
+      SLP_TREE_DEF_TYPE (new_match_node) = vect_internal_def;
+      SLP_TREE_VECTYPE (new_match_node) = SLP_TREE_VECTYPE (*m_node);
+      SLP_TREE_LANES (new_match_node) = 1;
+
+      SLP_TREE_CHILDREN (new_match_node).safe_push (variant_operand);
+      SLP_TREE_REF_COUNT (variant_operand)++;
+
+      SLP_TREE_CHILDREN (new_match_node).safe_push (new_invariant_nodes[i]);
+      SLP_TREE_REF_COUNT (new_invariant_nodes[i])++;
 
-  /* Build a dummy IFN (the IFN will get its proper arguments
-     in vectorizable_call).  */
-  auto_vec<tree> ifn_args {};
-  tree dummy_arg_type = TREE_TYPE (gimple_get_lhs (current_stmt));
-  tree dummy_arg = fold_convert (dummy_arg_type, integer_zero_node);
+      /* Attach the new vec info, and designate the root node as
+        a CALL expression.  */
+      SLP_TREE_REPRESENTATIVE (new_match_node) = new_vec_infos[i];
+      SLP_TREE_CODE (new_match_node) = CALL_EXPR;
+
+      new_match_nodes.safe_push (new_match_node);
+    }
+
+    /* The following loop combines the newly created nodes to give a final
+       result. The loop is executed only when we have at least two invariant
+       nodes.
+       If the "match nodes" were match_node[0 ... N - 1], then:
+        combine_node[0] has children match_node[0] and match_node[1]
+        combine_node[1] has children combine_node[0] and match_node[2]
+        ...
+        combine_node[N - 2] has children combine_node[N - 3] and match_node[N 
- 2]
+        m_node has children combine_node[N - 1] and match_node[N - 1].
+       We can achieve such a computation by using accumulators.
+    */
+
+    tree accumulator_lhs = gimple_call_lhs (new_ifn_calls[0]);
+    slp_tree accumulator_combine_node = new_match_nodes[0];
+
+    for (unsigned int i = 0; i < total_invariant_nodes - 1; i++)
+    {
+      // Create an SSA LHS of the vector mask type and attach it to the call.
+      const tree lhs = make_temp_ssa_name (SLP_TREE_VECTYPE (variant_operand),
+                                          NULL,
+                                          "match_temp_combine");
+
+      /* Create a statement of the form
+        combine_result = result_of_match_ifn0 OR result_of_match_ifn1.  */
+      gassign *result_of_match_combine =
+       gimple_build_assign (lhs,
+                            m_ifn == IFN_MATCH_EQ ? BIT_IOR_EXPR : 
BIT_AND_EXPR,
+                            accumulator_lhs,
+                            gimple_call_lhs (new_ifn_calls[i + 1]));
+
+      gimple_set_location (result_of_match_combine,
+                          gimple_location (current_stmt));
+      gimple_set_bb (result_of_match_combine, gimple_bb (current_stmt));
+
+      accumulator_lhs = lhs;
+
+      // Create new vector info for the newly created assignment statement.
+      stmt_vec_info new_vec_info = vinfo->add_pattern_stmt 
(result_of_match_combine,
+                                                           current_vec_info);
+      STMT_VINFO_RELEVANT (new_vec_info) = vect_used_in_scope;
+      STMT_SLP_TYPE (new_vec_info) = pure_slp;
+      STMT_VINFO_VECTYPE (new_vec_info) = SLP_TREE_VECTYPE (*m_node);
+      STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_vec_info) = true;
+
+      if (i < total_invariant_nodes - 2)
+      {
+       /* If we haven't reached the last iteration, we keep creating new
+          combine nodes whose children are the current match node and the
+          previous combine node.  */
+       slp_tree new_combine_node = vect_create_new_slp_node (0, CONSTRUCTOR);
+       SLP_TREE_DEF_TYPE (new_combine_node) = vect_internal_def;
+       SLP_TREE_VECTYPE (new_combine_node) = SLP_TREE_VECTYPE (*m_node);
+       SLP_TREE_LANES (new_combine_node) = 1;
+       SLP_TREE_REPRESENTATIVE (new_combine_node) = new_vec_info;
 
-  ifn_args.safe_push (dummy_arg);
-  ifn_args.safe_push (dummy_arg);
+       SLP_TREE_CHILDREN (new_combine_node).safe_push 
(accumulator_combine_node);
+       SLP_TREE_REF_COUNT (accumulator_combine_node)++;
 
-  gcall *ifn_call = gimple_build_call_internal_vec (m_ifn, ifn_args);
+       SLP_TREE_CHILDREN (new_combine_node).safe_push (new_match_nodes[i + 1]);
+       SLP_TREE_REF_COUNT (new_match_nodes[i + 1])++;
 
-  // Create an SSA LHS of the vector mask type and attach it to the call.
-  const tree lhs = make_temp_ssa_name (SLP_TREE_VECTYPE (*m_node),
-                                       ifn_call,
-                                       "match_temp_ifn");
-  gimple_call_set_lhs (ifn_call, lhs);
-  gimple_set_location (ifn_call, gimple_location (current_stmt));
-  gimple_call_set_nothrow (ifn_call, true);
-  gimple_set_bb (ifn_call, gimple_bb (current_stmt));
+       SLP_TREE_CODE (new_combine_node) =
+           m_ifn == IFN_MATCH_EQ ? BIT_IOR_EXPR : BIT_AND_EXPR;
 
-  // Create new vector info for the newly created IFN call.
-  stmt_vec_info new_vec_info = vinfo->add_pattern_stmt (ifn_call,
-                                                       current_vec_info);
-  STMT_VINFO_RELEVANT (new_vec_info) = vect_used_in_scope;
-  STMT_SLP_TYPE (new_vec_info) = pure_slp;
-  STMT_VINFO_VECTYPE (new_vec_info) = SLP_TREE_VECTYPE (*m_node);
-  STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_vec_info) = true;
+       accumulator_combine_node = new_combine_node;
+      }
+      else
+      {
+       /* In the last iteration, we add the last combine node we created and 
the
+          last match node as the children of the root node.  */
+       SLP_TREE_CHILDREN (*m_node).release ();
 
-  /* We'll replace the node's children with the variant operand node
-     and the newly created invariants node.  */
-  SLP_TREE_CHILDREN (*m_node).release ();
+       SLP_TREE_CHILDREN (*m_node).safe_push (accumulator_combine_node);
+       SLP_TREE_REF_COUNT (accumulator_combine_node)++;
 
-  SLP_TREE_CHILDREN (*m_node).safe_push (variant_operand);
-  SLP_TREE_REF_COUNT (variant_operand)++;
+       SLP_TREE_CHILDREN (*m_node).safe_push (new_match_nodes[i + 1]);
+       SLP_TREE_REF_COUNT (new_match_nodes[i + 1])++;
 
-  SLP_TREE_CHILDREN (*m_node).safe_push (invariant_node);
-  SLP_TREE_REF_COUNT (invariant_node)++;
+       SLP_TREE_CODE (*m_node) =
+           m_ifn == IFN_MATCH_EQ ? BIT_IOR_EXPR : BIT_AND_EXPR;
+       SLP_TREE_REPRESENTATIVE (*m_node) = new_vec_info;
+      }
+    }
+  }
 
   if (dump_enabled_p ())
       dump_printf_loc (MSG_NOTE, vect_location,
                       "SLP MATCH pattern: Successfully built replacement for "
                       "statement %G",
                       current_stmt);
-
-  SLP_TREE_REPRESENTATIVE (*m_node) = new_vec_info;
-  SLP_TREE_CODE (*m_node) = CALL_EXPR;
 }
 
 vect_pattern*
-- 
2.52.0

Reply via email to