From: Andrei Nichita Tirziu <[email protected]>
The MATCH pattern previously allowed for a maximum number of invariants,
given by the maximum number of elements that could fit in a segment
of the chosen vector mode. For example, if we had elements
that were 16-bits wide, and the chosen mode for vectorization
was VNx8HI, than we could fit at most 8 elements in a segment.
This change allows for any number of invariants, which can be split
across multiple MATCH / NMATCH instructions. These instructions
can then be accumulated by adding extra OR / AND instructions to
obtain the final result.
This would be equivalent to the following:
x1 = IFN_MATCH_EQ (variant, invariant_1);
x2 = IFN_MATCH_EQ (variant, invariant_1);
x3 = IFN_MATCH_EQ (variant, invariant_3);
final_result = x1 || x2 || x3;
(similar for the NMATCH case).
gcc/ChangeLog:
* tree-vect-slp-patterns.cc: Changes to the MATCH
SLP pattern.
Change-Id: I67755646bb7ecf76079b203641f78e47d308f79b
---
gcc/tree-vect-slp-patterns.cc | 265 ++++++++++++++++++++++++++--------
1 file changed, 204 insertions(+), 61 deletions(-)
diff --git a/gcc/tree-vect-slp-patterns.cc b/gcc/tree-vect-slp-patterns.cc
index 1d04f70658ee..e417867d5523 100644
--- a/gcc/tree-vect-slp-patterns.cc
+++ b/gcc/tree-vect-slp-patterns.cc
@@ -1892,95 +1892,238 @@ void match_pattern::build (vec_info *vinfo)
return;
}
- // Create a new SLP node to hold all invariants as scalar operands.
- slp_tree invariant_node = vect_create_new_slp_node (0, CONSTRUCTOR);
- SLP_TREE_DEF_TYPE (invariant_node) = vect_external_def;
- SLP_TREE_VECTYPE (invariant_node) = SLP_TREE_VECTYPE (variant_operand);
- SLP_TREE_REPRESENTATIVE (invariant_node) = nullptr;
-
/* At this point, we know that we have a vector mode and we are
interested in how many elements we can pack in a segment
(usually 128 bits long). */
const unsigned int lanes_in_segment =
GET_MODE_NUNITS (vinfo->vector_mode).coeffs[0];
- // We can't fit more invariants than the number of lanes in a segment.
- if (lanes_in_segment < invariant_operands.size ())
+ /* We can fit at most `lanes_in_segment` invariants in a node, so we might
+ need multiple nodes. */
+ unsigned int total_invariant_nodes =
+ invariant_operands.size () / lanes_in_segment;
+ if (invariant_operands.size () % lanes_in_segment != 0)
+ total_invariant_nodes++;
+
+ auto_vec<gcall*> new_ifn_calls {};
+ auto_vec<slp_tree> new_invariant_nodes {};
+ auto_vec<stmt_vec_info> new_vec_infos {};
+
+ for (unsigned int i = 0; i < total_invariant_nodes; i++)
{
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location,
- "SLP MATCH pattern: Too many invariants (got %d "
- "invariants, maximum for the current data type was %d)."
- "Aborting transformation\n",
- invariant_operands.size (),
- lanes_in_segment);
+ /* The invariants we want to pack on this iteration are in the range
+ [i * lanes_in_segment ... ((i + 1) * lanes_in_segment - 1)] or
+ [i * lanes_in_segment ... invariant_operands.size()]. */
+ const unsigned int index_of_first_invariant = i * lanes_in_segment;
+
+ // Create a new SLP node to hold all invariants as scalar operands.
+ slp_tree invariant_node = vect_create_new_slp_node (0, CONSTRUCTOR);
+ SLP_TREE_DEF_TYPE (invariant_node) = vect_external_def;
+ SLP_TREE_VECTYPE (invariant_node) = SLP_TREE_VECTYPE (variant_operand);
+ SLP_TREE_REPRESENTATIVE (invariant_node) = nullptr;
+
+ /* We have some remaining invariants, but not enough to fill the
+ entire node. This value is only useful in the last iteration. */
+ const std::size_t remaining_invariants =
+ invariant_operands.size () - index_of_first_invariant;
+
+ /* The indices of the invariants to insert in this node depend on the
+ iteration we are in.
+ At the last iteration, we have to fill the lanes in a circular way. The
+ lanes can't just be left empty, since we would then match with zero,
+ which isn't necessarily among our invariants. */
+ for (unsigned int j = 0; j < lanes_in_segment; j++)
+ {
+ const unsigned int index_of_invariant_to_insert =
+ (i == total_invariant_nodes - 1) ?
+ (index_of_first_invariant + j % remaining_invariants) :
+ (index_of_first_invariant + j);
- return;
+ const tree scalar_invariant =
+ SLP_TREE_SCALAR_OPS
(invariant_operands[index_of_invariant_to_insert])[0];
+
+ SLP_TREE_SCALAR_OPS (invariant_node).safe_push (scalar_invariant);
+ }
+
+ /* We need a value here, that ensures that the
+ scalar operands end up vectorized. */
+ SLP_TREE_LANES (invariant_node) = 1;
+
+ /* Build a dummy IFN (the IFN will get its proper arguments
+ in vectorizable_call). */
+ auto_vec<tree> ifn_args {};
+ tree dummy_arg_type = TREE_TYPE (gimple_get_lhs (current_stmt));
+ tree dummy_arg = fold_convert (dummy_arg_type, integer_zero_node);
+
+ ifn_args.safe_push (dummy_arg);
+ ifn_args.safe_push (dummy_arg);
+
+ gcall *ifn_call = gimple_build_call_internal_vec (m_ifn, ifn_args);
+
+ // Create an SSA LHS of the vector mask type and attach it to the call.
+ const tree lhs = make_temp_ssa_name (SLP_TREE_VECTYPE (*m_node),
+ ifn_call,
+ "match_temp_ifn");
+ gimple_call_set_lhs (ifn_call, lhs);
+ gimple_set_location (ifn_call, gimple_location (current_stmt));
+ gimple_call_set_nothrow (ifn_call, true);
+ gimple_set_bb (ifn_call, gimple_bb (current_stmt));
+
+ // Create new vector info for the newly created IFN call.
+ stmt_vec_info new_vec_info = vinfo->add_pattern_stmt (ifn_call,
+ current_vec_info);
+ STMT_VINFO_RELEVANT (new_vec_info) = vect_used_in_scope;
+ STMT_SLP_TYPE (new_vec_info) = pure_slp;
+ STMT_VINFO_VECTYPE (new_vec_info) = SLP_TREE_VECTYPE (*m_node);
+ STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_vec_info) = true;
+
+ new_ifn_calls.safe_push (ifn_call);
+ new_invariant_nodes.safe_push (invariant_node);
+ new_vec_infos.safe_push (new_vec_info);
}
- const std::size_t number_invariants = invariant_operands.size ();
+ /* At this point we have multiple nodes of invariants, let's say N of them
+ For each such node, we have an IFN that takes the form
+ result_i = IFN_MATCH (variant_operand, new_invariant_nodes[i]);
+ with i between [0, N - 1].
- /* Most likely, the number of invariants won't match the number of lanes,
- so we have to fill the lanes in a circular way.
- The lanes can't just be left empty, since we would then match with zero,
- which isn't necessarily among our invariants. */
- for (unsigned int i = 0; i < lanes_in_segment; i++)
+ If we only have one invariant node, then we can directly attach it,
+ together with the variant operand, as children of the root node.
+ Otherwise, we need to to a few extra steps.
+ */
+ if (total_invariant_nodes == 1)
{
- const tree scalar_invariant =
- SLP_TREE_SCALAR_OPS (invariant_operands[i % number_invariants])[0];
- SLP_TREE_SCALAR_OPS (invariant_node).safe_push (scalar_invariant);
+ SLP_TREE_CHILDREN (*m_node).release ();
+
+ SLP_TREE_CHILDREN (*m_node).safe_push (variant_operand);
+ SLP_TREE_REF_COUNT (variant_operand)++;
+
+ SLP_TREE_CHILDREN (*m_node).safe_push (new_invariant_nodes[0]);
+ SLP_TREE_REF_COUNT (new_invariant_nodes[0])++;
+
+ // Attach the new vec info, and designate the root node as a CALL
expression.
+ SLP_TREE_REPRESENTATIVE (*m_node) = new_vec_infos[0];
+ SLP_TREE_CODE (*m_node) = CALL_EXPR;
}
- /* We need a value here, that ensures that the
- scalar operands end up vectorized. */
- SLP_TREE_LANES (invariant_node) = 1;
+ auto_vec<slp_tree> new_match_nodes {};
+
+ if (total_invariant_nodes > 1)
+ {
+ /* Create new "match nodes". Each newly created IFN will form a new
+ internal node, whose children are the variant operand and the IFN's
+ corresponding invariant node. */
+ for (unsigned int i = 0; i < total_invariant_nodes; i++)
+ {
+ slp_tree new_match_node = vect_create_new_slp_node (0, CONSTRUCTOR);
+ SLP_TREE_DEF_TYPE (new_match_node) = vect_internal_def;
+ SLP_TREE_VECTYPE (new_match_node) = SLP_TREE_VECTYPE (*m_node);
+ SLP_TREE_LANES (new_match_node) = 1;
+
+ SLP_TREE_CHILDREN (new_match_node).safe_push (variant_operand);
+ SLP_TREE_REF_COUNT (variant_operand)++;
+
+ SLP_TREE_CHILDREN (new_match_node).safe_push (new_invariant_nodes[i]);
+ SLP_TREE_REF_COUNT (new_invariant_nodes[i])++;
- /* Build a dummy IFN (the IFN will get its proper arguments
- in vectorizable_call). */
- auto_vec<tree> ifn_args {};
- tree dummy_arg_type = TREE_TYPE (gimple_get_lhs (current_stmt));
- tree dummy_arg = fold_convert (dummy_arg_type, integer_zero_node);
+ /* Attach the new vec info, and designate the root node as
+ a CALL expression. */
+ SLP_TREE_REPRESENTATIVE (new_match_node) = new_vec_infos[i];
+ SLP_TREE_CODE (new_match_node) = CALL_EXPR;
+
+ new_match_nodes.safe_push (new_match_node);
+ }
+
+ /* The following loop combines the newly created nodes to give a final
+ result. The loop is executed only when we have at least two invariant
+ nodes.
+ If the "match nodes" were match_node[0 ... N - 1], then:
+ combine_node[0] has children match_node[0] and match_node[1]
+ combine_node[1] has children combine_node[0] and match_node[2]
+ ...
+ combine_node[N - 2] has children combine_node[N - 3] and match_node[N
- 2]
+ m_node has children combine_node[N - 1] and match_node[N - 1].
+ We can achieve such a computation by using accumulators.
+ */
+
+ tree accumulator_lhs = gimple_call_lhs (new_ifn_calls[0]);
+ slp_tree accumulator_combine_node = new_match_nodes[0];
+
+ for (unsigned int i = 0; i < total_invariant_nodes - 1; i++)
+ {
+ // Create an SSA LHS of the vector mask type and attach it to the call.
+ const tree lhs = make_temp_ssa_name (SLP_TREE_VECTYPE (variant_operand),
+ NULL,
+ "match_temp_combine");
+
+ /* Create a statement of the form
+ combine_result = result_of_match_ifn0 OR result_of_match_ifn1. */
+ gassign *result_of_match_combine =
+ gimple_build_assign (lhs,
+ m_ifn == IFN_MATCH_EQ ? BIT_IOR_EXPR :
BIT_AND_EXPR,
+ accumulator_lhs,
+ gimple_call_lhs (new_ifn_calls[i + 1]));
+
+ gimple_set_location (result_of_match_combine,
+ gimple_location (current_stmt));
+ gimple_set_bb (result_of_match_combine, gimple_bb (current_stmt));
+
+ accumulator_lhs = lhs;
+
+ // Create new vector info for the newly created assignment statement.
+ stmt_vec_info new_vec_info = vinfo->add_pattern_stmt
(result_of_match_combine,
+ current_vec_info);
+ STMT_VINFO_RELEVANT (new_vec_info) = vect_used_in_scope;
+ STMT_SLP_TYPE (new_vec_info) = pure_slp;
+ STMT_VINFO_VECTYPE (new_vec_info) = SLP_TREE_VECTYPE (*m_node);
+ STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_vec_info) = true;
+
+ if (i < total_invariant_nodes - 2)
+ {
+ /* If we haven't reached the last iteration, we keep creating new
+ combine nodes whose children are the current match node and the
+ previous combine node. */
+ slp_tree new_combine_node = vect_create_new_slp_node (0, CONSTRUCTOR);
+ SLP_TREE_DEF_TYPE (new_combine_node) = vect_internal_def;
+ SLP_TREE_VECTYPE (new_combine_node) = SLP_TREE_VECTYPE (*m_node);
+ SLP_TREE_LANES (new_combine_node) = 1;
+ SLP_TREE_REPRESENTATIVE (new_combine_node) = new_vec_info;
- ifn_args.safe_push (dummy_arg);
- ifn_args.safe_push (dummy_arg);
+ SLP_TREE_CHILDREN (new_combine_node).safe_push
(accumulator_combine_node);
+ SLP_TREE_REF_COUNT (accumulator_combine_node)++;
- gcall *ifn_call = gimple_build_call_internal_vec (m_ifn, ifn_args);
+ SLP_TREE_CHILDREN (new_combine_node).safe_push (new_match_nodes[i + 1]);
+ SLP_TREE_REF_COUNT (new_match_nodes[i + 1])++;
- // Create an SSA LHS of the vector mask type and attach it to the call.
- const tree lhs = make_temp_ssa_name (SLP_TREE_VECTYPE (*m_node),
- ifn_call,
- "match_temp_ifn");
- gimple_call_set_lhs (ifn_call, lhs);
- gimple_set_location (ifn_call, gimple_location (current_stmt));
- gimple_call_set_nothrow (ifn_call, true);
- gimple_set_bb (ifn_call, gimple_bb (current_stmt));
+ SLP_TREE_CODE (new_combine_node) =
+ m_ifn == IFN_MATCH_EQ ? BIT_IOR_EXPR : BIT_AND_EXPR;
- // Create new vector info for the newly created IFN call.
- stmt_vec_info new_vec_info = vinfo->add_pattern_stmt (ifn_call,
- current_vec_info);
- STMT_VINFO_RELEVANT (new_vec_info) = vect_used_in_scope;
- STMT_SLP_TYPE (new_vec_info) = pure_slp;
- STMT_VINFO_VECTYPE (new_vec_info) = SLP_TREE_VECTYPE (*m_node);
- STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_vec_info) = true;
+ accumulator_combine_node = new_combine_node;
+ }
+ else
+ {
+ /* In the last iteration, we add the last combine node we created and
the
+ last match node as the children of the root node. */
+ SLP_TREE_CHILDREN (*m_node).release ();
- /* We'll replace the node's children with the variant operand node
- and the newly created invariants node. */
- SLP_TREE_CHILDREN (*m_node).release ();
+ SLP_TREE_CHILDREN (*m_node).safe_push (accumulator_combine_node);
+ SLP_TREE_REF_COUNT (accumulator_combine_node)++;
- SLP_TREE_CHILDREN (*m_node).safe_push (variant_operand);
- SLP_TREE_REF_COUNT (variant_operand)++;
+ SLP_TREE_CHILDREN (*m_node).safe_push (new_match_nodes[i + 1]);
+ SLP_TREE_REF_COUNT (new_match_nodes[i + 1])++;
- SLP_TREE_CHILDREN (*m_node).safe_push (invariant_node);
- SLP_TREE_REF_COUNT (invariant_node)++;
+ SLP_TREE_CODE (*m_node) =
+ m_ifn == IFN_MATCH_EQ ? BIT_IOR_EXPR : BIT_AND_EXPR;
+ SLP_TREE_REPRESENTATIVE (*m_node) = new_vec_info;
+ }
+ }
+ }
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"SLP MATCH pattern: Successfully built replacement for "
"statement %G",
current_stmt);
-
- SLP_TREE_REPRESENTATIVE (*m_node) = new_vec_info;
- SLP_TREE_CODE (*m_node) = CALL_EXPR;
}
vect_pattern*
--
2.52.0