[PATCH] Use STMT_VINFO_REDUC_IDX instead of recomputing it

2019-10-25 Thread Richard Biener


This is a cleanup.  The cond-reduction restriction can go;
the fold-left one stays, because it cannot handle more than one stmt in
the cycle.  (In the future, when we get partial loop vectorization,
generic code would handle duplicating scalar code parts; they'd
simply stay single-lane SLP graph parts.)

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-10-25  Richard Biener  

* tree-vect-loop.c (vect_create_epilog_for_reduction): Use
STMT_VINFO_REDUC_IDX from the actual stmt.
(vect_transform_reduction): Likewise.
(vectorizable_reduction): Compute the reduction chain length,
do not recompute the reduction operand index.  Remove no longer
necessary restriction for condition reduction chains.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 277441)
+++ gcc/tree-vect-loop.c(working copy)
@@ -4263,9 +4263,9 @@ vect_create_epilog_for_reduction (stmt_v
 (CCOMPARE).  The then and else values mirror the main VEC_COND_EXPR:
 the reduction phi corresponds to NEW_PHI_TREE and the new values
 correspond to INDEX_BEFORE_INCR.  */
-  gcc_assert (STMT_VINFO_REDUC_IDX (reduc_info) >= 1);
+  gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) >= 1);
   tree index_cond_expr;
-  if (STMT_VINFO_REDUC_IDX (reduc_info) == 2)
+  if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
  ccompare, indx_before_incr, new_phi_tree);
   else
@@ -5720,19 +5720,21 @@ vectorizable_reduction (stmt_vec_info st
   gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
   gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
 
-  /* Verify following REDUC_IDX from the latch def leads us back to the PHI.  */
+  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
+ and compute the reduction chain length.  */
   tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
  loop_latch_edge (loop));
+  unsigned reduc_chain_length = 0;
   while (reduc_def != PHI_RESULT (reduc_def_phi))
 {
   stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
   def = vect_stmt_to_vectorize (def);
   gcc_assert (STMT_VINFO_REDUC_IDX (def) != -1);
   reduc_def = gimple_op (def->stmt, 1 + STMT_VINFO_REDUC_IDX (def));
+  reduc_chain_length++;
 }
 
   reduc_def = PHI_RESULT (reduc_def_phi);
-  int reduc_index = -1;
   for (i = 0; i < op_type; i++)
 {
   tree op = gimple_op (stmt, i + 1);
@@ -5753,7 +5755,6 @@ vectorizable_reduction (stmt_vec_info st
   if ((dt == vect_reduction_def || dt == vect_nested_cycle)
  && op == reduc_def)
{
- reduc_index = i;
  continue;
}
 
@@ -5792,10 +5793,6 @@ vectorizable_reduction (stmt_vec_info st
   if (!vectype_in)
 vectype_in = vectype_out;
   STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
-  /* For the SSA cycle we store on each participating stmt the operand index
- where the cycle continues.  Store the one relevant for the actual
- operation in the reduction meta.  */
-  STMT_VINFO_REDUC_IDX (reduc_info) = reduc_index;
 
   enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
   STMT_VINFO_REDUC_TYPE (reduc_info) = v_reduc_type;
@@ -5805,28 +5802,8 @@ vectorizable_reduction (stmt_vec_info st
   if (slp_node)
return false;
 
-  /* TODO: We can't yet handle reduction chains, since we need to treat
-each COND_EXPR in the chain specially, not just the last one.
-E.g. for:
-
-   x_1 = PHI <x_3, ...>;
-   x_2 = a_2 ? ... : x_1;
-   x_3 = a_3 ? ... : x_2;
-
-we're interested in the last element in x_3 for which a_2 || a_3
-is true, whereas the current reduction chain handling would
-vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3
-as a reduction operation.  */
-  if (reduc_index == -1)
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"conditional reduction chains not supported\n");
- return false;
-   }
-
   /* When the condition uses the reduction value in the condition, fail.  */
-  if (reduc_index == 0)
+  if (STMT_VINFO_REDUC_IDX (stmt_info) == 0)
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5995,17 +5972,17 @@ vectorizable_reduction (stmt_vec_info st
 outer-loop vectorization is safe.  */
   if (needs_fold_left_reduction_p (scalar_type, orig_code))
{
- STMT_VINFO_REDUC_TYPE (reduc_info)
-   = reduction_type = FOLD_LEFT_REDUCTION;
- /* When vectorizing a reduction chain w/o SLP the reduction PHI is not
-directy used in stmt.  */
- if

Re: [PATCH] Use STMT_VINFO_REDUC_IDX instead of recomputing it

2019-10-28 Thread Richard Biener
On Fri, 25 Oct 2019, Richard Biener wrote:

> 
> This is a cleanup.  The cond-reduction restriction can go,
> the fold-left one stays (it cannot handle more than one stmt in
> the cycle - in the future when we get partial loop vectorization
> generic code would handle duplicating of scalar code parts, they'd
> simply stay single-lane SLP graph parts).
> 
> Bootstrap and regtest running on x86_64-unknown-linux-gnu.

The following variant is what I have applied.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2019-10-28  Richard Biener  

* tree-vect-loop.c (vect_create_epilog_for_reduction): Use
STMT_VINFO_REDUC_IDX from the actual stmt.
(vect_transform_reduction): Likewise.
(vectorizable_reduction): Compute the reduction chain length,
do not recompute the reduction operand index.  Remove no longer
necessary restriction for condition reduction chains.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 277504)
+++ gcc/tree-vect-loop.c(working copy)
@@ -4263,9 +4263,9 @@ vect_create_epilog_for_reduction (stmt_v
 (CCOMPARE).  The then and else values mirror the main VEC_COND_EXPR:
 the reduction phi corresponds to NEW_PHI_TREE and the new values
 correspond to INDEX_BEFORE_INCR.  */
-  gcc_assert (STMT_VINFO_REDUC_IDX (reduc_info) >= 1);
+  gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) >= 1);
   tree index_cond_expr;
-  if (STMT_VINFO_REDUC_IDX (reduc_info) == 2)
+  if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
  ccompare, indx_before_incr, new_phi_tree);
   else
@@ -5720,19 +5720,24 @@ vectorizable_reduction (stmt_vec_info st
   gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info));
   gphi *reduc_def_phi = as_a <gphi *> (phi_info->stmt);
 
-  /* Verify following REDUC_IDX from the latch def leads us back to the PHI.  */
+  /* Verify following REDUC_IDX from the latch def leads us back to the PHI
+ and compute the reduction chain length.  */
   tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
  loop_latch_edge (loop));
+  unsigned reduc_chain_length = 0;
+  bool only_slp_reduc_chain = true;
   while (reduc_def != PHI_RESULT (reduc_def_phi))
 {
   stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
   def = vect_stmt_to_vectorize (def);
   gcc_assert (STMT_VINFO_REDUC_IDX (def) != -1);
+  if (!REDUC_GROUP_FIRST_ELEMENT (def))
+   only_slp_reduc_chain = false;
   reduc_def = gimple_op (def->stmt, 1 + STMT_VINFO_REDUC_IDX (def));
+  reduc_chain_length++;
 }
 
   reduc_def = PHI_RESULT (reduc_def_phi);
-  int reduc_index = -1;
   for (i = 0; i < op_type; i++)
 {
   tree op = gimple_op (stmt, i + 1);
@@ -5753,7 +5758,6 @@ vectorizable_reduction (stmt_vec_info st
   if ((dt == vect_reduction_def || dt == vect_nested_cycle)
  && op == reduc_def)
{
- reduc_index = i;
  continue;
}
 
@@ -5792,10 +5796,6 @@ vectorizable_reduction (stmt_vec_info st
   if (!vectype_in)
 vectype_in = vectype_out;
   STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in;
-  /* For the SSA cycle we store on each participating stmt the operand index
- where the cycle continues.  Store the one relevant for the actual
- operation in the reduction meta.  */
-  STMT_VINFO_REDUC_IDX (reduc_info) = reduc_index;
 
   enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info);
   STMT_VINFO_REDUC_TYPE (reduc_info) = v_reduc_type;
@@ -5805,28 +5805,8 @@ vectorizable_reduction (stmt_vec_info st
   if (slp_node)
return false;
 
-  /* TODO: We can't yet handle reduction chains, since we need to treat
-each COND_EXPR in the chain specially, not just the last one.
-E.g. for:
-
-   x_1 = PHI <x_3, ...>;
-   x_2 = a_2 ? ... : x_1;
-   x_3 = a_3 ? ... : x_2;
-
-we're interested in the last element in x_3 for which a_2 || a_3
-is true, whereas the current reduction chain handling would
-vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3
-as a reduction operation.  */
-  if (reduc_index == -1)
-   {
- if (dump_enabled_p ())
-   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-"conditional reduction chains not supported\n");
- return false;
-   }
-
   /* When the condition uses the reduction value in the condition, fail.  */
-  if (reduc_index == 0)
+  if (STMT_VINFO_REDUC_IDX (stmt_info) == 0)
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -5995,17 +5975,18 @@ vectorizable_reduction (stmt_vec_info st
 outer-loop vectorization is safe.  */
   if (needs_fold_le