The following uses SLP_TREE_REDUC_IDX where it looks more appropriate.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Tamar, can you test 1+2 (separately)?  Possibly also the full stack
if the first part succeeds.

Thanks,
Richard.

        * tree-vect-loop.cc (vect_create_epilog_for_reduction):
        Use SLP_TREE_REDUC_IDX for following the SLP graph and
        for identifying whether we use the 'else' in a COND.
        (vectorizable_lane_reducing): Simplify check of whether
        we are in a reduction.
        (vectorizable_reduction): Add sanity checking around
        SLP_TREE_REDUC_IDX and use it where it looks appropriate.
        (vect_transform_reduction): Use SLP_TREE_REDUC_IDX.
        * tree-vect-stmts.cc (vectorizable_call): Likewise.
        (vectorizable_operation): Likewise.
        (vectorizable_condition): Likewise.
---
 gcc/tree-vect-loop.cc  | 31 +++++++++++++------------------
 gcc/tree-vect-stmts.cc |  8 ++++----
 2 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 4af7283485e..b187d0d8533 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -5497,7 +5497,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
       while (cond_node != slp_node_instance->reduc_phis)
        {
          stmt_vec_info cond_info = SLP_TREE_REPRESENTATIVE (cond_node);
-         int slp_reduc_idx;
          if (gimple_assign_rhs_code (cond_info->stmt) == COND_EXPR)
            {
              gimple *vec_stmt
@@ -5505,16 +5504,9 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo,
              gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
              ccompares.safe_push
                (std::make_pair (gimple_assign_rhs1 (vec_stmt),
-                                STMT_VINFO_REDUC_IDX (cond_info) == 2));
-             /* ???  We probably want to have REDUC_IDX on the SLP node?
-                We have both three and four children COND_EXPR nodes
-                dependent on whether the comparison is still embedded
-                as GENERIC.  So work backwards.  */
-             slp_reduc_idx = (SLP_TREE_CHILDREN (cond_node).length () - 3
-                              + STMT_VINFO_REDUC_IDX (cond_info));
+                                SLP_TREE_REDUC_IDX (cond_node) == 2));
            }
-         else
-           slp_reduc_idx = STMT_VINFO_REDUC_IDX (cond_info);
+         int slp_reduc_idx = SLP_TREE_REDUC_IDX (cond_node);
          cond_node = SLP_TREE_CHILDREN (cond_node)[slp_reduc_idx];
        }
       gcc_assert (ccompares.length () != 0);
@@ -6882,14 +6874,13 @@ vectorizable_lane_reducing (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
   if (!type_has_mode_precision_p (type))
     return false;
 
+  vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
+
   /* TODO: Support lane-reducing operation that does not directly participate
      in loop reduction.  */
-  if (!STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
-      || STMT_VINFO_REDUC_IDX (stmt_info) < 0)
+  if (!reduc_info)
     return false;
 
-  vect_reduc_info reduc_info = info_for_reduction (loop_vinfo, slp_node);
-
   /* Lane-reducing pattern inside any inner loop of LOOP_VINFO is not
      recoginized.  */
   gcc_assert (!nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info));
@@ -7135,7 +7126,8 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
       stmt_vec_info def = loop_vinfo->lookup_def (reduc_def);
       stmt_vec_info vdef = vect_stmt_to_vectorize (def);
       int reduc_idx = STMT_VINFO_REDUC_IDX (vdef);
-      if (reduc_idx == -1)
+      if (STMT_VINFO_REDUC_IDX (vdef) == -1
+         || SLP_TREE_REDUC_IDX (vdef_slp) == -1)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7204,7 +7196,10 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
          else if (!vectype_in)
            vectype_in = SLP_TREE_VECTYPE (slp_node);
          if (!REDUC_GROUP_FIRST_ELEMENT (vdef))
-           vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
+           {
+             gcc_assert (reduc_idx == SLP_TREE_REDUC_IDX (vdef_slp));
+             vdef_slp = SLP_TREE_CHILDREN (vdef_slp)[reduc_idx];
+           }
        }
 
       reduc_def = op.ops[reduc_idx];
@@ -7361,7 +7356,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
        return false;
 
       /* When the condition uses the reduction value in the condition, fail.  */
-      if (STMT_VINFO_REDUC_IDX (stmt_info) == 0)
+      if (SLP_TREE_REDUC_IDX (slp_node) == 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -8001,7 +7996,7 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
      The last use is the reduction variable.  In case of nested cycle this
      assumption is not true: we use reduc_index to record the index of the
      reduction variable.  */
-  int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
+  int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
   tree vectype_in = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (slp_node)[0]);
 
   vec_num = vect_get_num_copies (loop_vinfo, slp_node, vectype_in);
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 77a03ed4a7b..15e0d069dcc 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3432,7 +3432,7 @@ vectorizable_call (vec_info *vinfo,
        }
     }
 
-  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+  int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
   internal_fn cond_fn = get_conditional_internal_fn (ifn);
   internal_fn cond_len_fn = get_len_internal_fn (ifn);
   int len_opno = internal_fn_len_index (cond_len_fn);
@@ -6452,7 +6452,7 @@ vectorizable_operation (vec_info *vinfo,
       using_emulated_vectors_p = true;
     }
 
-  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
+  int reduc_idx = SLP_TREE_REDUC_IDX (slp_node);
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
   internal_fn cond_fn = get_conditional_internal_fn (code);
@@ -6570,7 +6570,7 @@ vectorizable_operation (vec_info *vinfo,
   else if (arith_code_with_undefined_signed_overflow (orig_code)
           && ANY_INTEGRAL_TYPE_P (vectype)
           && TYPE_OVERFLOW_UNDEFINED (vectype)
-          && STMT_VINFO_REDUC_IDX (stmt_info) != -1)
+          && SLP_TREE_REDUC_IDX (slp_node) != -1)
     {
       gcc_assert (orig_code == PLUS_EXPR || orig_code == MINUS_EXPR
                  || orig_code == MULT_EXPR || orig_code == POINTER_PLUS_EXPR);
@@ -11560,7 +11560,7 @@ vectorizable_condition (vec_info *vinfo,
   if (code != COND_EXPR)
     return false;
 
-  int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
+  int reduc_index = SLP_TREE_REDUC_IDX (slp_node);
   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
   bool nested_cycle_p = false;
   bool for_reduction = vect_is_reduction (stmt_info);
-- 
2.43.0

Reply via email to