https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111401

--- Comment #3 from Robin Dapp <rdapp at gcc dot gnu.org> ---
Several other things came up, so I'm just going to post the latest status here
without having revised or tested it.  Going to try fixing it and testing
tomorrow.

--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -3672,7 +3672,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared)
 static bool
 fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn)
 {
-  if (code == PLUS_EXPR)
+  if (code == PLUS_EXPR || code == IFN_COND_ADD)
     {
       *reduc_fn = IFN_FOLD_LEFT_PLUS;
       return true;
@@ -4106,8 +4106,13 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
           return NULL;
         }

-      nphi_def_loop_uses++;
-      phi_use_stmt = use_stmt;
+      /* We might have two uses in the same instruction, only count them as
+        one. */
+      if (use_stmt != phi_use_stmt)
+       {
+         nphi_def_loop_uses++;
+         phi_use_stmt = use_stmt;
+       }
     }

   tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
@@ -6861,7 +6866,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
                               gimple **vec_stmt, slp_tree slp_node,
                               gimple *reduc_def_stmt,
                               tree_code code, internal_fn reduc_fn,
-                              tree ops[3], tree vectype_in,
+                              tree *ops, int num_ops, tree vectype_in,
                               int reduc_index, vec_loop_masks *masks,
                               vec_loop_lens *lens)
 {
@@ -6883,11 +6888,24 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
     gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
                          TYPE_VECTOR_SUBPARTS (vectype_in)));

-  tree op0 = ops[1 - reduc_index];
+  /* The operands either come from a binary operation or a COND_ADD operation.
+     The former is a gimple assign and the latter is a gimple call with four
+     arguments.  */
+  gcc_assert (num_ops == 2 || num_ops == 4);
+  bool is_cond_add = num_ops == 4;
+  tree op0, opmask;
+  if (!is_cond_add)
+    op0 = ops[1 - reduc_index];
+  else
+    {
+      op0 = ops[2];
+      opmask = ops[0];
+      gcc_assert (!slp_node);
+    }
   int group_size = 1;
   stmt_vec_info scalar_dest_def_info;
-  auto_vec<tree> vec_oprnds0;
+  auto_vec<tree> vec_oprnds0, vec_opmask;
   if (slp_node)
     {
       auto_vec<vec<tree> > vec_defs (2);
@@ -6903,9 +6921,18 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
                                     op0, &vec_oprnds0);
       scalar_dest_def_info = stmt_info;
+      if (is_cond_add)
+       {
+         vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1,
+                                        opmask, &vec_opmask);
+         gcc_assert (vec_opmask.length() == 1);
+       }
     }

-  tree scalar_dest = gimple_assign_lhs (scalar_dest_def_info->stmt);
+  gimple *sdef = scalar_dest_def_info->stmt;
+  tree scalar_dest = is_gimple_call (sdef)
+                      ? gimple_call_lhs (sdef)
+                      : gimple_assign_lhs (scalar_dest_def_info->stmt);
   tree scalar_type = TREE_TYPE (scalar_dest);
   tree reduc_var = gimple_phi_result (reduc_def_stmt);

@@ -6945,7 +6972,11 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
                                   i, 1);
          signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
          bias = build_int_cst (intQI_type_node, biasval);
-         mask = build_minus_one_cst (truth_type_for (vectype_in));
+         /* If we have a COND_ADD take its mask.  Otherwise use {-1, ...}.  */
+         if (is_cond_add)
+           mask = vec_opmask[0];
+         else
+           mask = build_minus_one_cst (truth_type_for (vectype_in));
        }

       /* Handle MINUS by adding the negative.  */
@@ -7440,6 +7471,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
       if (i == STMT_VINFO_REDUC_IDX (stmt_info))
        continue;

+      if (op.ops[i] == op.ops[STMT_VINFO_REDUC_IDX (stmt_info)])
+       continue;
+
       /* There should be only one cycle def in the stmt, the one
          leading to reduc_def.  */
       if (VECTORIZABLE_CYCLE_DEF (dt))
@@ -8211,8 +8245,21 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
       vec_num = 1;
     }

-  code_helper code = canonicalize_code (op.code, op.type);
-  internal_fn cond_fn = get_conditional_internal_fn (code, op.type);
+  code_helper code (op.code);
+  internal_fn cond_fn;
+
+  if (code.is_internal_fn ())
+    {
+      internal_fn ifn = internal_fn (op.code);
+      code = canonicalize_code (conditional_internal_fn_code (ifn), op.type);
+      cond_fn = ifn;
+    }
+  else
+    {
+      code = canonicalize_code (op.code, op.type);
+      cond_fn = get_conditional_internal_fn (code, op.type);
+    }
+
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
   vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
   bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in);
@@ -8240,8 +8287,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo,
       gcc_assert (code.is_tree_code ());
       return vectorize_fold_left_reduction
          (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi,
-          tree_code (code), reduc_fn, op.ops, vectype_in, reduc_index, masks,
-          lens);
+          tree_code (code), reduc_fn, op.ops, op.num_ops, vectype_in,
+          reduc_index, masks, lens);
     }

   bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);

Reply via email to