https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111401
--- Comment #3 from Robin Dapp <rdapp at gcc dot gnu.org> --- Several other things came up, so I'm just going to post the latest status here without having revised or tested it. Going to try fixing it and testing tomorrow. --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -3672,7 +3672,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) static bool fold_left_reduction_fn (code_helper code, internal_fn *reduc_fn) { - if (code == PLUS_EXPR) + if (code == PLUS_EXPR || code == IFN_COND_ADD) { *reduc_fn = IFN_FOLD_LEFT_PLUS; return true; @@ -4106,8 +4106,13 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, return NULL; } - nphi_def_loop_uses++; - phi_use_stmt = use_stmt; + /* We might have two uses in the same instruction, only count them as + one. */ + if (use_stmt != phi_use_stmt) + { + nphi_def_loop_uses++; + phi_use_stmt = use_stmt; + } } tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); @@ -6861,7 +6866,7 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, gimple **vec_stmt, slp_tree slp_node, gimple *reduc_def_stmt, tree_code code, internal_fn reduc_fn, - tree ops[3], tree vectype_in, + tree *ops, int num_ops, tree vectype_in, int reduc_index, vec_loop_masks *masks, vec_loop_lens *lens) { @@ -6883,11 +6888,24 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out), TYPE_VECTOR_SUBPARTS (vectype_in))); - tree op0 = ops[1 - reduc_index]; + /* The operands either come from a binary operation or a COND_ADD operation. + The former is a gimple assign and the latter is a gimple call with four + arguments. */ + gcc_assert (num_ops == 2 || num_ops == 4); + bool is_cond_add = num_ops == 4; + tree op0, opmask; + if (!is_cond_add) + op0 = ops[1 - reduc_index]; + else + { + op0 = ops[2]; + opmask = ops[0]; + gcc_assert (!slp_node); + } int group_size = 1; stmt_vec_info scalar_dest_def_info; - auto_vec<tree> vec_oprnds0; + auto_vec<tree> vec_oprnds0, vec_opmask; if (slp_node) { auto_vec<vec<tree> > vec_defs (2); @@ -6903,9 +6921,18 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, op0, &vec_oprnds0); scalar_dest_def_info = stmt_info; + if (is_cond_add) + { + vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, 1, + opmask, &vec_opmask); + gcc_assert (vec_opmask.length() == 1); + } } - tree scalar_dest = gimple_assign_lhs (scalar_dest_def_info->stmt); + gimple *sdef = scalar_dest_def_info->stmt; + tree scalar_dest = is_gimple_call (sdef) + ? gimple_call_lhs (sdef) + : gimple_assign_lhs (scalar_dest_def_info->stmt); tree scalar_type = TREE_TYPE (scalar_dest); tree reduc_var = gimple_phi_result (reduc_def_stmt); @@ -6945,7 +6972,11 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo, i, 1); signed char biasval = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo); bias = build_int_cst (intQI_type_node, biasval); - mask = build_minus_one_cst (truth_type_for (vectype_in)); + /* If we have a COND_ADD take its mask. Otherwise use {-1, ...}. */ + if (is_cond_add) + mask = vec_opmask[0]; + else + mask = build_minus_one_cst (truth_type_for (vectype_in)); } /* Handle MINUS by adding the negative. */ @@ -7440,6 +7471,9 @@ vectorizable_reduction (loop_vec_info loop_vinfo, if (i == STMT_VINFO_REDUC_IDX (stmt_info)) continue; + if (op.ops[i] == op.ops[STMT_VINFO_REDUC_IDX (stmt_info)]) + continue; + /* There should be only one cycle def in the stmt, the one leading to reduc_def. */ if (VECTORIZABLE_CYCLE_DEF (dt)) @@ -8211,8 +8245,21 @@ vect_transform_reduction (loop_vec_info loop_vinfo, vec_num = 1; } - code_helper code = canonicalize_code (op.code, op.type); - internal_fn cond_fn = get_conditional_internal_fn (code, op.type); + code_helper code (op.code); + internal_fn cond_fn; + + if (code.is_internal_fn ()) + { + internal_fn ifn = internal_fn (op.code); + code = canonicalize_code (conditional_internal_fn_code (ifn), op.type); + cond_fn = ifn; + } + else + { + code = canonicalize_code (op.code, op.type); + cond_fn = get_conditional_internal_fn (code, op.type); + } + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo); bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in); @@ -8240,8 +8287,8 @@ vect_transform_reduction (loop_vec_info loop_vinfo, gcc_assert (code.is_tree_code ()); return vectorize_fold_left_reduction (loop_vinfo, stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, - tree_code (code), reduc_fn, op.ops, vectype_in, reduc_index, masks, - lens); + tree_code (code), reduc_fn, op.ops, op.num_ops, vectype_in, + reduc_index, masks, lens); } bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info);