On Mon, 11 Dec 2023, Tamar Christina wrote: > > > > > > > > Hmm, but we're visiting them then? I wonder how you get along > > > > without doing adjustmens on the uses if you consider > > > > > > > > _1 = a < b; > > > > _2 = c != d; > > > > _3 = _1 | _2; > > > > if (_3 != 0) > > > > exit loop; > > > > > > > > thus a combined condition like > > > > > > > > if (a < b || c != d) > > > > > > > > that we if-converted. We need to recognize that _1, _2 and _3 have > > > > mask uses and thus possibly adjust them. > > > > > > > > What bad happens if you drop 'analyze_only'? We're not really > > > > rewriting anything there. > > > > > > You mean drop it only in the above? We then fail to update the type for > > > the gcond. So in certain circumstances like with > > > > > > int a, c, d; > > > short b; > > > > > > int > > > main () > > > { > > > int e[1]; > > > for (; b < 2; b++) > > > { > > > a = 0; > > > if (b == 28378) > > > a = e[b]; > > > if (!(d || b)) > > > for (; c;) > > > ; > > > } > > > return 0; > > > } > > > > > > Unless we walk the statements regardless of whether they come from inside > > > the > > loop or not. > > > > What do you mean by "fail to update the type for the gcond"? If > > I understood correctly the 'analyze_only' short-cuts some > > checks, it doens't add some? > > > > But it's hard to follow what's actually done for a gcond ... > > > > Yes so I had realized I had misunderstood what this pattern was doing and once > I had made the first wrong change it snowballed. > > This is an updates patch where the only modification made is to > check_bool_pattern > to also return the type of the overall expression even if we are going to > handle the > conditional through an optab expansion. I'm piggybacking on the fact that > this function > has seen enough of the operands to be able to tell the precision needed when > vectorizing. > > This is needed because in the cases where the condition to the gcond was > already a bool > The precision would be 1 bit, to find the actual mask since we have to dig > through the > operands which this function already does. > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu and no > issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > * tree-vect-patterns.cc (vect_init_pattern_stmt): Support gconds. > (check_bool_pattern, vect_recog_bool_pattern): Support gconds type > analysis. > * tree-vect-stmts.cc (vectorizable_comparison_1): Support stmts without > lhs. > (vectorizable_early_exit): New. > (vect_analyze_stmt, vect_transform_stmt): Use it. > (vect_is_simple_use, vect_get_vector_types_for_stmt): Support gcond. > > --- inline copy of patch --- > > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index > 7debe7f0731673cd1bf25cd39d55e23990a73d0e..6bf1c0aba8ce94f70ce4e952efd1c5695b189690 > 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -132,6 +132,7 @@ vect_init_pattern_stmt (vec_info *vinfo, gimple > *pattern_stmt, > if (!STMT_VINFO_VECTYPE (pattern_stmt_info)) > { > gcc_assert (!vectype > + || is_a <gcond *> (pattern_stmt) > || (VECTOR_BOOLEAN_TYPE_P (vectype) > == vect_use_mask_type_p (orig_stmt_info))); > STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype; > @@ -5210,10 +5211,12 @@ vect_recog_mixed_size_cond_pattern (vec_info *vinfo, > true if bool VAR can and should be optimized that way. Assume it > shouldn't > in case it's a result of a comparison which can be directly vectorized > into > a vector comparison. Fills in STMTS with all stmts visited during the > - walk. */ > + walk. if VECTYPE then this value will contain the common type of the > + operations making up the comparisons. */ > > static bool > -check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts) > +check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts, > + tree *vectype) > { > tree rhs1; > enum tree_code rhs_code; > @@ -5234,27 +5237,28 @@ check_bool_pattern (tree var, vec_info *vinfo, > hash_set<gimple *> &stmts) > switch (rhs_code) > { > case SSA_NAME: > - if (! check_bool_pattern (rhs1, vinfo, stmts)) > + if (! check_bool_pattern (rhs1, vinfo, stmts, vectype)) > return false; > break; > > CASE_CONVERT: > if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) > return false; > - if (! check_bool_pattern (rhs1, vinfo, stmts)) > + if (! check_bool_pattern (rhs1, vinfo, stmts, vectype)) > return false; > break; > > case BIT_NOT_EXPR: > - if (! check_bool_pattern (rhs1, vinfo, stmts)) > + if (! check_bool_pattern (rhs1, vinfo, stmts, vectype)) > return false; > break; > > case BIT_AND_EXPR: > case BIT_IOR_EXPR: > case BIT_XOR_EXPR: > - if (! check_bool_pattern (rhs1, vinfo, stmts) > - || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts)) > + if (! check_bool_pattern (rhs1, vinfo, stmts, vectype) > + || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts, > + vectype)) > return false; > break; > > @@ -5272,6 +5276,8 @@ check_bool_pattern (tree var, vec_info *vinfo, > hash_set<gimple *> &stmts) > if (comp_vectype == NULL_TREE) > return false; > > + if (vectype) > + *vectype = comp_vectype; > tree mask_type = get_mask_type_for_scalar_type (vinfo, > TREE_TYPE (rhs1)); > if (mask_type > @@ -5608,13 +5614,28 @@ vect_recog_bool_pattern (vec_info *vinfo, > enum tree_code rhs_code; > tree var, lhs, rhs, vectype; > gimple *pattern_stmt; > - > - if (!is_gimple_assign (last_stmt)) > + gcond* cond = NULL; > + if (!is_gimple_assign (last_stmt) > + && !(cond = dyn_cast <gcond *> (last_stmt))) > return NULL;
I still think the code will be much easier to follow if you add if (gcond *cond = dyn_cast <gcond *> (last_stmt)) { thread to all branches return; } if (!is_gimple_assign (last_stmt)) return NULL; .. original code unchanged .. you can then also choose better names for the local variables. > - var = gimple_assign_rhs1 (last_stmt); > - lhs = gimple_assign_lhs (last_stmt); > - rhs_code = gimple_assign_rhs_code (last_stmt); > + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > + if (is_gimple_assign (last_stmt)) > + { > + var = gimple_assign_rhs1 (last_stmt); > + lhs = gimple_assign_lhs (last_stmt); > + rhs_code = gimple_assign_rhs_code (last_stmt); > + } > + else if (loop_vinfo && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) > + { > + /* If not multiple exits, and loop vectorization don't bother analyzing > + the gcond as we don't support SLP today. */ > + lhs = gimple_cond_lhs (last_stmt); > + var = gimple_cond_lhs (last_stmt); > + rhs_code = gimple_cond_code (last_stmt); > + } > + else > + return NULL; > > if (rhs_code == VIEW_CONVERT_EXPR) > var = TREE_OPERAND (var, 0); > @@ -5632,7 +5653,7 @@ vect_recog_bool_pattern (vec_info *vinfo, > return NULL; > vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); > > - if (check_bool_pattern (var, vinfo, bool_stmts)) > + if (check_bool_pattern (var, vinfo, bool_stmts, NULL)) > { > rhs = adjust_bool_stmts (vinfo, bool_stmts, > TREE_TYPE (lhs), stmt_vinfo); > @@ -5680,7 +5701,7 @@ vect_recog_bool_pattern (vec_info *vinfo, > > return pattern_stmt; > } > - else if (rhs_code == COND_EXPR > + else if ((rhs_code == COND_EXPR || cond) > && TREE_CODE (var) == SSA_NAME) > { > vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); > @@ -5700,18 +5721,33 @@ vect_recog_bool_pattern (vec_info *vinfo, > if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE) > return NULL; > > - if (check_bool_pattern (var, vinfo, bool_stmts)) > + tree comp_type = NULL_TREE; > + if (check_bool_pattern (var, vinfo, bool_stmts, &comp_type)) > var = adjust_bool_stmts (vinfo, bool_stmts, type, stmt_vinfo); > - else if (integer_type_for_mask (var, vinfo)) > + else if (!cond && integer_type_for_mask (var, vinfo)) > + return NULL; > + else if (cond && !comp_type) > return NULL; > > - lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); > - pattern_stmt > - = gimple_build_assign (lhs, COND_EXPR, > - build2 (NE_EXPR, boolean_type_node, > - var, build_int_cst (TREE_TYPE (var), 0)), > - gimple_assign_rhs2 (last_stmt), > - gimple_assign_rhs3 (last_stmt)); > + if (!cond) > + { > + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); > + pattern_stmt > + = gimple_build_assign (lhs, COND_EXPR, > + build2 (NE_EXPR, boolean_type_node, var, > + build_int_cst (TREE_TYPE (var), 0)), > + gimple_assign_rhs2 (last_stmt), > + gimple_assign_rhs3 (last_stmt)); > + } > + else > + { > + pattern_stmt > + = gimple_build_cond (NE_EXPR, > + var, build_int_cst (TREE_TYPE (var), 0), > + gimple_cond_true_label (cond), > + gimple_cond_false_label (cond)); the labels are always NULL, so just use NULL_TREE for them. > + vectype = truth_type_for (comp_type); so this leaves the producer of the mask in the GIMPLE_COND and we vectorize the GIMPLE_COND as mask_1 = ...; if (mask_1 != {-1,-1...}) .. ? In principle only the mask producer needs a vector type and that adjusted by bool handling, the branch itself doesn't need any STMT_VINFO_VECTYPE. As said I believe if you recognize a GIMPLE_COND pattern for conds that aren't bool != 0 producing the mask stmt this should be picked up by bool handling correctly already. Also as said piggy-backing on the COND_EXPR handling in this function which has the condition split out into a separate stmt(!) might not completely handle things correctly and you are likely missing the tcc_comparison handling of the embedded compare. > + } > *type_out = vectype; > vect_pattern_detected ("vect_recog_bool_pattern", last_stmt); > > @@ -5725,7 +5761,7 @@ vect_recog_bool_pattern (vec_info *vinfo, > if (!vectype || !VECTOR_MODE_P (TYPE_MODE (vectype))) > return NULL; > > - if (check_bool_pattern (var, vinfo, bool_stmts)) > + if (check_bool_pattern (var, vinfo, bool_stmts, NULL)) > rhs = adjust_bool_stmts (vinfo, bool_stmts, > TREE_TYPE (vectype), stmt_vinfo); > else > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index > 582c5e678fad802d6e76300fe3c939b9f2978f17..d0878250f6fb9de4d6e6a39d16956ca147be4b80 > 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -12489,7 +12489,7 @@ vectorizable_comparison_1 (vec_info *vinfo, tree > vectype, > vec<tree> vec_oprnds0 = vNULL; > vec<tree> vec_oprnds1 = vNULL; > tree mask_type; > - tree mask; > + tree mask = NULL_TREE; > > if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) > return false; > @@ -12629,8 +12629,9 @@ vectorizable_comparison_1 (vec_info *vinfo, tree > vectype, > /* Transform. */ > > /* Handle def. */ > - lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info)); > - mask = vect_create_destination_var (lhs, mask_type); > + lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)); > + if (lhs) > + mask = vect_create_destination_var (lhs, mask_type); > > vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, > rhs1, &vec_oprnds0, vectype, > @@ -12644,7 +12645,10 @@ vectorizable_comparison_1 (vec_info *vinfo, tree > vectype, > gimple *new_stmt; > vec_rhs2 = vec_oprnds1[i]; > > - new_temp = make_ssa_name (mask); > + if (lhs) > + new_temp = make_ssa_name (mask); > + else > + new_temp = make_temp_ssa_name (mask_type, NULL, "cmp"); > if (bitop1 == NOP_EXPR) > { > new_stmt = gimple_build_assign (new_temp, code, > @@ -12723,6 +12727,198 @@ vectorizable_comparison (vec_info *vinfo, > return true; > } > > +/* Check to see if the current early break given in STMT_INFO is valid for > + vectorization. */ > + > +static bool > +vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, > + gimple_stmt_iterator *gsi, gimple **vec_stmt, > + slp_tree slp_node, stmt_vector_for_cost *cost_vec) > +{ > + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); > + if (!loop_vinfo > + || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) > + return false; > + > + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def) > + return false; > + > + if (!STMT_VINFO_RELEVANT_P (stmt_info)) > + return false; > + > + auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info)); > + tree vectype = STMT_VINFO_VECTYPE (stmt_info); > + gcc_assert (vectype); > + > + tree vectype_op0 = NULL_TREE; > + slp_tree slp_op0; > + tree op0; > + enum vect_def_type dt0; > + if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, > &dt0, > + &vectype_op0)) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "use not simple.\n"); > + return false; > + } > + > + machine_mode mode = TYPE_MODE (vectype); > + int ncopies; > + > + if (slp_node) > + ncopies = 1; > + else > + ncopies = vect_get_num_copies (loop_vinfo, vectype); > + > + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); > + bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); > + > + /* Analyze only. */ > + if (!vec_stmt) > + { > + if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "can't vectorize early exit because the " > + "target doesn't support flag setting vector " > + "comparisons.\n"); > + return false; > + } > + > + if (ncopies > 1 > + && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing) > + { > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > + "can't vectorize early exit because the " > + "target does not support boolean vector OR for " > + "type %T.\n", vectype); > + return false; > + } > + > + if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, > + vec_stmt, slp_node, cost_vec)) > + return false; > + > + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) > + { > + if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype, > + OPTIMIZE_FOR_SPEED)) > + return false; > + else > + vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL); > + } > + > + > + return true; > + } > + > + /* Tranform. */ > + > + tree new_temp = NULL_TREE; > + gimple *new_stmt = NULL; > + > + if (dump_enabled_p ()) > + dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n"); > + > + if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi, > + vec_stmt, slp_node, cost_vec)) > + gcc_unreachable (); > + > + gimple *stmt = STMT_VINFO_STMT (stmt_info); > + basic_block cond_bb = gimple_bb (stmt); > + gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb); > + > + auto_vec<tree> stmts; > + > + tree mask = NULL_TREE; > + if (masked_loop_p) > + mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0); > + > + if (slp_node) > + stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node)); > + else > + { > + auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info); > + stmts.reserve_exact (vec_stmts.length ()); > + for (auto stmt : vec_stmts) > + stmts.quick_push (gimple_assign_lhs (stmt)); > + } > + > + /* Determine if we need to reduce the final value. */ > + if (stmts.length () > 1) > + { > + /* We build the reductions in a way to maintain as much parallelism as > + possible. */ > + auto_vec<tree> workset (stmts.length ()); > + > + /* Mask the statements as we queue them up. */ > + if (masked_loop_p) > + for (auto stmt : stmts) > + workset.quick_push (prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), > + mask, stmt, &cond_gsi)); > + else > + workset.splice (stmts); > + > + while (workset.length () > 1) > + { > + new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc"); > + tree arg0 = workset.pop (); > + tree arg1 = workset.pop (); > + new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, arg0, arg1); > + vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, > + &cond_gsi); > + workset.quick_insert (0, new_temp); > + } > + } > + else > + new_temp = stmts[0]; > + > + gcc_assert (new_temp); > + > + tree cond = new_temp; > + /* If we have multiple statements after reduction we should check all the > + lanes and treat it as a full vector. */ > + if (masked_loop_p) > + cond = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, cond, > + &cond_gsi); You didn't fix any of the code above it seems, it's still wrong. Richard. > + /* Now build the new conditional. Pattern gimple_conds get dropped during > + codegen so we must replace the original insn. */ > + stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info)); > + gcond *cond_stmt = as_a <gcond *>(stmt); > + /* When vectorizing we assume that if the branch edge is taken that we're > + exiting the loop. This is not however always the case as the compiler > will > + rewrite conditions to always be a comparison against 0. To do this it > + sometimes flips the edges. This is fine for scalar, but for vector we > + then have to flip the test, as we're still assuming that if you take the > + branch edge that we found the exit condition. */ > + auto new_code = NE_EXPR; > + tree cst = build_zero_cst (vectype); > + if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), > + BRANCH_EDGE (gimple_bb (cond_stmt))->dest)) > + { > + new_code = EQ_EXPR; > + cst = build_minus_one_cst (vectype); > + } > + > + gimple_cond_set_condition (cond_stmt, new_code, cond, cst); > + update_stmt (stmt); > + > + if (slp_node) > + SLP_TREE_VEC_DEFS (slp_node).truncate (0); > + else > + STMT_VINFO_VEC_STMTS (stmt_info).truncate (0); > + > + > + if (!slp_node) > + *vec_stmt = stmt; > + > + return true; > +} > + > /* If SLP_NODE is nonnull, return true if vectorizable_live_operation > can handle all live statements in the node. Otherwise return true > if STMT_INFO is not live or if vectorizable_live_operation can handle it. > @@ -12949,7 +13145,9 @@ vect_analyze_stmt (vec_info *vinfo, > || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo), > stmt_info, NULL, node) > || vectorizable_recurr (as_a <loop_vec_info> (vinfo), > - stmt_info, NULL, node, cost_vec)); > + stmt_info, NULL, node, cost_vec) > + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, > + cost_vec)); > else > { > if (bb_vinfo) > @@ -12972,7 +13170,10 @@ vect_analyze_stmt (vec_info *vinfo, > NULL, NULL, node, cost_vec) > || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node, > cost_vec) > - || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)); > + || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec) > + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, > + cost_vec)); > + > } > > if (node) > @@ -13131,6 +13332,12 @@ vect_transform_stmt (vec_info *vinfo, > gcc_assert (done); > break; > > + case loop_exit_ctrl_vec_info_type: > + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt, > + slp_node, NULL); > + gcc_assert (done); > + break; > + > default: > if (!STMT_VINFO_LIVE_P (stmt_info)) > { > @@ -14321,10 +14528,19 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, > stmt_vec_info stmt_info, > } > else > { > + gcond *cond = NULL; > if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) > scalar_type = TREE_TYPE (DR_REF (dr)); > else if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) > scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); > + else if ((cond = dyn_cast <gcond *> (stmt))) > + { > + /* We can't convert the scalar type to boolean yet, since booleans > have a > + single bit precision and we need the vector boolean to be a > + representation of the integer mask. So set the correct integer > type and > + convert to boolean vector once we have a vectype. */ > + scalar_type = TREE_TYPE (gimple_cond_lhs (cond)); > + } > else > scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); > > @@ -14339,12 +14555,18 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, > stmt_vec_info stmt_info, > "get vectype for scalar type: %T\n", scalar_type); > } > vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); > + > if (!vectype) > return opt_result::failure_at (stmt, > "not vectorized:" > " unsupported data-type %T\n", > scalar_type); > > + /* If we were a gcond, convert the resulting type to a vector boolean > type now > + that we have the correct integer mask type. */ > + if (cond) > + vectype = truth_type_for (vectype); > + > if (dump_enabled_p ()) > dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); > } > -- Richard Biener <rguent...@suse.de> SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)