Hi All, This implements vectorizable_early_exit which is used as the codegen part of vectorizing a gcond.
For the most part it shares the majority of the code with vectorizable_comparison with addition that it needs to be able to reduce multiple resulting statements into a single one for use in the gcond, and also needs to be able to perform masking on the comparisons. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * tree-vect-stmts.cc (vectorizable_comparison_1): Support stmts without lhs. (vectorizable_early_exit): New. (vect_analyze_stmt, vect_transform_stmt): Use it. (vect_is_simple_use, vect_get_vector_types_for_stmt): Support gcond. --- inline copy of patch -- diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index f3e33cd4ed125b9564ca81acd197693fc3457c31..87c4353fa5180fcb7f60b192897456cf24f3fdbe 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -11330,8 +11330,186 @@ vectorizable_condition (vec_info *vinfo, return true; } -/* vectorizable_comparison. +static bool +vectorizable_comparison_1 (vec_info *, tree, stmt_vec_info, tree_code, + gimple_stmt_iterator *, gimple **, slp_tree, + stmt_vector_for_cost *); + +/* Check to see if the current early break given in STMT_INFO is valid for + vectorization. 
*/ + +static bool +vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, + gimple_stmt_iterator *gsi, gimple **vec_stmt, + slp_tree slp_node, stmt_vector_for_cost *cost_vec) +{ + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); + if (!loop_vinfo + || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_early_exit_def) + return false; + + if (!STMT_VINFO_RELEVANT_P (stmt_info)) + return false; + + gimple_match_op op; + if (!gimple_extract_op (stmt_info->stmt, &op)) + gcc_unreachable (); + gcc_assert (op.code.is_tree_code ()); + auto code = tree_code (op.code); + + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + gcc_assert (vectype_out); + + stmt_vec_info operand0_info + = loop_vinfo->lookup_stmt (SSA_NAME_DEF_STMT (op.ops[0])); + if (!operand0_info) + return false; + /* If we're in a pattern get the type of the original statement. */ + if (STMT_VINFO_IN_PATTERN_P (operand0_info)) + operand0_info = STMT_VINFO_RELATED_STMT (operand0_info); + tree vectype_op = STMT_VINFO_VECTYPE (operand0_info); + + tree truth_type = truth_type_for (vectype_op); + machine_mode mode = TYPE_MODE (truth_type); + int ncopies; + + if (slp_node) + ncopies = 1; + else + ncopies = vect_get_num_copies (loop_vinfo, truth_type); + + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); + bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + + /* Analyze only. 
*/ + if (!vec_stmt) + { + if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't vectorize early exit because the " + "target doesn't support flag setting vector " + "comparisons.\n"); + return false; + } + + if (!expand_vec_cmp_expr_p (vectype_op, truth_type, NE_EXPR)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't vectorize early exit because the " + "target does not support boolean vector " + "comparisons for type %T.\n", truth_type); + return false; + } + + if (ncopies > 1 + && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't vectorize early exit because the " + "target does not support boolean vector OR for " + "type %T.\n", truth_type); + return false; + } + + if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, gsi, + vec_stmt, slp_node, cost_vec)) + return false; + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) + vect_record_loop_mask (loop_vinfo, masks, ncopies, truth_type, NULL); + + return true; + } + + /* Tranform. */ + + tree new_temp = NULL_TREE; + gimple *new_stmt = NULL; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n"); + + if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, gsi, + vec_stmt, slp_node, cost_vec)) + gcc_unreachable (); + + gimple *stmt = STMT_VINFO_STMT (stmt_info); + basic_block cond_bb = gimple_bb (stmt); + gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb); + + vec<gimple *> stmts; + + if (slp_node) + stmts = SLP_TREE_VEC_STMTS (slp_node); + else + stmts = STMT_VINFO_VEC_STMTS (stmt_info); + + /* Determine if we need to reduce the final value. */ + if (stmts.length () > 1) + { + /* We build the reductions in a way to maintain as much parallelism as + possible. 
*/ + auto_vec<gimple *> workset (stmts.length ()); + workset.splice (stmts); + while (workset.length () > 1) + { + new_temp = make_temp_ssa_name (truth_type, NULL, "vexit_reduc"); + gimple *arg0 = workset.pop (); + gimple *arg1 = workset.pop (); + new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, + gimple_assign_lhs (arg0), + gimple_assign_lhs (arg1)); + vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, + &cond_gsi); + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + else + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + workset.quick_insert (0, new_stmt); + } + } + else + new_stmt = stmts[0]; + + gcc_assert (new_stmt); + + tree cond = gimple_assign_lhs (new_stmt); + if (masked_loop_p) + { + tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, truth_type, 0); + cond = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, cond, + &cond_gsi); + } + + /* Now build the new conditional. Pattern gimple_conds get dropped during + codegen so we must replace the original insn. */ + if (is_pattern_stmt_p (stmt_info)) + stmt = STMT_VINFO_STMT (STMT_VINFO_RELATED_STMT (stmt_info)); + + tree t = fold_build2 (NE_EXPR, boolean_type_node, cond, + build_zero_cst (truth_type)); + t = canonicalize_cond_expr_cond (t); + gimple_cond_set_condition_from_tree ((gcond*)stmt, t); + update_stmt (stmt); + + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (stmt); + else + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (stmt); + + + if (!slp_node) + *vec_stmt = stmt; + + return true; +} /* Helper of vectorizable_comparison. Check if STMT_INFO is comparison expression CODE that can be vectorized. @@ -11501,8 +11679,9 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, /* Transform. */ /* Handle def. 
*/ - lhs = gimple_assign_lhs (stmt); - mask = vect_create_destination_var (lhs, mask_type); + lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)); + if (lhs) + mask = vect_create_destination_var (lhs, mask_type); vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, rhs1, &vec_oprnds0, vectype, @@ -11516,7 +11695,10 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, gimple *new_stmt; vec_rhs2 = vec_oprnds1[i]; - new_temp = make_ssa_name (mask); + if (lhs) + new_temp = make_ssa_name (mask); + else + new_temp = make_temp_ssa_name (mask_type, NULL, "cmp"); if (bitop1 == NOP_EXPR) { new_stmt = gimple_build_assign (new_temp, code, @@ -11816,7 +11998,9 @@ vect_analyze_stmt (vec_info *vinfo, || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo), stmt_info, NULL, node) || vectorizable_recurr (as_a <loop_vec_info> (vinfo), - stmt_info, NULL, node, cost_vec)); + stmt_info, NULL, node, cost_vec) + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, + cost_vec)); else { if (bb_vinfo) @@ -11839,7 +12023,10 @@ vect_analyze_stmt (vec_info *vinfo, NULL, NULL, node, cost_vec) || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node, cost_vec) - || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)); + || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec) + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, + cost_vec)); + } if (node) @@ -11997,6 +12184,12 @@ vect_transform_stmt (vec_info *vinfo, gcc_assert (done); break; + case loop_exit_ctrl_vec_info_type: + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt, + slp_node, NULL); + gcc_assert (done); + break; + default: if (!STMT_VINFO_LIVE_P (stmt_info)) { @@ -12395,6 +12588,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, case vect_first_order_recurrence: dump_printf (MSG_NOTE, "first order recurrence\n"); break; + case vect_early_exit_def: + dump_printf (MSG_NOTE, "early exit\n"); + break; case vect_unknown_def_type: dump_printf 
(MSG_NOTE, "unknown\n"); break; @@ -12511,6 +12707,14 @@ vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node, else *op = gimple_op (ass, operand + 1); } + else if (gcond *cond = dyn_cast <gcond *> (stmt->stmt)) + { + gimple_match_op m_op; + if (!gimple_extract_op (cond, &m_op)) + return false; + gcc_assert (m_op.code.is_tree_code ()); + *op = m_op.ops[operand]; + } else if (gcall *call = dyn_cast <gcall *> (stmt->stmt)) *op = gimple_call_arg (call, operand); else @@ -13121,6 +13325,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, *nunits_vectype_out = NULL_TREE; if (gimple_get_lhs (stmt) == NULL_TREE + /* Allow vector conditionals through here. */ + && !is_ctrl_stmt (stmt) /* MASK_STORE has no lhs, but is ok. */ && !gimple_call_internal_p (stmt, IFN_MASK_STORE)) { @@ -13137,7 +13343,7 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, } return opt_result::failure_at (stmt, - "not vectorized: irregular stmt.%G", stmt); + "not vectorized: irregular stmt: %G", stmt); } tree vectype; @@ -13166,6 +13372,14 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, scalar_type = TREE_TYPE (DR_REF (dr)); else if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); + else if (is_ctrl_stmt (stmt)) + { + gcond *cond = dyn_cast <gcond *> (stmt); + if (!cond) + return opt_result::failure_at (stmt, "not vectorized: unsupported" + " control flow statement.\n"); + scalar_type = TREE_TYPE (gimple_cond_rhs (stmt)); + } else scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); --
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index f3e33cd4ed125b9564ca81acd197693fc3457c31..87c4353fa5180fcb7f60b192897456cf24f3fdbe 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -11330,8 +11330,186 @@ vectorizable_condition (vec_info *vinfo, return true; } -/* vectorizable_comparison. +static bool +vectorizable_comparison_1 (vec_info *, tree, stmt_vec_info, tree_code, + gimple_stmt_iterator *, gimple **, slp_tree, + stmt_vector_for_cost *); + +/* Check to see if the current early break given in STMT_INFO is valid for + vectorization. */ + +static bool +vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info, + gimple_stmt_iterator *gsi, gimple **vec_stmt, + slp_tree slp_node, stmt_vector_for_cost *cost_vec) +{ + loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); + if (!loop_vinfo + || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info))) + return false; + + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_early_exit_def) + return false; + + if (!STMT_VINFO_RELEVANT_P (stmt_info)) + return false; + + gimple_match_op op; + if (!gimple_extract_op (stmt_info->stmt, &op)) + gcc_unreachable (); + gcc_assert (op.code.is_tree_code ()); + auto code = tree_code (op.code); + + tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + gcc_assert (vectype_out); + + stmt_vec_info operand0_info + = loop_vinfo->lookup_stmt (SSA_NAME_DEF_STMT (op.ops[0])); + if (!operand0_info) + return false; + /* If we're in a pattern get the type of the original statement. 
*/ + if (STMT_VINFO_IN_PATTERN_P (operand0_info)) + operand0_info = STMT_VINFO_RELATED_STMT (operand0_info); + tree vectype_op = STMT_VINFO_VECTYPE (operand0_info); + + tree truth_type = truth_type_for (vectype_op); + machine_mode mode = TYPE_MODE (truth_type); + int ncopies; + + if (slp_node) + ncopies = 1; + else + ncopies = vect_get_num_copies (loop_vinfo, truth_type); + + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); + bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + + /* Analyze only. */ + if (!vec_stmt) + { + if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't vectorize early exit because the " + "target doesn't support flag setting vector " + "comparisons.\n"); + return false; + } + + if (!expand_vec_cmp_expr_p (vectype_op, truth_type, NE_EXPR)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't vectorize early exit because the " + "target does not support boolean vector " + "comparisons for type %T.\n", truth_type); + return false; + } + + if (ncopies > 1 + && direct_optab_handler (ior_optab, mode) == CODE_FOR_nothing) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "can't vectorize early exit because the " + "target does not support boolean vector OR for " + "type %T.\n", truth_type); + return false; + } + + if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, gsi, + vec_stmt, slp_node, cost_vec)) + return false; + if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) + vect_record_loop_mask (loop_vinfo, masks, ncopies, truth_type, NULL); + + return true; + } + + /* Tranform. 
*/ + + tree new_temp = NULL_TREE; + gimple *new_stmt = NULL; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n"); + + if (!vectorizable_comparison_1 (vinfo, truth_type, stmt_info, code, gsi, + vec_stmt, slp_node, cost_vec)) + gcc_unreachable (); + + gimple *stmt = STMT_VINFO_STMT (stmt_info); + basic_block cond_bb = gimple_bb (stmt); + gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb); + + vec<gimple *> stmts; + + if (slp_node) + stmts = SLP_TREE_VEC_STMTS (slp_node); + else + stmts = STMT_VINFO_VEC_STMTS (stmt_info); + + /* Determine if we need to reduce the final value. */ + if (stmts.length () > 1) + { + /* We build the reductions in a way to maintain as much parallelism as + possible. */ + auto_vec<gimple *> workset (stmts.length ()); + workset.splice (stmts); + while (workset.length () > 1) + { + new_temp = make_temp_ssa_name (truth_type, NULL, "vexit_reduc"); + gimple *arg0 = workset.pop (); + gimple *arg1 = workset.pop (); + new_stmt = gimple_build_assign (new_temp, BIT_IOR_EXPR, + gimple_assign_lhs (arg0), + gimple_assign_lhs (arg1)); + vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt, + &cond_gsi); + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + else + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + workset.quick_insert (0, new_stmt); + } + } + else + new_stmt = stmts[0]; + + gcc_assert (new_stmt); + + tree cond = gimple_assign_lhs (new_stmt); + if (masked_loop_p) + { + tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, truth_type, 0); + cond = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask, cond, + &cond_gsi); + } + + /* Now build the new conditional. Pattern gimple_conds get dropped during + codegen so we must replace the original insn. 
*/ + if (is_pattern_stmt_p (stmt_info)) + stmt = STMT_VINFO_STMT (STMT_VINFO_RELATED_STMT (stmt_info)); + + tree t = fold_build2 (NE_EXPR, boolean_type_node, cond, + build_zero_cst (truth_type)); + t = canonicalize_cond_expr_cond (t); + gimple_cond_set_condition_from_tree ((gcond*)stmt, t); + update_stmt (stmt); + + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (stmt); + else + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (stmt); + + + if (!slp_node) + *vec_stmt = stmt; + + return true; +} /* Helper of vectorizable_comparison. Check if STMT_INFO is comparison expression CODE that can be vectorized. @@ -11501,8 +11679,9 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, /* Transform. */ /* Handle def. */ - lhs = gimple_assign_lhs (stmt); - mask = vect_create_destination_var (lhs, mask_type); + lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info)); + if (lhs) + mask = vect_create_destination_var (lhs, mask_type); vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, rhs1, &vec_oprnds0, vectype, @@ -11516,7 +11695,10 @@ vectorizable_comparison_1 (vec_info *vinfo, tree vectype, gimple *new_stmt; vec_rhs2 = vec_oprnds1[i]; - new_temp = make_ssa_name (mask); + if (lhs) + new_temp = make_ssa_name (mask); + else + new_temp = make_temp_ssa_name (mask_type, NULL, "cmp"); if (bitop1 == NOP_EXPR) { new_stmt = gimple_build_assign (new_temp, code, @@ -11816,7 +11998,9 @@ vect_analyze_stmt (vec_info *vinfo, || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo), stmt_info, NULL, node) || vectorizable_recurr (as_a <loop_vec_info> (vinfo), - stmt_info, NULL, node, cost_vec)); + stmt_info, NULL, node, cost_vec) + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, + cost_vec)); else { if (bb_vinfo) @@ -11839,7 +12023,10 @@ vect_analyze_stmt (vec_info *vinfo, NULL, NULL, node, cost_vec) || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node, cost_vec) - || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)); + || vectorizable_phi (vinfo, 
stmt_info, NULL, node, cost_vec) + || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node, + cost_vec)); + } if (node) @@ -11997,6 +12184,12 @@ vect_transform_stmt (vec_info *vinfo, gcc_assert (done); break; + case loop_exit_ctrl_vec_info_type: + done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt, + slp_node, NULL); + gcc_assert (done); + break; + default: if (!STMT_VINFO_LIVE_P (stmt_info)) { @@ -12395,6 +12588,9 @@ vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt, case vect_first_order_recurrence: dump_printf (MSG_NOTE, "first order recurrence\n"); break; + case vect_early_exit_def: + dump_printf (MSG_NOTE, "early exit\n"); + break; case vect_unknown_def_type: dump_printf (MSG_NOTE, "unknown\n"); break; @@ -12511,6 +12707,14 @@ vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node, else *op = gimple_op (ass, operand + 1); } + else if (gcond *cond = dyn_cast <gcond *> (stmt->stmt)) + { + gimple_match_op m_op; + if (!gimple_extract_op (cond, &m_op)) + return false; + gcc_assert (m_op.code.is_tree_code ()); + *op = m_op.ops[operand]; + } else if (gcall *call = dyn_cast <gcall *> (stmt->stmt)) *op = gimple_call_arg (call, operand); else @@ -13121,6 +13325,8 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, *nunits_vectype_out = NULL_TREE; if (gimple_get_lhs (stmt) == NULL_TREE + /* Allow vector conditionals through here. */ + && !is_ctrl_stmt (stmt) /* MASK_STORE has no lhs, but is ok. 
*/ && !gimple_call_internal_p (stmt, IFN_MASK_STORE)) { @@ -13137,7 +13343,7 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, } return opt_result::failure_at (stmt, - "not vectorized: irregular stmt.%G", stmt); + "not vectorized: irregular stmt: %G", stmt); } tree vectype; @@ -13166,6 +13372,14 @@ vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info, scalar_type = TREE_TYPE (DR_REF (dr)); else if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); + else if (is_ctrl_stmt (stmt)) + { + gcond *cond = dyn_cast <gcond *> (stmt); + if (!cond) + return opt_result::failure_at (stmt, "not vectorized: unsupported" + " control flow statement.\n"); + scalar_type = TREE_TYPE (gimple_cond_rhs (stmt)); + } else scalar_type = TREE_TYPE (gimple_get_lhs (stmt));