https://gcc.gnu.org/g:991c35ee974548156c01cb69f093694973468f51
commit r17-836-g991c35ee974548156c01cb69f093694973468f51 Author: Tamar Christina <[email protected]> Date: Wed May 27 10:52:27 2026 +0100 vect: refactor loop peeling to support explicit flag to redirect early exits [PR120352] This patch series is the first in a few to optimize early break vectorization. The first one addresses that certain loops don't require an epilog at all. An example is int a[N] = {0,0,0,1}; int b[N] = {0,0,0,1}; __attribute__((noipa, noinline)) int foo () { for (int i = 0; i < N; i++) { if (a[i] > b[i]) return 1; } return 0; } where we have no value or side-effect to compute. Naturally there's no need to redo any work to just return 1 or 0. Teaching the vectorizer this however re-enabled epilogue nomask for early break and so we still need to be able to peel for the epilogues. This peeling however should not redirect all the alternative exits to the epilog. To understand when this has to happen peeling now gets an extra parameter to indicate how to handle the multiple exits. This had an unfortunate interaction with uncounted loops, because uncounted loops re-used the layout (with the intermediate merge block) but just being a fall through block. When it did this it didn't put all PHI nodes in the final merge block and as such relied on fixups later. This made the actual changed needed for not needing epilogs more fragile than needed so I first refactored peeling to be more consistent between early break and uncounted loops and insure that all BB now explicitly mention and use all PHI nodes from the exits. The code should hopefully be a bit more robust now wrt to needed optimizations. gcc/ChangeLog: PR tree-optimization/120352 * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg): Add redirect_exits. (vect_do_peeling): Use it. * tree-vectorizer.h (slpeel_tree_duplicate_loop_to_edge_cfg): Update prototype. Diff: --- gcc/tree-vect-loop-manip.cc | 151 ++++++++++++++++++++++++++++++-------------- gcc/tree-vectorizer.h | 3 +- 2 files changed, 106 insertions(+), 48 deletions(-) diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index cd1ea746ae45..3aae0dea25b0 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -1484,7 +1484,8 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, edge scalar_exit, edge e, edge *new_e, bool flow_loops, vec<basic_block> *updated_doms, - bool uncounted_p, bool create_main_e) + bool uncounted_p, bool create_main_e, + bool redirect_exits) { class loop *new_loop; basic_block *new_bbs, *bbs, *pbbs; @@ -1601,7 +1602,11 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, } auto loop_exits = get_loop_exit_edges (loop); - bool multiple_exits_p = loop_exits.length () > 1; + bool has_multiple_exits_p = loop_exits.length () > 1; + + /* If REDIRECT_EXITS is false we leave the alternative exits untouched and + treat the duplication as if the loop only had the main exit. */ + bool redirect_multiple_exits_p = redirect_exits && has_multiple_exits_p; auto_vec<basic_block> doms; if (at_exit) /* Add the loop copy at exit. */ @@ -1641,10 +1646,10 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, loop_exit, UNKNOWN_LOCATION); } - bool multiple_exits_p = loop_exits.length () > 1; basic_block main_loop_exit_block = new_preheader; - basic_block alt_loop_exit_block = NULL; - /* Create the CFG for multiple exits. + basic_block alt_loop_exit_block = new_preheader; + /* When we redirect the other exits create the CFG + below to funnel everything through the merge block: | loop_exit | alt1 | altN v v ... v main_loop_exit_block: alt_loop_exit_block: @@ -1655,39 +1660,46 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, the continuation values into the epilogue header. Do not bother with exit PHIs for the early exits but their live virtual operand. We'll fix up things below. */ - if (multiple_exits_p || uncounted_p) + if (redirect_multiple_exits_p || uncounted_p) { edge loop_e = single_succ_edge (new_preheader); new_preheader = split_edge (loop_e); - gphi *vphi = NULL; - alt_loop_exit_block = new_preheader; - for (auto exit : loop_exits) - if (exit != loop_exit) - { - tree vphi_def = NULL_TREE; - if (gphi *evphi = get_virtual_phi (exit->dest)) - vphi_def = gimple_phi_arg_def_from_edge (evphi, exit); - edge res = redirect_edge_and_branch (exit, alt_loop_exit_block); - gcc_assert (res == exit); - redirect_edge_var_map_clear (exit); - if (alt_loop_exit_block == new_preheader) - alt_loop_exit_block = split_edge (exit); - if (!need_virtual_phi) - continue; - /* When the edge has no virtual LC PHI get at the live - virtual operand by other means. */ - if (!vphi_def) - vphi_def = get_live_virtual_operand_on_edge (exit); - if (!vphi) - vphi = create_phi_node (copy_ssa_name (vphi_def), + if (redirect_exits) + { + gphi *vphi = NULL; + alt_loop_exit_block = new_preheader; + for (auto exit : loop_exits) + if (exit != loop_exit) + { + tree vphi_def = NULL_TREE; + if (gphi *evphi = get_virtual_phi (exit->dest)) + vphi_def = gimple_phi_arg_def_from_edge (evphi, exit); + edge res + = redirect_edge_and_branch (exit, alt_loop_exit_block); + gcc_assert (res == exit); + redirect_edge_var_map_clear (exit); + + if (alt_loop_exit_block == new_preheader) + alt_loop_exit_block = split_edge (exit); + if (!need_virtual_phi) + continue; + + /* When the edge has no virtual LC PHI get at the live + virtual operand by other means. */ + if (!vphi_def) + vphi_def = get_live_virtual_operand_on_edge (exit); + + if (!vphi) + vphi = create_phi_node (copy_ssa_name (vphi_def), alt_loop_exit_block); - else - /* Edge redirection might re-allocate the PHI node - so we have to rediscover it. */ - vphi = get_virtual_phi (alt_loop_exit_block); - add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION); - } + else + /* Edge redirection might re-allocate the PHI node + so we have to rediscover it. */ + vphi = get_virtual_phi (alt_loop_exit_block); + add_phi_arg (vphi, vphi_def, exit, UNKNOWN_LOCATION); + } + } set_immediate_dominator (CDI_DOMINATORS, new_preheader, loop->header); @@ -1741,7 +1753,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, /* Create the merge PHI nodes in new_preheader and populate the arguments for the exits. */ - if (multiple_exits_p || uncounted_p) + if (redirect_multiple_exits_p) { for (auto gsi_from = gsi_start_phis (loop->header), gsi_to = gsi_start_phis (new_loop->header); @@ -1795,7 +1807,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, } } - if (multiple_exits_p) + if (redirect_multiple_exits_p) { /* After creating the merge PHIs handle the early exits those should use the values at the start of the loop. */ @@ -1826,14 +1838,24 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, SET_PHI_ARG_DEF (lc_phi, i, alt_arg); alt_arg = alt_def; } + + /* The merge PHIs live in NEW_PREHEADER; their + alternative argument always comes from the + successor edge of ALT_LOOP_EXIT_BLOCK. */ edge alt_e = single_succ_edge (alt_loop_exit_block); SET_PHI_ARG_DEF_ON_EDGE (to_phi, alt_e, alt_arg); } } + /* For the single exit case only create the missing LC PHI nodes for the continuation of the loop IVs that are not also already - reductions and thus had LC PHI nodes on the exit already. */ - if (!multiple_exits_p && !uncounted_p) + reductions and thus had LC PHI nodes on the exit already. When + we are not redirecting the alternative exits the layout is: + + loop_exit ---> new_preheader ---> epilog + alt_exit ---------------> original dest + */ + if (!redirect_multiple_exits_p) { for (auto gsi_from = gsi_start_phis (loop->header), gsi_to = gsi_start_phis (new_loop->header); @@ -1842,21 +1864,54 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, { gimple *from_phi = gsi_stmt (gsi_from); gimple *to_phi = gsi_stmt (gsi_to); - tree new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, - loop_latch_edge (loop)); + tree new_arg; + + /* Use the value on the exiting path. When the exit is from + the latch edge we want the post-iteration value on that + edge; when the exit is from the loop header (before the + latch ever executes) we must use the current header value, + otherwise we pick up a name that was never defined. */ + if (!has_multiple_exits_p && !uncounted_p) + new_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, + loop_latch_edge (loop)); + else + new_arg = gimple_phi_result (from_phi); + + /* Re-use the virtual LC PHI we already built when we are not + redirecting the other exits to avoid creating duplicate + virtual SSA names. */ + if (virtual_operand_p (new_arg)) + { + if (gphi *vphi = get_virtual_phi (main_loop_exit_block)) + { + adjust_phi_and_debug_stmts (to_phi, loop_entry, + gimple_phi_result (vphi)); + continue; + } + } /* Check if we've already created a new phi node during edge redirection. If we have, only propagate the value downwards. */ if (tree *res = new_phi_args.get (new_arg)) { - adjust_phi_and_debug_stmts (to_phi, loop_entry, *res); - continue; + /* Check if the new dest block already contains a use. */ + gimple *stmt = SSA_NAME_DEF_STMT (*res); + + /* If the value already exist, just update the destination + and if it doesn't we want a new node. */ + if (gimple_bb (stmt) == main_loop_exit_block) + { + adjust_phi_and_debug_stmts (to_phi, loop_entry, *res); + continue; + } + else + new_arg = *res; } tree new_res = copy_ssa_name (gimple_phi_result (from_phi)); - gphi *lcssa_phi = create_phi_node (new_res, new_preheader); - SET_PHI_ARG_DEF_ON_EDGE (lcssa_phi, loop_exit, new_arg); + gphi *lcssa_phi = create_phi_node (new_res, main_loop_exit_block); + SET_PHI_ARG_DEF (lcssa_phi, loop_exit->dest_idx, new_arg); adjust_phi_and_debug_stmts (to_phi, loop_entry, new_res); } } @@ -1876,7 +1931,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, /* Finally after wiring the new epilogue we need to update its main exit to the original function exit we recorded. Other exits are already correct. */ - if (multiple_exits_p || uncounted_p) + if (has_multiple_exits_p || uncounted_p) { class loop *update_loop = new_loop; doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header); @@ -2000,7 +2055,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, loop_preheader_edge (new_loop)->src); /* Update dominators for multiple exits. */ - if (multiple_exits_p || create_main_e) + if (has_multiple_exits_p || create_main_e) { for (edge alt_e : loop_exits) { @@ -3449,7 +3504,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, exit_e, scalar_loop, scalar_e, e, &prolog_e, true, NULL, - false, uncounted_p); + uncounted_p, uncounted_p, + true); gcc_assert (prolog); prolog->force_vectorize = false; @@ -3564,7 +3620,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e, &new_epilog_e, true, &doms, - uncounted_p); + uncounted_p, false, + true); LOOP_VINFO_EPILOGUE_MAIN_EXIT (loop_vinfo) = new_epilog_e; gcc_assert (epilog); diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 3a01e1be0f15..6d7393809013 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2486,7 +2486,8 @@ class loop *slpeel_tree_duplicate_loop_to_edge_cfg (class loop *, edge, class loop *, edge, edge, edge *, bool = true, vec<basic_block> * = NULL, - bool = false, bool = false); + bool = false, bool = false, + bool = true); class loop *vect_loop_versioning (loop_vec_info, gimple *); extern class loop *vect_do_peeling (loop_vec_info, tree, tree, tree *, tree *, tree *, int, bool, bool,
