The following implements a manual dominator update for multi-exit loop prologue peeling during vectorization.
Bootstrap / regtest running on x86_64-unknown-linux-gnu. I think the amount of coverage for prologue peeling with early exits is very low, so my testing success might not mean much. Richard. PR tree-optimization/114081 * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg): Perform manual dominator update for prologue peeling. (vect_do_peeling): Properly update dominators after adding the prologue-around guard. * gcc.dg/vect/vect-early-break_121-pr114081.c: New testcase. --- .../vect/vect-early-break_121-pr114081.c | 39 ++++++++++ gcc/tree-vect-loop-manip.cc | 78 +++++++++++++------ 2 files changed, 95 insertions(+), 22 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c new file mode 100644 index 00000000000..423ff0b566b --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-O3" } */ +/* { dg-additional-options "-mavx2" { target { x86_64-*-* i?86-*-* } } } */ + +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */ + +typedef struct filter_list_entry { + const char *name; + int id; + void (*function)(); +} filter_list_entry; + +static const filter_list_entry filter_list[9] = {0}; + +void php_zval_filter(int filter, int id1) { + filter_list_entry filter_func; + + int size = 9; + for (int i = 0; i < size; ++i) { + if (filter_list[i].id == filter) { + filter_func = filter_list[i]; + goto done; + } + } + +#pragma GCC novector + for (int i = 0; i < size; ++i) { + if (filter_list[i].id == 0x0204) { + filter_func = filter_list[i]; + goto done; + } + } +done: + if (!filter_func.id) + filter_func.function(); +} diff --git 
a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 137b053ac35..f72da915103 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -1594,7 +1594,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, auto loop_exits = get_loop_exit_edges (loop); bool multiple_exits_p = loop_exits.length () > 1; auto_vec<basic_block> doms; - class loop *update_loop = NULL; if (at_exit) /* Add the loop copy at exit. */ { @@ -1856,11 +1855,33 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, correct. */ if (multiple_exits_p) { - update_loop = new_loop; + class loop *update_loop = new_loop; doms = get_all_dominated_blocks (CDI_DOMINATORS, loop->header); for (unsigned i = 0; i < doms.length (); ++i) if (flow_bb_inside_loop_p (loop, doms[i])) doms.unordered_remove (i); + + for (edge e : get_loop_exit_edges (update_loop)) + { + edge ex; + edge_iterator ei; + FOR_EACH_EDGE (ex, ei, e->dest->succs) + { + /* Find the first non-fallthrough block as fall-throughs can't + dominate other blocks. */ + if (single_succ_p (ex->dest)) + { + doms.safe_push (ex->dest); + ex = single_succ_edge (ex->dest); + } + doms.safe_push (ex->dest); + } + doms.safe_push (e->dest); + } + + iterate_fix_dominators (CDI_DOMINATORS, doms, false); + if (updated_doms) + updated_doms->safe_splice (doms); } } else /* Add the copy at entry. */ @@ -1910,33 +1931,28 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop *loop, edge loop_exit, set_immediate_dominator (CDI_DOMINATORS, new_loop->header, loop_preheader_edge (new_loop)->src); + /* Update dominators for multiple exits. */ if (multiple_exits_p) - update_loop = loop; - } - - if (multiple_exits_p) - { - for (edge e : get_loop_exit_edges (update_loop)) { - edge ex; - edge_iterator ei; - FOR_EACH_EDGE (ex, ei, e->dest->succs) + for (edge alt_e : loop_exits) { - /* Find the first non-fallthrough block as fall-throughs can't - dominate other blocks. 
*/ - if (single_succ_p (ex->dest)) + if (alt_e == loop_exit) + continue; + basic_block old_dom + = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest); + if (flow_bb_inside_loop_p (loop, old_dom)) { - doms.safe_push (ex->dest); - ex = single_succ_edge (ex->dest); + auto_vec<basic_block, 8> queue; + for (auto son = first_dom_son (CDI_DOMINATORS, old_dom); + son; son = next_dom_son (CDI_DOMINATORS, son)) + if (!flow_bb_inside_loop_p (loop, son)) + queue.safe_push (son); + for (auto son : queue) + set_immediate_dominator (CDI_DOMINATORS, + son, get_bb_copy (old_dom)); } - doms.safe_push (ex->dest); } - doms.safe_push (e->dest); } - - iterate_fix_dominators (CDI_DOMINATORS, doms, false); - if (updated_doms) - updated_doms->safe_splice (doms); } free (new_bbs); @@ -3368,6 +3384,24 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, guard_to, guard_bb, prob_prolog.invert (), irred_flag); + for (edge alt_e : get_loop_exit_edges (prolog)) + { + if (alt_e == prolog_e) + continue; + basic_block old_dom + = get_immediate_dominator (CDI_DOMINATORS, alt_e->dest); + if (flow_bb_inside_loop_p (prolog, old_dom)) + { + auto_vec<basic_block, 8> queue; + for (auto son = first_dom_son (CDI_DOMINATORS, old_dom); + son; son = next_dom_son (CDI_DOMINATORS, son)) + if (!flow_bb_inside_loop_p (prolog, son)) + queue.safe_push (son); + for (auto son : queue) + set_immediate_dominator (CDI_DOMINATORS, son, guard_bb); + } + } + e = EDGE_PRED (guard_to, 0); e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e); -- 2.35.3