https://gcc.gnu.org/g:89c443a7e9a0780a52a698fb02d4f5173e025918
commit r14-9335-g89c443a7e9a0780a52a698fb02d4f5173e025918 Author: Richard Biener <rguent...@suse.de> Date: Wed Mar 6 10:31:02 2024 +0100 tree-optimization/114239 - rework reduction epilogue driving The following reworks vectorizable_live_operation to pass the live stmt to vect_create_epilog_for_reduction also for early breaks and a peeled main exit. This is to be able to figure out the scalar definition to replace. This reverts the PR114192 fix as it is subsumed by this cleanup. PR tree-optimization/114239 * tree-vect-loop.cc (vect_get_vect_def): Remove. (vect_create_epilog_for_reduction): The passed-in stmt_info should now be the live stmt that produces the scalar reduction result. Revert PR114192 fix. Base reduction info off info_for_reduction. Remove special handling of early-break/peeled, restore original vector def gathering. Make sure to pick the correct exit PHIs. (vectorizable_live_operation): Pass in the proper stmt_info for early break exits. * gcc.dg/vect/vect-early-break_122-pr114239.c: New testcase. 
Diff: --- .../gcc.dg/vect/vect-early-break_122-pr114239.c | 29 ++++++ gcc/tree-vect-loop.cc | 105 +++++---------------- 2 files changed, 53 insertions(+), 81 deletions(-) diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_122-pr114239.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_122-pr114239.c new file mode 100644 index 00000000000..7bf4db14209 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_122-pr114239.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ + +int ip4_getbit_a, ip4_getbit_pos, ip4_clrbit_pos; +void ip4_clrbit(int *a) { *a &= ip4_clrbit_pos; } +typedef struct { + char pxlen; + int prefix; +} net_addr_ip4; +void fib_get_chain(); +int trie_match_longest_ip4(); +int trie_match_next_longest_ip4(net_addr_ip4 *n) { + int __trans_tmp_1; + while (n->pxlen) { + n->pxlen--; + ip4_clrbit(&n->prefix); + __trans_tmp_1 = ip4_getbit_a >> ip4_getbit_pos; + if (__trans_tmp_1) + return 1; + } + return 0; +} +void net_roa_check_ip4_trie_tab() { + net_addr_ip4 px0; + for (int _n = trie_match_longest_ip4(&px0); _n; + _n = trie_match_next_longest_ip4(&px0)) + fib_get_chain(); +} diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index 761cdc67570..20ee0aad932 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -5897,35 +5897,6 @@ vect_create_partial_epilog (tree vec_def, tree vectype, code_helper code, return new_temp; } -/* Retrieves the definining statement to be used for a reduction. - For LAST_VAL_REDUC_P we use the current VEC_STMTs which correspond to the - final value after vectorization and otherwise we look at the reduction - definitions to get the first. 
*/ - -tree -vect_get_vect_def (stmt_vec_info reduc_info, slp_tree slp_node, - slp_instance slp_node_instance, bool last_val_reduc_p, - unsigned i, vec <gimple *> &vec_stmts) -{ - tree def; - - if (slp_node) - { - if (!last_val_reduc_p) - slp_node = slp_node_instance->reduc_phis; - def = vect_get_slp_vect_def (slp_node, i); - } - else - { - if (!last_val_reduc_p) - reduc_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (reduc_info)); - vec_stmts = STMT_VINFO_VEC_STMTS (reduc_info); - def = gimple_get_lhs (vec_stmts[0]); - } - - return def; -} - /* Function vect_create_epilog_for_reduction Create code at the loop-epilog to finalize the result of a reduction @@ -5989,8 +5960,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, loop-closed PHI of the inner loop which we remember as def for the reduction PHI generation. */ bool double_reduc = false; - bool last_val_reduc_p = LOOP_VINFO_IV_EXIT (loop_vinfo) == loop_exit - && !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo); stmt_vec_info rdef_info = stmt_info; if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) { @@ -6000,8 +5969,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, (stmt_info->stmt, 0)); stmt_info = vect_stmt_to_vectorize (stmt_info); } - gphi *reduc_def_stmt - = as_a <gphi *> (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt); code_helper code = STMT_VINFO_REDUC_CODE (reduc_info); internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); tree vectype; @@ -6066,33 +6033,9 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, stmt_vec_info single_live_out_stmt[] = { stmt_info }; array_slice<const stmt_vec_info> live_out_stmts = single_live_out_stmt; - if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo) - && loop_exit != LOOP_VINFO_IV_EXIT (loop_vinfo) - /* ??? We should fend this off earlier. For conversions we create - multiple epilogues, one dead. 
*/ - && stmt_info == reduc_info->reduc_def) - { - gcc_assert (!slp_node); - single_live_out_stmt[0] = reduc_info; - } - else - { - if (slp_reduc) - /* All statements produce live-out values. */ - live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node); - else if (slp_node) - { - /* The last statement in the reduction chain produces the live-out - value. Note SLP optimization can shuffle scalar stmts to - optimize permutations so we have to search for the last stmt. */ - for (k = 0; k < group_size; ++k) - if (!REDUC_GROUP_NEXT_ELEMENT (SLP_TREE_SCALAR_STMTS (slp_node)[k])) - { - single_live_out_stmt[0] = SLP_TREE_SCALAR_STMTS (slp_node)[k]; - break; - } - } - } + if (slp_reduc) + /* All statements produce live-out values. */ + live_out_stmts = SLP_TREE_SCALAR_STMTS (slp_node); unsigned vec_num; int ncopies; @@ -6103,7 +6046,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, } else { - stmt_vec_info reduc_info = loop_vinfo->lookup_stmt (reduc_def_stmt); vec_num = 1; ncopies = STMT_VINFO_VEC_STMTS (reduc_info).length (); } @@ -6247,18 +6189,19 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, exit_bb = loop_exit->dest; exit_gsi = gsi_after_labels (exit_bb); reduc_inputs.create (slp_node ? 
vec_num : ncopies); - vec <gimple *> vec_stmts = vNULL; for (unsigned i = 0; i < vec_num; i++) { gimple_seq stmts = NULL; - def = vect_get_vect_def (rdef_info, slp_node, slp_node_instance, - last_val_reduc_p, i, vec_stmts); + if (slp_node) + def = vect_get_slp_vect_def (slp_node, i); + else + def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[0]); for (j = 0; j < ncopies; j++) { tree new_def = copy_ssa_name (def); phi = create_phi_node (new_def, exit_bb); if (j) - def = gimple_get_lhs (vec_stmts[j]); + def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); if (LOOP_VINFO_IV_EXIT (loop_vinfo) == loop_exit) SET_PHI_ARG_DEF (phi, loop_exit->dest_idx, def); else @@ -6963,7 +6906,8 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, { if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))) { - if (!is_gimple_debug (USE_STMT (use_p))) + if (!is_gimple_debug (USE_STMT (use_p)) + && gimple_bb (USE_STMT (use_p)) == loop_exit->dest) phis.safe_push (USE_STMT (use_p)); } else @@ -10765,26 +10709,21 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, { if (!vec_stmt_p) return true; - if (slp_node) - { - /* For reduction chains the meta-info is attached to - the group leader. */ - if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) - stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); - /* For SLP reductions we vectorize the epilogue for - all involved stmts together. */ - else if (slp_index != 0) - return true; - } + /* For SLP reductions we vectorize the epilogue for all involved stmts + together. 
*/ + if (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) && slp_index != 0) + return true; stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); gcc_assert (reduc_info->is_reduc_info); if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION) return true; - vect_create_epilog_for_reduction (loop_vinfo, stmt_info, slp_node, - slp_node_instance, - LOOP_VINFO_IV_EXIT (loop_vinfo)); + if (!LOOP_VINFO_EARLY_BREAKS (loop_vinfo) + || !LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) + vect_create_epilog_for_reduction (loop_vinfo, stmt_info, slp_node, + slp_node_instance, + LOOP_VINFO_IV_EXIT (loop_vinfo)); /* If early break we only have to materialize the reduction on the merge block, but we have to find an alternate exit first. */ @@ -10793,11 +10732,15 @@ vectorizable_live_operation (vec_info *vinfo, stmt_vec_info stmt_info, for (auto exit : get_loop_exit_edges (LOOP_VINFO_LOOP (loop_vinfo))) if (exit != LOOP_VINFO_IV_EXIT (loop_vinfo)) { - vect_create_epilog_for_reduction (loop_vinfo, stmt_info, + vect_create_epilog_for_reduction (loop_vinfo, reduc_info, slp_node, slp_node_instance, exit); break; } + if (LOOP_VINFO_EARLY_BREAKS_VECT_PEELED (loop_vinfo)) + vect_create_epilog_for_reduction (loop_vinfo, reduc_info, slp_node, + slp_node_instance, + LOOP_VINFO_IV_EXIT (loop_vinfo)); } return true;