Hi, this patch fixes profile update in tree-ssa-loop-im.c. This looks like quite a noticeable bug because the basic block storing flags ends up with count 0 and thus is optimized for size/possibly moved offline so perhaps even backporting to the release branches would make sense.
LSM profuces the following code: lsm = MEM; lsm_flag = false; ... for (...) { if (foo) stuff; else { lsm = TMP_VAR; lsm_flag = true; } } if (lsm_flag) <-- MEM = lsm; <-- What was missing was any profile on if (lsm_flag) path at the end. I don't think one can realistically figure probability of this conditional (perhaps estimate assuming that if (foo) is independent based on number of iterations, but that requires computation of powers and seems thus bit involved and also not necessarily realistic. Instead I just look for special case where lsm_flag is set always (common case) or almost never. Bootstrapped/regtested x86_64-linux, comitted. Honza * gcc.dg/tree-prof/cold_partition_label.c: Update template. * tree-ssa-loop-im.c (execute_sm_if_changed): Add FLAG_BBS parameter; update profile. (sm_set_flag_if_changed): Add bbs field. (execute_sm_if_changed_flag_set): Pass BBS. (execute_sm): Update. Index: testsuite/gcc.dg/tree-prof/cold_partition_label.c =================================================================== --- testsuite/gcc.dg/tree-prof/cold_partition_label.c (revision 248862) +++ testsuite/gcc.dg/tree-prof/cold_partition_label.c (working copy) @@ -1,7 +1,7 @@ /* Test case to check if function foo gets split and the cold function gets a label. */ /* { dg-require-effective-target freorder } */ -/* { dg-options "-O2 -freorder-blocks-and-partition -save-temps" } */ +/* { dg-options "-O2 -freorder-blocks-and-partition -save-temps -fdump-tree-optimized" } */ #define SIZE 10000 @@ -39,3 +39,4 @@ main (int argc, char *argv[]) /* { dg-final-use { scan-assembler "foo\[._\]+cold\[\._\]+0" { target *-*-linux* *-*-gnu* } } } */ /* { dg-final-use { scan-assembler "size\[ \ta-zA-Z0-0\]+foo\[._\]+cold\[\._\]+0" { target *-*-linux* *-*-gnu* } } } */ +/* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */ Index: tree-ssa-loop-im.c =================================================================== --- tree-ssa-loop-im.c (revision 248862) +++ tree-ssa-loop-im.c (working copy) @@ -1786,7 +1786,8 @@ struct prev_flag_edges { */ static void -execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag) +execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, + edge preheader, hash_set <basic_block> *flag_bbs) { basic_block new_bb, then_bb, old_dest; bool loop_has_only_one_exit; @@ -1796,6 +1797,54 @@ execute_sm_if_changed (edge ex, tree mem struct prev_flag_edges *prev_edges = (struct prev_flag_edges *) ex->aux; bool irr = ex->flags & EDGE_IRREDUCIBLE_LOOP; + int freq_sum = 0; + profile_count count_sum = profile_count::zero (); + int nbbs = 0, ncount = 0; + int flag_probability = -1; + + /* Flag is set in FLAG_BBS. Determine probability that flag will be true + at loop exit. + + This code may look fancy, but it can not update profile very realistically + because we do not know the probability that flag will be true at given + loop exit. + + We look for two interesting extremes + - when exit is dominated by block setting the flag, we know it will + always be true. This is a common case. + - when all blocks setting the flag have very low frequency we know + it will likely be false. + In all other cases we default to 2/3 for flag being true. */ + + for (hash_set<basic_block>::iterator it = flag_bbs->begin (); + it != flag_bbs->end (); ++it) + { + freq_sum += (*it)->frequency; + if ((*it)->count.initialized_p ()) + count_sum += (*it)->count, ncount ++; + if (dominated_by_p (CDI_DOMINATORS, ex->src, *it)) + flag_probability = REG_BR_PROB_BASE; + nbbs++; + } + + if (flag_probability != -1) + ; + else if (ncount == nbbs && count_sum > 0 && preheader->count >= count_sum) + { + flag_probability = count_sum.probability_in (preheader->count); + if (flag_probability > REG_BR_PROB_BASE * 2 / 3) + flag_probability = REG_BR_PROB_BASE * 2 / 3; + } + else if (freq_sum > 0 && EDGE_FREQUENCY (preheader) >= freq_sum) + { + flag_probability = GCOV_COMPUTE_SCALE (freq_sum, + EDGE_FREQUENCY (preheader)); + if (flag_probability > REG_BR_PROB_BASE * 2 / 3) + flag_probability = REG_BR_PROB_BASE * 2 / 3; + } + else + flag_probability = REG_BR_PROB_BASE * 2 / 3; + /* ?? Insert store after previous store if applicable. See note below. */ if (prev_edges) @@ -1826,6 +1875,8 @@ execute_sm_if_changed (edge ex, tree mem old_dest = ex->dest; new_bb = split_edge (ex); then_bb = create_empty_bb (new_bb); + then_bb->frequency = apply_probability (new_bb->frequency, flag_probability); + then_bb->count = new_bb->count.apply_probability (flag_probability); if (irr) then_bb->flags = BB_IRREDUCIBLE_LOOP; add_bb_to_loop (then_bb, new_bb->loop_father); @@ -1840,12 +1891,22 @@ execute_sm_if_changed (edge ex, tree mem stmt = gimple_build_assign (unshare_expr (mem), tmp_var); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); - make_edge (new_bb, then_bb, - EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); - make_edge (new_bb, old_dest, - EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); + edge e1 = single_succ_edge (new_bb); + edge e2 = make_edge (new_bb, then_bb, + EDGE_TRUE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); + e2->probability = flag_probability; + e2->count = then_bb->count; + + e1->flags |= EDGE_FALSE_VALUE | (irr ? EDGE_IRREDUCIBLE_LOOP : 0); + e1->flags &= ~EDGE_FALLTHRU; + + e1->probability = REG_BR_PROB_BASE - flag_probability; + e1->count = new_bb->count - then_bb->count; + then_old_edge = make_edge (then_bb, old_dest, EDGE_FALLTHRU | (irr ? EDGE_IRREDUCIBLE_LOOP : 0)); + then_old_edge->probability = REG_BR_PROB_BASE; + then_old_edge->count = then_bb->count; set_immediate_dominator (CDI_DOMINATORS, then_bb, new_bb); @@ -1889,18 +1950,17 @@ execute_sm_if_changed (edge ex, tree mem update_stmt (phi); } } - /* Remove the original fall through edge. This was the - single_succ_edge (new_bb). */ - EDGE_SUCC (new_bb, 0)->flags &= ~EDGE_FALLTHRU; } /* When REF is set on the location, set flag indicating the store. */ struct sm_set_flag_if_changed { - sm_set_flag_if_changed (tree flag_) : flag (flag_) {} + sm_set_flag_if_changed (tree flag_, hash_set <basic_block> *bbs_) + : flag (flag_), bbs (bbs_) {} bool operator () (mem_ref_loc *loc); tree flag; + hash_set <basic_block> *bbs; }; bool @@ -1913,6 +1973,7 @@ sm_set_flag_if_changed::operator () (mem gimple_stmt_iterator gsi = gsi_for_stmt (loc->stmt); gimple *stmt = gimple_build_assign (flag, boolean_true_node); gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + bbs->add (gimple_bb (stmt)); } return false; } @@ -1921,12 +1982,13 @@ sm_set_flag_if_changed::operator () (mem set, set an appropriate flag indicating the store. */ static tree -execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref) +execute_sm_if_changed_flag_set (struct loop *loop, im_mem_ref *ref, + hash_set <basic_block> *bbs) { tree flag; char *str = get_lsm_tmp_name (ref->mem.ref, ~0, "_flag"); flag = create_tmp_reg (boolean_type_node, str); - for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag)); + for_all_locs_in_loop (loop, ref, sm_set_flag_if_changed (flag, bbs)); return flag; } @@ -1946,6 +2008,7 @@ execute_sm (struct loop *loop, vec<edge> struct lim_aux_data *lim_data; bool multi_threaded_model_p = false; gimple_stmt_iterator gsi; + hash_set<basic_block> flag_bbs; if (dump_file && (dump_flags & TDF_DETAILS)) { @@ -1966,7 +2029,7 @@ execute_sm (struct loop *loop, vec<edge> multi_threaded_model_p = true; if (multi_threaded_model_p) - store_flag = execute_sm_if_changed_flag_set (loop, ref); + store_flag = execute_sm_if_changed_flag_set (loop, ref, &flag_bbs); rewrite_mem_refs (loop, ref, tmp_var); @@ -2002,7 +2065,8 @@ execute_sm (struct loop *loop, vec<edge> gsi_insert_on_edge (ex, store); } else - execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag); + execute_sm_if_changed (ex, ref->mem.ref, tmp_var, store_flag, + loop_preheader_edge (loop), &flag_bbs); } /* Hoists memory references MEM_REFS out of LOOP. EXITS is the list of exit