Hi, this patch fixes bug I noticed while looking for profile mismatches in tramp3d. When split part starts with the loop, the frequency of call to split function is not the same as frequency of this block. This also affects the heuristics which should not count the loopback edges.
Patch aslo gets rid of frequencies and uses counts consistently. Bootstrapped/regtested x86_64-linux, will commit it shortly. * ipa-split.c (struct split_point): Add count. (consider_split): Do not compute incoming frequency; compute incoming count and store it to split_point. (split_function): Set count of the call to split part correctly. * testsuite/gcc.dg/tree-ssa/fnsplit-2.c: New testcase. Index: ipa-split.c =================================================================== --- ipa-split.c (revision 254700) +++ ipa-split.c (working copy) @@ -129,6 +129,10 @@ struct split_point /* Basic block where we split (that will become entry point of new function. */ basic_block entry_bb; + /* Count for entering the split part. + This is not count of the entry_bb because it may be in loop. */ + profile_count count; + /* Basic blocks we are splitting away. */ bitmap split_bbs; @@ -426,7 +430,6 @@ consider_split (struct split_point *curr edge_iterator ei; gphi_iterator bsi; unsigned int i; - int incoming_freq = 0; tree retval; tree retbnd; bool back_edge = false; @@ -434,18 +437,21 @@ consider_split (struct split_point *curr if (dump_file && (dump_flags & TDF_DETAILS)) dump_split_point (dump_file, current); + current->count = profile_count::zero (); FOR_EACH_EDGE (e, ei, current->entry_bb->preds) { if (e->flags & EDGE_DFS_BACK) back_edge = true; if (!bitmap_bit_p (current->split_bbs, e->src->index)) - incoming_freq += EDGE_FREQUENCY (e); + current->count += e->count (); } - /* Do not split when we would end up calling function anyway. */ - if (incoming_freq - >= (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun) - * PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY) / 100)) + /* Do not split when we would end up calling function anyway. + Compares are three state, use !(...<...) to also give up when outcome + is unknown. */ + if (!(current->count + < (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.apply_scale + (PARAM_VALUE (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY), 100)))) { /* When profile is guessed, we can not expect it to give us realistic estimate on likelyness of function taking the @@ -454,14 +460,17 @@ consider_split (struct split_point *curr is likely noticeable win. */ if (back_edge && profile_status_for_fn (cfun) != PROFILE_READ - && incoming_freq - < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)) + && current->count + < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, - " Split before loop, accepting despite low frequencies %i %i.\n", - incoming_freq, - ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun)); + { + fprintf (dump_file, + " Split before loop, accepting despite low counts"); + current->count.dump (dump_file); + fprintf (dump_file, " "); + ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.dump (dump_file); + } } else { @@ -711,14 +720,13 @@ consider_split (struct split_point *curr if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, " Accepted!\n"); - /* At the moment chose split point with lowest frequency and that leaves + /* At the moment chose split point with lowest count and that leaves out smallest size of header. In future we might re-consider this heuristics. */ if (!best_split_point.split_bbs - || best_split_point.entry_bb->count.to_frequency (cfun) - > current->entry_bb->count.to_frequency (cfun) - || (best_split_point.entry_bb->count.to_frequency (cfun) - == current->entry_bb->count.to_frequency (cfun) + || best_split_point.count + > current->count + || (best_split_point.count == current->count && best_split_point.split_size < current->split_size)) { @@ -1446,6 +1454,7 @@ split_function (basic_block return_bb, s } else break; + call_bb->count = split_point->count; e = split_block (split_point->entry_bb, last_stmt); remove_edge (e); Index: testsuite/gcc.dg/tree-ssa/fnsplit-2.c =================================================================== --- testsuite/gcc.dg/tree-ssa/fnsplit-2.c (nonexistent) +++ testsuite/gcc.dg/tree-ssa/fnsplit-2.c (working copy) @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-tree-fnsplit-blocks-details" } */ +int b; +void test (void); +void +split_me (int *a) +{ + if (__builtin_expect (a==0, 0)) + do + { + test(); + test(); + test(); + test(); + test(); + } + while (b); + else + q(); +} + +int +main(void) +{ + int i; + for (i = 0; i < 1000; i++) + split_me(&i); + return 0; +} + +/* { dg-final { scan-tree-dump-times "Splitting function at:" 1 "fnsplit"} } */ +/* { dg-final { scan-tree-dump-times "Invalid sum" 0 "fnsplit"} } */