In AutoFDO, we early-inline callsites that was inlined in profiling runs regardless of the size limit. With this change, the existing ipa-inline tunings for AutoFDO is unnecessary: it's fine to just use the traditional FDO based heuristic. This patch cleans up the original tunings and make it easier to port to gcc 4_8.
Bootstrapped and passed all regression test. And passed benchmark performance tests. Is it ok for google-4_7 branch? Thanks, Dehao http://codereview.appspot.com/9250047
Index: gcc/ipa-inline.c =================================================================== --- gcc/ipa-inline.c (revision 198796) +++ gcc/ipa-inline.c (working copy) @@ -481,21 +481,13 @@ edge_hot_enough_p (struct cgraph_edge *edge) { if (cgraph_maybe_hot_edge_p (edge)) return true; - if (flag_auto_profile) - { - gcov_type callsite_total_count; - /* Check if total sample counts in the callee is available. */ - if (afdo_get_callsite_count (edge, &callsite_total_count, NULL, true) - && maybe_hot_count_p (callsite_total_count)) - return true; - /* We disable hot-caller heuristic if the callee's entry count is - 0 because in this case we do not have enough information to - calculate the scaling factor. */ - if (edge->callee->count == 0 && edge->callee->max_bb_count > 0) - return false; - /* In AutoFDO, if the preivous few heuristic fail, we will fall - back to use hot-caller heuristic as is used by FDO. */ - } + + /* We disable hot-caller heuristic if the callee's entry count is + 0 because in this case we do not have enough information to + calculate the scaling factor. */ + if (flag_auto_profile && edge->callee->count == 0 + && edge->callee->max_bb_count > 0) + return false; if (PARAM_VALUE (PARAM_INLINE_HOT_CALLER) && maybe_hot_count_p (edge->caller->max_bb_count)) return true; @@ -874,32 +866,10 @@ edge_badness (struct cgraph_edge *edge, bool dump) else if (max_count) { int relbenefit = relative_time_benefit (callee_info, edge, time_growth); - if (flag_auto_profile && edge->count == 0) - { - gcov_type callsite_count; - if (afdo_get_callsite_count (edge, &callsite_count, NULL, false)) - edge->count = callsite_count; - if (edge->count > max_count) - max_count = edge->count; - } badness = ((int) ((double) edge->count * INT_MIN / 2 / max_count / 512) * relative_time_benefit (callee_info, edge, time_growth)) / growth; - if (flag_auto_profile && profile_info->sum_all > 0) - { - gcov_type callsite_total_count; - if (afdo_get_callsite_count (edge, &callsite_total_count, NULL, true)) - { - gcov_type afdo_badness = - ((int) - ((double) callsite_total_count * INT_MIN / 2 / - profile_info->sum_all / 64) * - relative_time_benefit (callee_info, edge, time_growth)) / growth; - if (afdo_badness < badness) - badness = afdo_badness; - } - } /* Be sure that insanity of the profile won't lead to increasing counts in the scalling and thus to overflow in the computation above. */ @@ -1568,7 +1538,7 @@ inline_small_functions (void) of date value on it, we re-insert it now. */ current_badness = edge_badness (edge, false); gcc_assert (cached_badness == current_badness); - gcc_assert (flag_auto_profile || current_badness >= badness); + gcc_assert (current_badness >= badness); if (current_badness != badness) { edge->aux = fibheap_insert (heap, current_badness, edge); Index: gcc/ipa-inline-transform.c =================================================================== --- gcc/ipa-inline-transform.c (revision 198796) +++ gcc/ipa-inline-transform.c (working copy) @@ -139,23 +139,6 @@ void clone_inlined_nodes (struct cgraph_edge *e, bool duplicate, bool update_original, int *overall_size) { - bool has_callsite_profile = false; - gcov_type callsite_total_count, callsite_max_count; - - if (flag_auto_profile) - { - has_callsite_profile = - afdo_get_callsite_count (e, &callsite_total_count, - &callsite_max_count, true); - /* If the callsite is inlined in the profile-collection build, - i.e. the cloned callee has its separate profile, we will use - this separate profile to annotate the callee, and the real - callee body will not be affected. Thus here we need to disable - update_original. */ - if (has_callsite_profile) - update_original = false; - } - if (duplicate) { /* We may eliminate the need for out-of-line copy to be output. @@ -196,23 +179,6 @@ clone_inlined_nodes (struct cgraph_edge *e, bool d } } - if (flag_auto_profile && has_callsite_profile) - { - /* The callee's total count will be non-zero if the callsite - was inlined in the profile-collection build, In this case, - the original callee may be label unlikely_executed, which - may prevent its callees being inlined. Thus we need to reset - its frequency to normal. */ - if (e->callee->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) - e->callee->frequency = NODE_FREQUENCY_NORMAL; - /* we do not have enough information to calculate the node count - and max_bb_count. Thus we set them to the same value to make - other optimizations aware that they are from cloned inline - instances. */ - e->callee->count = callsite_total_count; - e->callee->max_bb_count = callsite_max_count; - } - if (e->caller->global.inlined_to) e->callee->global.inlined_to = e->caller->global.inlined_to; else @@ -417,8 +383,6 @@ inline_call (struct cgraph_edge *e, bool update_or clone_inlined_nodes (e, true, update_original, overall_size); - if (flag_auto_profile) - afdo_add_copy_scale (e); gcc_assert (curr->callee->global.inlined_to == to); Index: gcc/auto-profile.c =================================================================== --- gcc/auto-profile.c (revision 198796) +++ gcc/auto-profile.c (working copy) @@ -178,9 +178,6 @@ static htab_t function_htab; /* Hash table to hold stack information. */ static htab_t stack_htab; -/* Hash table to hold inline scale information. */ -static htab_t stack_scale_htab; - /* Hash table to hold assembler name to bfd name mapping. */ static htab_t bfd_name_htab; @@ -815,142 +812,6 @@ afdo_add_bfd_name_mapping (const char *as_name, co free (entry); } -/* When EDGE is inlined, the callee is cloned recursively. This function - updates the copy scale recursively along the callee. STACK stores the - call stack info from the original inlined edge to the caller of EDGE. - - E.g. foo calls bar with call count 100; - bar calls baz with call count 300; - bar has an entry count of 400, baz has an entry count of 1000; - Initial callgraph looks like: - foo --(100)--> bar(400) - bar --(300)--> baz(1000) - - Consider baz is first inlined into bar, we will have a call graph like: - foo --(100)--> bar(400) - bar --(300)--> baz.clone(300) - baz(700) - At this point, a copyscale mapping is added: - (bar->baz) --> 0.3 - - Consider bar is then inlined into foo, we will have a call graph like: - foo --(100)--> bar.clone(100) - bar.clone --(75)-->baz.clone_2(75) - bar --(225)->baz.clone(225) - baz(700) - At this point, two copyscale mappings are added: - (foo->bar) --> 0.25 - (foo->bar->baz) --> 0.25 * 0.3 -*/ - -static void -afdo_propagate_copy_scale (struct cgraph_edge *edge, struct gcov_stack *stack) -{ - struct gcov_stack *new_stack, *entry, **stack_slot; - struct cgraph_edge *e; - - if (edge->callee->global.inlined_to == NULL) - return; - if (stack->count == 0) - return; - - new_stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack)); - new_stack->func_name = - IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->caller->decl)); - new_stack->callee_name = - IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)); - new_stack->size = get_inline_stack_size_by_stmt (edge->call_stmt); - new_stack->stack = (struct gcov_callsite_pos *) xmalloc ( - sizeof (struct gcov_callsite_pos) * (new_stack->size + stack->size)); - get_inline_stack_by_stmt (edge->call_stmt, edge->caller->decl, - new_stack->stack, false); - entry = (struct gcov_stack *) htab_find (stack_scale_htab, new_stack); - if (entry == NULL) - { - free (new_stack->stack); - free (new_stack); - return; - } - - new_stack->func_name = stack->func_name; - new_stack->count = entry->count * stack->count / REG_BR_PROB_BASE; - memcpy (new_stack->stack + new_stack->size, - stack->stack, stack->size * sizeof (struct gcov_callsite_pos)); - new_stack->size += stack->size; - stack_slot = (struct gcov_stack **) - htab_find_slot (stack_scale_htab, new_stack, INSERT); - if (!*stack_slot) - *stack_slot = new_stack; - else - (*stack_slot)->count = MAX ((*stack_slot)->count, new_stack->count); - - for (e = edge->callee->callees; e; e = e->next_callee) - afdo_propagate_copy_scale (e, new_stack); -} - -/* For an inlined EDGE, the scale (i.e. edge->count / edge->callee->count) - is recorded in a hash map. */ - -void -afdo_add_copy_scale (struct cgraph_edge *edge) -{ - struct gcov_stack *stack; - struct gcov_stack **stack_slot; - int scale; - int size = get_inline_stack_size_by_edge (edge); - struct cgraph_node *n = edge->callee->clone_of; - struct cgraph_edge *e; - gcov_type sum_cloned_count; - - if (edge->callee->clone_of) - { - n = edge->callee->clone_of->clones; - sum_cloned_count = edge->callee->clone_of->count; - } - else - { - n = edge->callee->clones; - sum_cloned_count = edge->callee->count; - } - - for (; n; n = n->next_sibling_clone) - sum_cloned_count += n->count; - if (sum_cloned_count > 0) - scale = (double) edge->count * REG_BR_PROB_BASE / sum_cloned_count; - else if (edge->caller->count == 0 && edge->caller->max_bb_count == 0) - scale = 0; - else - scale = REG_BR_PROB_BASE; - if (scale > REG_BR_PROB_BASE) - scale = REG_BR_PROB_BASE; - - if (size == 0) - return; - stack = (struct gcov_stack *) xmalloc (sizeof (struct gcov_stack)); - stack->func_name - = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME ( - edge->caller->global.inlined_to ? - edge->caller->global.inlined_to->decl : edge->caller->decl)); - stack->callee_name - = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)); - stack->size = size; - stack->stack = (struct gcov_callsite_pos *) - xmalloc (sizeof (struct gcov_callsite_pos) * size); - stack->count = scale; - - get_inline_stack_by_edge (edge, stack->stack); - - stack_slot = (struct gcov_stack **) - htab_find_slot (stack_scale_htab, stack, INSERT); - if (!*stack_slot) - *stack_slot = stack; - else - (*stack_slot)->count = MAX ((*stack_slot)->count, stack->count); - - for (e = edge->callee->callees; e; e = e->next_callee) - afdo_propagate_copy_scale (e, stack); -} - /* For a given POS_STACK with SIZE, get the COUNT, MAX_COUNT, NUM_INST, HIST_SIZE and HIST for the inline stack. If CALLEE_NAME is non-null, the COUNT/MAX_COUNT represents the total/max count in the inline stack. @@ -964,56 +825,24 @@ get_stack_count (struct gcov_callsite_pos *pos_sta gcov_type *count, gcov_type *max_count, gcov_type *num_inst, gcov_unsigned_t *hist_size, struct gcov_hist **hist) { - int i; - - for (i = 0; i < size; i++) + struct gcov_stack stack, *entry; + stack.func_name = pos_stack[size - 1].func; + stack.callee_name = callee_name; + stack.stack = pos_stack; + stack.size = size; + entry = (struct gcov_stack *) htab_find (stack_htab, &stack); + if (entry) { - struct gcov_stack stack, *entry; - stack.func_name = pos_stack[size - i - 1].func; - stack.callee_name = callee_name; - stack.stack = pos_stack; - stack.size = size - i; - entry = (struct gcov_stack *) htab_find (stack_htab, &stack); - if (entry) + *count = entry->count; + *num_inst = entry->num_inst; + if (max_count) + *max_count = entry->max_count; + if (hist_size) { - if (i == 0) - { - *count = entry->count; - *num_inst = entry->num_inst; - if (max_count) - *max_count = entry->max_count; - if (hist_size) - { - *hist_size = entry->hist_size; - *hist = entry->hist; - } - return true; - } - else - { - struct gcov_stack scale_stack, *scale_entry; - scale_stack.stack = pos_stack + size - i; - scale_stack.size = i; - scale_stack.func_name = pos_stack[size - 1].func; - scale_stack.callee_name = stack.func_name; - scale_entry = (struct gcov_stack *) - htab_find (stack_scale_htab, &scale_stack); - if (scale_entry) - { - *count = entry->count * scale_entry->count - / REG_BR_PROB_BASE; - *num_inst = entry->num_inst; - if (max_count) - *max_count = entry->max_count; - if (hist_size) - { - *hist_size = entry->hist_size; - *hist = entry->hist; - } - return true; - } - } + *hist_size = entry->hist_size; + *hist = entry->hist; } + return true; } *count = 0; *num_inst = 0; @@ -1057,14 +886,14 @@ get_stmt_count (gimple stmt, gcov_type *count, gco /* For a given EDGE, if IS_TOTAL is true, save EDGE->callee's total count to COUNT, otherwise save EDGE's count to COUNT. */ -bool +static bool afdo_get_callsite_count (struct cgraph_edge *edge, gcov_type *count, - gcov_type *max_count, bool is_total) + gcov_type *max_count) { struct gcov_callsite_pos *pos_stack; gcov_type num_inst; - const char *callee_name = is_total ? - IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)) : NULL; + const char *callee_name = + IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (edge->callee->decl)); int size = get_inline_stack_size_by_edge (edge); if (size == 0) @@ -1074,10 +903,6 @@ afdo_get_callsite_count (struct cgraph_edge *edge, get_inline_stack_by_edge (edge, pos_stack); - if (!is_total) - pos_stack[0].discr = - get_discriminator_from_locus (gimple_location (edge->call_stmt)); - return get_stack_count (pos_stack, callee_name, size, count, max_count, &num_inst, NULL, NULL); } @@ -1131,11 +956,19 @@ afdo_annotate_cfg (void) max_count = bb->count; } if (ENTRY_BLOCK_PTR->count > ENTRY_BLOCK_PTR->next_bb->count) - ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count; + { + ENTRY_BLOCK_PTR->next_bb->count = ENTRY_BLOCK_PTR->count; + ENTRY_BLOCK_PTR->next_bb->flags |= BB_ANNOTATED; + } + if (ENTRY_BLOCK_PTR->count > EXIT_BLOCK_PTR->prev_bb->count) + { + EXIT_BLOCK_PTR->prev_bb->count = ENTRY_BLOCK_PTR->count; + EXIT_BLOCK_PTR->prev_bb->flags |= BB_ANNOTATED; + } if (max_count > 0) { - counts_to_freqs (); afdo_calculate_branch_prob (); + counts_to_freqs (); profile_status = PROFILE_READ; } if (flag_value_profile_transformations) @@ -1416,13 +1249,6 @@ init_auto_profile (void) 0, xcalloc, free); - /* Initialize the stack scale hash table. */ - stack_scale_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE, - afdo_stack_hash, - afdo_stack_eq, - 0, - xcalloc, - free); /* Initialize the bfd name mapping table. */ bfd_name_htab = htab_create_alloc ((size_t) SP_HTAB_INIT_SIZE, afdo_bfd_name_hash, @@ -1477,7 +1303,6 @@ end_auto_profile (void) free (file_names); htab_delete (function_htab); htab_delete (stack_htab); - htab_delete (stack_scale_htab); htab_delete (bfd_name_htab); htab_delete (module_htab); profile_info = NULL; @@ -1845,7 +1670,7 @@ bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *edge) { gcov_type count, max_count; - if (afdo_get_callsite_count (edge, &count, &max_count, true)) + if (afdo_get_callsite_count (edge, &count, &max_count)) { bool is_hot; const struct gcov_ctr_summary *saved_profile_info = profile_info; Index: gcc/auto-profile.h =================================================================== --- gcc/auto-profile.h (revision 198796) +++ gcc/auto-profile.h (working copy) @@ -32,16 +32,9 @@ extern void afdo_set_current_function_count (void) /* Add the assembly_name to bfd_name mapping. */ extern void afdo_add_bfd_name_mapping (const char *, const char *); -/* Add copy scale for an inlined edge to stack_scale_map. */ -extern void afdo_add_copy_scale (struct cgraph_edge *); - /* Calculate branch probability in both AutoFDO pass and after inlining. */ extern void afdo_calculate_branch_prob (void); -/* Calculate total sample count of an inlined callsite. */ -extern bool afdo_get_callsite_count (struct cgraph_edge *, gcov_type *, - gcov_type *, bool); - /* Returns TRUE if EDGE is hot enough to be inlined early. */ extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *);