> Index: gcc/cgraphclones.c
> ===================================================================
> --- gcc/cgraphclones.c        (revision 215826)
> +++ gcc/cgraphclones.c        (working copy)
> @@ -453,6 +453,11 @@
>      }
>    else
>      count_scale = 0;
> +  /* In AutoFDO, if edge count is larger than callee's entry block
> +     count, we will not update the original callee because it may
> +     mistakenly mark some hot function as cold.  */
> +  if (flag_auto_profile && gcov_count >= count)
> +    update_original = false;

lets drop this from initial patch.

> Index: gcc/bb-reorder.c
> ===================================================================
> --- gcc/bb-reorder.c  (revision 215826)
> +++ gcc/bb-reorder.c  (working copy)
> @@ -1569,15 +1569,14 @@
>    /* Mark which partition (hot/cold) each basic block belongs in.  */
>    FOR_EACH_BB_FN (bb, cfun)
>      {
> -      bool cold_bb = false;
> +      bool cold_bb = probably_never_executed_bb_p (cfun, bb);

and this too
(basically all the tweaks should IMO go in independently and ideally in
a way that does not need flag_auto_profile test).
> +/* Return true if BB contains indirect call.  */
> +
> +static bool
> +has_indirect_call (basic_block bb)
> +{
> +  gimple_stmt_iterator gsi;
> +
> +  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> +    {
> +      gimple stmt = gsi_stmt (gsi);
> +      if (gimple_code (stmt) == GIMPLE_CALL
> +       && (gimple_call_fn (stmt) == NULL
> +           || TREE_CODE (gimple_call_fn (stmt)) != FUNCTION_DECL))

You probably want to skip gimple_call_internal_p calls here.
> +
> +/* From AutoFDO profiles, find values inside STMT for that we want to measure
> +   histograms for indirect-call optimization.  */
> +
> +static void
> +afdo_indirect_call (gimple_stmt_iterator *gsi, const icall_target_map &map,
> +                 bool transform)
> +{
> +  gimple stmt = gsi_stmt (*gsi);
> +  tree callee;
> +
> +  if (map.size() == 0 || gimple_code (stmt) != GIMPLE_CALL
> +      || gimple_call_fndecl (stmt) != NULL_TREE)
> +    return;
> +
> +  callee = gimple_call_fn (stmt);
> +
> +  histogram_value hist = gimple_alloc_histogram_value (
> +      cfun, HIST_TYPE_INDIR_CALL, stmt, callee);
> +  hist->n_counters = 3;
> +  hist->hvalue.counters =  XNEWVEC (gcov_type, hist->n_counters);
> +  gimple_add_histogram_value (cfun, stmt, hist);
> +
> +  gcov_type total = 0;
> +  icall_target_map::const_iterator max_iter = map.end();
> +
> +  for (icall_target_map::const_iterator iter = map.begin();
> +       iter != map.end(); ++iter)
> +    {
> +      total += iter->second;
> +      if (max_iter == map.end() || max_iter->second < iter->second)
> +     max_iter = iter;
> +    }
> +
> +  hist->hvalue.counters[0] = (unsigned long long)
> +      afdo_string_table->get_name (max_iter->first);
> +  hist->hvalue.counters[1] = max_iter->second;
> +  hist->hvalue.counters[2] = total;
> +
> +  if (!transform)
> +    return;
> +
> +  if (gimple_ic_transform (gsi))
> +    {
> +      struct cgraph_edge *indirect_edge =
> +       cgraph_node::get (current_function_decl)->get_edge (stmt);
> +      struct cgraph_node *direct_call =
> +       find_func_by_profile_id ((int)hist->hvalue.counters[0]);
> +      if (DECL_STRUCT_FUNCTION (direct_call->decl) == NULL)
> +     return;
> +      struct cgraph_edge *new_edge =
> +       indirect_edge->make_speculative (direct_call, 0, 0);
> +      new_edge->redirect_call_stmt_to_callee ();
> +      gimple_remove_histogram_value (cfun, stmt, hist);
> +      inline_call (new_edge, true, NULL, NULL, false);
> +      return;
> +    }
> +  return;

Is it necessary to go via histogram and gimple_ic_transform here?  I would 
expect that all you
need is to make the speculative edge and inline it. (bypassing the work of 
producing fake
histogram value and calling igmple_ic_transofrm on it)

Also it seems to me that you want to set direct_count nad frequency argument of
make_speculative so the resulting function profile is not off.

The rest of interfaces seems quite sane now.  Can you please look into
using speculative edges directly instead of hooking into the vpt infrastructure
and fixing the formatting issues of the new pass?

I will try to make another pass over the actual streaming logic that I find bit 
difficult
to read, but I quite trust you it does the right thing ;)

Honza

Reply via email to