https://gcc.gnu.org/g:a93f80feeef744649dc426a59a5860224d13e68d
commit r16-4438-ga93f80feeef744649dc426a59a5860224d13e68d Author: Jan Hubicka <[email protected]> Date: Wed Oct 15 09:49:21 2025 +0200 Cleanup max of profile_count profile_count::max is not implemented same way as other arithmetics on profile counts which generally require counts to be compatible and returns minimum of qualities of input counts. Reason is that originally it was used to compute statistics of whole callgraph profile so inliner weights can be scaled to reasonable integers interprocedurally. It also combines qulities weird way so the same counter could be used to determine what quality of profile is available. That code had roundoff error issues and was replaced by sreals. Now max is mostly used to determine cfg->max_count which is used to scale counts to reasonable integers intraprocedurally and is still being used i.e. by IRA. There are also few places where max is used for normal arithmetics when updating profile. For computing max_count we need max to still be a bit special so max (uninitialized, initialized) returns initialized rather then uninitialized. Partial profiles are later handled specially. This patch renames max to max_prefer_initialized to make it clear and updates implementation to require compatible profiles. I checked this behaviour is good for other places using it as well. I also turned function to static, since a = a->max (b) looks odd. gcc/ChangeLog: * auto-profile.cc (scale_bb_profile): Use profile_count::max_prefer_initialized. (afdo_adjust_guessed_profile): Likewise. * bb-reorder.cc (edge_order): Do not use max. * cfghooks.cc (merge_blocks): Likewise. * ipa-fnsummary.cc (param_change_prob): Likewise. * ipa-inline-transform.cc (inline_transform): Likewise. * predict.cc (update_max_bb_count): Likewise. (estimate_bb_frequencies): Likewise. (rebuild_frequencies): Likewise. * tree-ssa-loop-unswitch.cc (struct unswitch_predicate): Likewise. * profile-count.h (profile_count::max): Rename to (profile_count::max_prefer_initialized): this; update handling of qualities. Diff: --- gcc/auto-profile.cc | 10 ++++++---- gcc/bb-reorder.cc | 6 ++++-- gcc/cfghooks.cc | 2 +- gcc/ipa-fnsummary.cc | 3 ++- gcc/ipa-inline-transform.cc | 3 ++- gcc/predict.cc | 10 +++++++--- gcc/profile-count.h | 39 ++++++++++++++++----------------------- gcc/tree-ssa-loop-unswitch.cc | 3 ++- 8 files changed, 40 insertions(+), 36 deletions(-) diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc index 6971204ddf55..cf7a21913361 100644 --- a/gcc/auto-profile.cc +++ b/gcc/auto-profile.cc @@ -3600,11 +3600,12 @@ scale_bb_profile () { profile_count cnt = bb->count; bbs.safe_push (bb); - max_count = max_count.max (bb->count); + max_count = profile_count::max_prefer_initialized (max_count, cnt); if (afdo_set_bb_count (bb, zero_bbs)) { std::swap (cnt, bb->count); - max_count_in_fn = max_count_in_fn.max (cnt); + max_count_in_fn + = profile_count::max_prefer_initialized (max_count_in_fn, cnt); add_scale (&scales, cnt, bb->count); } } @@ -3646,7 +3647,8 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb) if (is_bb_annotated (seed_bb, *annotated_bb)) { component[seed_bb->index] = 1; - max_count_in_fn = max_count_in_fn.max (seed_bb->count); + max_count_in_fn + = profile_count::max_prefer_initialized (max_count_in_fn, seed_bb->count); } else component[seed_bb->index] = 0; @@ -3669,7 +3671,7 @@ afdo_adjust_guessed_profile (bb_set *annotated_bb) basic_block b = stack.pop (); bbs.quick_push (b); - max_count = max_count.max (b->count); + max_count = profile_count::max_prefer_initialized (max_count, b->count); for (edge e: b->preds) if (!component[e->src->index]) diff --git a/gcc/bb-reorder.cc b/gcc/bb-reorder.cc index 641b4928ffba..e4efdee0b16e 100644 --- a/gcc/bb-reorder.cc +++ b/gcc/bb-reorder.cc @@ -2389,8 +2389,10 @@ edge_order (const void *ve1, const void *ve2) /* Since profile_count::operator< does not establish a strict weak order in presence of uninitialized counts, use 'max': this makes them appear as if having execution frequency less than any initialized count. */ - profile_count m = c1.max (c2); - return (m == c2) - (m == c1); + gcov_type gc1 = c1.initialized_p () ? c1.to_gcov_type () : 0; + gcov_type gc2 = c2.initialized_p () ? c2.to_gcov_type () : 0; + gcov_type m = MAX (gc1, gc2); + return (m == gc1) - (m == gc2); } /* Reorder basic blocks using the "simple" algorithm. This tries to diff --git a/gcc/cfghooks.cc b/gcc/cfghooks.cc index 25bc5d4b2732..01169e22fbb9 100644 --- a/gcc/cfghooks.cc +++ b/gcc/cfghooks.cc @@ -824,7 +824,7 @@ merge_blocks (basic_block a, basic_block b) else if (a->count.quality () < b->count.quality ()) a->count = b->count; else if (a->count.quality () == b->count.quality ()) - a->count = a->count.max (b->count); + a->count = profile_count::max_prefer_initialized (a->count, b->count); cfg_hooks->merge_blocks (a, b); diff --git a/gcc/ipa-fnsummary.cc b/gcc/ipa-fnsummary.cc index dd41de98ff1a..28f79aad29e9 100644 --- a/gcc/ipa-fnsummary.cc +++ b/gcc/ipa-fnsummary.cc @@ -2356,7 +2356,8 @@ param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i) /* Lookup the most frequent update of the value and believe that it dominates all the other; precise analysis here is difficult. */ EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi) - max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count); + max = profile_count::max_prefer_initialized + (max, BASIC_BLOCK_FOR_FN (cfun, index)->count); if (dump_file) { fprintf (dump_file, " Set with count "); diff --git a/gcc/ipa-inline-transform.cc b/gcc/ipa-inline-transform.cc index a20485496654..99969aae1891 100644 --- a/gcc/ipa-inline-transform.cc +++ b/gcc/ipa-inline-transform.cc @@ -833,7 +833,8 @@ inline_transform (struct cgraph_node *node) FOR_ALL_BB_FN (bb, cfun) { bb->count = bb->count.apply_scale (num, den); - cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + cfun->cfg->count_max = profile_count::max_prefer_initialized + (cfun->cfg->count_max, bb->count); } ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count; } diff --git a/gcc/predict.cc b/gcc/predict.cc index 895c5f959d02..d937cc699b07 100644 --- a/gcc/predict.cc +++ b/gcc/predict.cc @@ -3849,7 +3849,7 @@ update_max_bb_count (void) basic_block bb; FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) - true_count_max = true_count_max.max (bb->count); + true_count_max = profile_count::max_prefer_initialized (true_count_max, bb->count); cfun->cfg->count_max = true_count_max; @@ -4162,7 +4162,9 @@ estimate_bb_frequencies () executed, then preserve this info. */ if (!(bb->count == profile_count::zero ())) bb->count = count.guessed_local ().combine_with_ipa_count (ipa_count); - cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + cfun->cfg->count_max + = profile_count::max_prefer_initialized (cfun->cfg->count_max, + bb->count); } free_aux_for_blocks (); @@ -4473,7 +4475,9 @@ rebuild_frequencies (void) cfun->cfg->count_max = profile_count::uninitialized (); FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, next_bb) { - cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count); + cfun->cfg->count_max + = profile_count::max_prefer_initialized (cfun->cfg->count_max, + bb->count); if (bb->count.nonzero_p () && bb->count.quality () >= AFDO) feedback_found = true; /* Uninitialized count may be result of inlining or an omision in an diff --git a/gcc/profile-count.h b/gcc/profile-count.h index 89746c6749fd..85e601e5001f 100644 --- a/gcc/profile-count.h +++ b/gcc/profile-count.h @@ -1109,30 +1109,23 @@ public: /* Make counter forcibly nonzero. */ profile_count force_nonzero () const; - profile_count max (profile_count other) const - { - profile_count val = *this; - /* Always prefer nonzero IPA counts over local counts. */ - if (ipa ().nonzero_p () || other.ipa ().nonzero_p ()) - { - val = ipa (); - other = other.ipa (); - } - if (!initialized_p ()) - return other; - if (!other.initialized_p ()) - return *this; - if (*this == zero ()) - return other; - if (other == zero ()) - return *this; - gcc_checking_assert (compatible_p (other)); - if (val.m_val < other.m_val || (m_val == other.m_val - && val.m_quality < other.m_quality)) - return other; - return *this; - } + /* Return maximum of A and B. If one of values is uninitialized return the + other. */ + + static profile_count + max_prefer_initialized (const profile_count a, const profile_count b) + { + if (!a.initialized_p ()) + return b; + if (!b.initialized_p ()) + return a; + profile_count ret; + gcc_checking_assert (a.compatible_p (b)); + ret.m_val = MAX (a.m_val, b.m_val); + ret.m_quality = MIN (a.m_quality, b.m_quality); + return ret; + } /* PROB is a probability in scale 0...REG_BR_PROB_BASE. Scale counter accordingly. */ diff --git a/gcc/tree-ssa-loop-unswitch.cc b/gcc/tree-ssa-loop-unswitch.cc index c5ca4ff89452..dc0cb245754c 100644 --- a/gcc/tree-ssa-loop-unswitch.cc +++ b/gcc/tree-ssa-loop-unswitch.cc @@ -136,7 +136,8 @@ struct unswitch_predicate tree rhs = gimple_cond_rhs (stmt); enum tree_code code = gimple_cond_code (stmt); condition = build2 (code, boolean_type_node, lhs, rhs); - count = EDGE_SUCC (bb, 0)->count ().max (EDGE_SUCC (bb, 1)->count ()); + count = profile_count::max_prefer_initialized (EDGE_SUCC (bb, 0)->count (), + EDGE_SUCC (bb, 1)->count ()); if (irange::supports_p (TREE_TYPE (lhs))) { auto range_op = range_op_handler (code);
