Hi, this patch fixes another issue where a basic block is incorrectly marked as unlikely, which is caught by Martin's hack to bb-reorder that inserts a trap into all blocks in the cold partition.
The problem solved here is that I have missed logic to set probabilities to adjusted when doing basic arithmetic on them. While looking into this I have also noticed that there is a remaining FIXME in cfgcleanup and because combine_with_freq was also wrong, I merged the RTL and tree tailmerging logic. Finally I have noticed that we do not put into cold section functions which have local guessed profile but globally they are known to be executed 0 times. This is the case of all functions not executed in train run with profile feedback that definitely are supposed to land in cold section. This is fixed in probably_never_executed predicate. Bootstrapped/regtested x86_64-linux, committed. Honza * cfgcleanup.c (try_crossjump_to_edge): Use combine_with_count to merge probabilities. * predict.c (probably_never_executed): Also mark as cold functions with global 0 profile and guessed local profile. * profile-count.c (profile_probability::combine_with_count): New member function. * profile-count.h (profile_probability::operator*, profile_probability::operator*=, profile_probability::operator/, profile_probability::operator/=): Reduce precision to adjusted and set value to guessed on contradictory divisions. (profile_probability::combine_with_freq): Remove. (profile_probability::combine_with_count): Declare. (profile_count::force_nonzero): Set to adjusted. (profile_count::probability_in): Set quality to adjusted. * tree-ssa-tail-merge.c (replace_block_by): Use combine_with_count. Index: cfgcleanup.c =================================================================== --- cfgcleanup.c (revision 256987) +++ cfgcleanup.c (working copy) @@ -2130,11 +2130,9 @@ try_crossjump_to_edge (int mode, edge e1 if (FORWARDER_BLOCK_P (s2->dest)) s2->dest->count -= s->count (); - /* FIXME: Is this correct? Should be rewritten to count API. 
*/ - if (redirect_edges_to->count.nonzero_p () && src1->count.nonzero_p ()) - s->probability = s->probability.combine_with_freq - (redirect_edges_to->count.to_frequency (cfun), - s2->probability, src1->count.to_frequency (cfun)); + s->probability = s->probability.combine_with_count + (redirect_edges_to->count, + s2->probability, src1->count); } /* Adjust count for the block. An earlier jump Index: predict.c =================================================================== --- predict.c (revision 256987) +++ predict.c (working copy) @@ -210,7 +210,7 @@ probably_never_executed (struct function profile_count count) { gcc_checking_assert (fun); - if (count == profile_count::zero ()) + if (count.ipa () == profile_count::zero ()) return true; /* Do not trust adjusted counts. This will make us to drop int cold section code with low execution count as a result of inlining. These low counts Index: profile-count.c =================================================================== --- profile-count.c (revision 256987) +++ profile-count.c (working copy) @@ -345,3 +345,29 @@ profile_count::from_gcov_type (gcov_type return ret; } + +/* COUNT1 times event happens with *THIS probability, COUNT2 times OTHER + happens with COUNT2 probablity. Return probablity that either *THIS or + OTHER happens. */ + +profile_probability +profile_probability::combine_with_count (profile_count count1, + profile_probability other, + profile_count count2) const +{ + /* If probabilities are same, we are done. + If counts are nonzero we can distribute accordingly. In remaining + cases just avreage the values and hope for the best. 
*/ + if (*this == other || count1 == count2 + || (count2 == profile_count::zero () + && !(count1 == profile_count::zero ()))) + return *this; + if (count1 == profile_count::zero () && !(count2 == profile_count::zero ())) + return other; + else if (count1.nonzero_p () || count2.nonzero_p ()) + return *this * count1.probability_in (count1 + count2) + + other * count2.probability_in (count1 + count2); + else + return *this * profile_probability::even () + + other * profile_probability::even (); +} Index: profile-count.h =================================================================== --- profile-count.h (revision 256987) +++ profile-count.h (working copy) @@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. #define GCC_PROFILE_COUNT_H struct function; +class profile_count; /* Quality of the profile count. Because gengtype does not support enums inside of classes, this is in global namespace. */ @@ -350,7 +351,7 @@ public: return profile_probability::uninitialized (); profile_probability ret; ret.m_val = RDIV ((uint64_t)m_val * other.m_val, max_probability); - ret.m_quality = MIN (m_quality, other.m_quality); + ret.m_quality = MIN (MIN (m_quality, other.m_quality), profile_adjusted); return ret; } profile_probability &operator*= (const profile_probability &other) @@ -363,7 +364,7 @@ public: else { m_val = RDIV ((uint64_t)m_val * other.m_val, max_probability); - m_quality = MIN (m_quality, other.m_quality); + m_quality = MIN (MIN (m_quality, other.m_quality), profile_adjusted); } return *this; } @@ -374,8 +375,14 @@ public: if (!initialized_p () || !other.initialized_p ()) return profile_probability::uninitialized (); profile_probability ret; + /* If we get probability above 1, mark it as unreliable and return 1. 
*/ if (m_val >= other.m_val) - ret.m_val = max_probability; + { + ret.m_val = max_probability; + ret.m_quality = MIN (MIN (m_quality, other.m_quality), + profile_guessed); + return ret; + } else if (!m_val) ret.m_val = 0; else @@ -385,7 +392,7 @@ public: other.m_val), max_probability); } - ret.m_quality = MIN (m_quality, other.m_quality); + ret.m_quality = MIN (MIN (m_quality, other.m_quality), profile_adjusted); return ret; } profile_probability &operator/= (const profile_probability &other) @@ -396,8 +403,15 @@ public: return *this = profile_probability::uninitialized (); else { + /* If we get probability above 1, mark it as unreliable + and return 1. */ if (m_val > other.m_val) - m_val = max_probability; + { + m_val = max_probability; + m_quality = MIN (MIN (m_quality, other.m_quality), + profile_guessed); + return *this; + } else if (!m_val) ; else @@ -407,7 +421,7 @@ public: other.m_val), max_probability); } - m_quality = MIN (m_quality, other.m_quality); + m_quality = MIN (MIN (m_quality, other.m_quality), profile_adjusted); } return *this; } @@ -465,27 +479,6 @@ public: return ret; } - profile_probability combine_with_freq (int freq1, profile_probability other, - int freq2) const - { - profile_probability ret; - - if (*this == profile_probability::uninitialized () - || other == profile_probability::uninitialized ()) - return profile_probability::uninitialized (); - - gcc_checking_assert (freq1 >= 0 && freq2 >= 0); - if (!freq1 && !freq2) - { - ret.m_val = (m_val + other.m_val) / 2; - } - else - ret.m_val = RDIV (m_val * (uint64_t) freq1 - + other.m_val * (uint64_t) freq2, freq1 + freq2); - ret.m_quality = MIN (m_quality, other.m_quality); - return ret; - } - /* Return *THIS * NUM / DEN. */ profile_probability apply_scale (int64_t num, int64_t den) const { @@ -569,6 +562,12 @@ public: bool differs_from_p (profile_probability other) const; /* Return if difference is greater than 50%. 
*/ bool differs_lot_from_p (profile_probability other) const; + /* COUNT1 times event happens with *THIS probability, COUNT2 times OTHER + happens with COUNT2 probablity. Return probablity that either *THIS or + OTHER happens. */ + profile_probability combine_with_count (profile_count count1, + profile_probability other, + profile_count count2) const; /* LTO streaming support. */ static profile_probability stream_in (struct lto_input_block *); @@ -906,7 +905,10 @@ public: return *this; profile_count ret = *this; if (ret.m_val == 0) - ret.m_val = 1; + { + ret.m_val = 1; + ret.m_quality = MIN (m_quality, profile_adjusted); + } return ret; } @@ -1062,20 +1064,28 @@ public: OVERALL. */ profile_probability probability_in (const profile_count overall) const { - if (*this == profile_count::zero ()) + if (*this == profile_count::zero () + && !(overall == profile_count::zero ())) return profile_probability::never (); if (!initialized_p () || !overall.initialized_p () || !overall.m_val) return profile_probability::uninitialized (); + if (*this == overall && m_quality == profile_precise) + return profile_probability::always (); profile_probability ret; gcc_checking_assert (compatible_p (overall)); if (overall.m_val < m_val) - ret.m_val = profile_probability::max_probability; + { + ret.m_val = profile_probability::max_probability; + ret.m_quality = profile_guessed; + return ret; + } else ret.m_val = RDIV (m_val * profile_probability::max_probability, overall.m_val); - ret.m_quality = MAX (MIN (m_quality, overall.m_quality), profile_guessed); + ret.m_quality = MIN (MAX (MIN (m_quality, overall.m_quality), + profile_guessed), profile_adjusted); return ret; } Index: tree-ssa-tail-merge.c =================================================================== --- tree-ssa-tail-merge.c (revision 256987) +++ tree-ssa-tail-merge.c (working copy) @@ -1570,17 +1570,8 @@ replace_block_by (basic_block bb1, basic /* If probabilities are same, we are done. 
If counts are nonzero we can distribute accordingly. In remaining cases just avreage the values and hope for the best. */ - if (e1->probability == e2->probability) - ; - else if (bb1->count.nonzero_p () || bb2->count.nonzero_p ()) - e2->probability - = e2->probability - * bb2->count.probability_in (bb1->count + bb2->count) - + e1->probability - * bb1->count.probability_in (bb1->count + bb2->count); - else - e2->probability = e2->probability * profile_probability::even () - + e1->probability * profile_probability::even (); + e2->probability = e1->probability.combine_with_count + (bb1->count, e2->probability, bb2->count); } bb2->count += bb1->count;