On Thu, Oct 12, 2023 at 10:15 AM Richard Sandiford <richard.sandif...@arm.com> wrote: > > Richard Biener <richard.guent...@gmail.com> writes: > > On Tue, Aug 22, 2023 at 12:42 PM Szabolcs Nagy via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > >> > >> From: Richard Sandiford <richard.sandif...@arm.com> > >> > >> The prologue/epilogue pass allows the prologue sequence > >> to contain jumps. The sequence is then partitioned into > >> basic blocks using find_many_sub_basic_blocks. > >> > >> This patch treats epilogues in the same way. It's needed for > >> a follow-on aarch64 patch that adds conditional code to both > >> the prologue and the epilogue. > >> > >> Tested on aarch64-linux-gnu (including with a follow-on patch) > >> and x86_64-linux-gnu. OK to install? > >> > >> Richard > >> > >> gcc/ > >> * function.cc (thread_prologue_and_epilogue_insns): Handle > >> epilogues that contain jumps. > >> --- > >> > >> This is a previously approved patch that was not committed > >> because it was not needed at the time, but i'd like to commit > >> it as it is needed for the followup aarch64 eh_return changes: > >> > >> https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605769.html > >> > >> --- > >> gcc/function.cc | 10 ++++++++++ > >> 1 file changed, 10 insertions(+) > >> > >> diff --git a/gcc/function.cc b/gcc/function.cc > >> index dd2c1136e07..70d1cd65303 100644 > >> --- a/gcc/function.cc > >> +++ b/gcc/function.cc > >> @@ -6120,6 +6120,11 @@ thread_prologue_and_epilogue_insns (void) > >> && returnjump_p (BB_END (e->src))) > >> e->flags &= ~EDGE_FALLTHRU; > >> } > >> + > >> + auto_sbitmap blocks (last_basic_block_for_fn (cfun)); > >> + bitmap_clear (blocks); > >> + bitmap_set_bit (blocks, BLOCK_FOR_INSN (epilogue_seq)->index); > >> + find_many_sub_basic_blocks (blocks); > >> } > >> else if (next_active_insn (BB_END (exit_fallthru_edge->src))) > >> { > >> @@ -6218,6 +6223,11 @@ thread_prologue_and_epilogue_insns (void) > >> set_insn_locations (seq, epilogue_location); > >> > >> emit_insn_before (seq, insn); > >> + > >> + auto_sbitmap blocks (last_basic_block_for_fn (cfun)); > >> + bitmap_clear (blocks); > >> + bitmap_set_bit (blocks, BLOCK_FOR_INSN (insn)->index); > >> + find_many_sub_basic_blocks (blocks); > > > > I'll note that clearing a full sbitmap to pass down a single basic block > > to find_many_sub_basic_blocks is a quite expensive operation. May I suggest > > to add an overload operating on a single basic block? It's only > > > > FOR_EACH_BB_FN (bb, cfun) > > SET_STATE (bb, > > bitmap_bit_p (blocks, bb->index) ? BLOCK_TO_SPLIT : > > BLOCK_ORIGINAL); > > > > using the bitmap, so factoring the rest of the function and customizing this > > walk would do the trick. Note that the whole function could be refactored > > to > > handle single blocks more efficiently. > > Sorry for the late reply, but does this look OK? Tested on > aarch64-linux-gnu and x86_64-linux-gnu.
LGTM, not sure if I'm qualified enough to approve though (I think you are more qualified here, so ..) Thanks, Richard. > Thanks, > Richard > > --- > > The prologue/epilogue pass allows the prologue sequence to contain > jumps. The sequence is then partitioned into basic blocks using > find_many_sub_basic_blocks. > > This patch treats epilogues in a similar way. Since only one block > might need to be split, the patch (re)introduces a find_sub_basic_blocks > routine to handle a single block. > > The new routine hard-codes the assumption that split_block will chain > the new block immediately after the original block. The routine doesn't > try to replicate the fix for PR81030, since that was specific to > gimple->rtl expansion. > > The patch is needed for follow-on aarch64 patches that add conditional > code to the epilogue. The tests are part of those patches. > > gcc/ > * cfgbuild.h (find_sub_basic_blocks): Declare. > * cfgbuild.cc (update_profile_for_new_sub_basic_block): New function, > split out from... > (find_many_sub_basic_blocks): ...here. > (find_sub_basic_blocks): New function. > * function.cc (thread_prologue_and_epilogue_insns): Handle > epilogues that contain jumps. > --- > gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++---------------- > gcc/cfgbuild.h | 1 + > gcc/function.cc | 4 +++ > 3 files changed, 70 insertions(+), 30 deletions(-) > > diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc > index 15ed4deb5f7..9a6b34fb4b1 100644 > --- a/gcc/cfgbuild.cc > +++ b/gcc/cfgbuild.cc > @@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b) > } > } > > +/* Update the profile information for BB, which was created by splitting > + an RTL block that had a non-final jump. */ > + > +static void > +update_profile_for_new_sub_basic_block (basic_block bb) > +{ > + edge e; > + edge_iterator ei; > + > + bool initialized_src = false, uninitialized_src = false; > + bb->count = profile_count::zero (); > + FOR_EACH_EDGE (e, ei, bb->preds) > + { > + if (e->count ().initialized_p ()) > + { > + bb->count += e->count (); > + initialized_src = true; > + } > + else > + uninitialized_src = true; > + } > + /* When some edges are missing with read profile, this is > + most likely because RTL expansion introduced loop. > + When profile is guessed we may have BB that is reachable > + from unlikely path as well as from normal path. > + > + TODO: We should handle loops created during BB expansion > + correctly here. For now we assume all those loop to cycle > + precisely once. */ > + if (!initialized_src > + || (uninitialized_src > + && profile_status_for_fn (cfun) < PROFILE_GUESSED)) > + bb->count = profile_count::uninitialized (); > + > + compute_outgoing_frequencies (bb); > +} > + > /* Assume that some pass has inserted labels or control flow > instructions within a basic block. Split basic blocks as needed > and create edges. */ > @@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks) > if (profile_status_for_fn (cfun) != PROFILE_ABSENT) > FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb) > { > - edge e; > - edge_iterator ei; > - > if (STATE (bb) == BLOCK_ORIGINAL) > continue; > if (STATE (bb) == BLOCK_NEW) > { > - bool initialized_src = false, uninitialized_src = false; > - bb->count = profile_count::zero (); > - FOR_EACH_EDGE (e, ei, bb->preds) > - { > - if (e->count ().initialized_p ()) > - { > - bb->count += e->count (); > - initialized_src = true; > - } > - else > - uninitialized_src = true; > - } > - /* When some edges are missing with read profile, this is > - most likely because RTL expansion introduced loop. > - When profile is guessed we may have BB that is reachable > - from unlikely path as well as from normal path. > - > - TODO: We should handle loops created during BB expansion > - correctly here. For now we assume all those loop to cycle > - precisely once. */ > - if (!initialized_src > - || (uninitialized_src > - && profile_status_for_fn (cfun) < PROFILE_GUESSED)) > - bb->count = profile_count::uninitialized (); > + update_profile_for_new_sub_basic_block (bb); > + continue; > } > - /* If nothing changed, there is no need to create new BBs. */ > - else if (EDGE_COUNT (bb->succs) == n_succs[bb->index]) > + /* If nothing changed, there is no need to create new BBs. */ > + if (EDGE_COUNT (bb->succs) == n_succs[bb->index]) > { > /* In rare occassions RTL expansion might have mistakely assigned > a probabilities different from what is in CFG. This happens > @@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks) > update_br_prob_note (bb); > continue; > } > - > compute_outgoing_frequencies (bb); > } > > FOR_EACH_BB_FN (bb, cfun) > SET_STATE (bb, 0); > } > + > +/* Like find_many_sub_basic_blocks, but look only within BB. */ > + > +void > +find_sub_basic_blocks (basic_block bb) > +{ > + basic_block end_bb = bb->next_bb; > + find_bb_boundaries (bb); > + if (bb->next_bb == end_bb) > + return; > + > + /* Re-scan and wire in all edges. This expects simple (conditional) > + jumps at the end of each new basic blocks. */ > + make_edges (bb, end_bb->prev_bb, 1); > + > + /* Update branch probabilities. Expect only (un)conditional jumps > + to be created with only the forward edges. */ > + if (profile_status_for_fn (cfun) != PROFILE_ABSENT) > + { > + compute_outgoing_frequencies (bb); > + for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb) > + update_profile_for_new_sub_basic_block (bb); > + } > +} > diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h > index 51d3eccb1ae..4191fb3fcba 100644 > --- a/gcc/cfgbuild.h > +++ b/gcc/cfgbuild.h > @@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *); > extern bool control_flow_insn_p (const rtx_insn *); > extern void rtl_make_eh_edge (sbitmap, basic_block, rtx); > extern void find_many_sub_basic_blocks (sbitmap); > +extern void find_sub_basic_blocks (basic_block); > > #endif /* GCC_CFGBUILD_H */ > diff --git a/gcc/function.cc b/gcc/function.cc > index 336af28fb22..afb0b33da9e 100644 > --- a/gcc/function.cc > +++ b/gcc/function.cc > @@ -6112,6 +6112,8 @@ thread_prologue_and_epilogue_insns (void) > && returnjump_p (BB_END (e->src))) > e->flags &= ~EDGE_FALLTHRU; > } > + > + find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq)); > } > else if (next_active_insn (BB_END (exit_fallthru_edge->src))) > { > @@ -6210,6 +6212,8 @@ thread_prologue_and_epilogue_insns (void) > set_insn_locations (seq, epilogue_location); > > emit_insn_before (seq, insn); > + > + find_sub_basic_blocks (BLOCK_FOR_INSN (insn)); > } > } > > -- > 2.25.1 >