On 3 May 2016 at 08:59, Segher Boessenkool <seg...@kernel.crashing.org> wrote: > Now that cfgcleanup knows how to optimize with return statements, the > epilogue insertion code doesn't have to deal with it itself anymore. > > > 2016-05-03 Segher Boessenkool <seg...@kernel.crashing.org> > > * function.c (emit_use_return_register_into_block): Delete. > (gen_return_pattern): Delete. > (emit_return_into_block): Delete. > (active_insn_between): Delete. > (convert_jumps_to_returns): Delete. > (emit_return_for_exit): Delete. > (thread_prologue_and_epilogue_insns): Delete all code dealing with > simple_return for shrink-wrapped blocks. > * shrink-wrap.c (try_shrink_wrapping): Insert simple_return at the > end of blocks that need one. > (get_unconverted_simple_return): Delete. > (convert_to_simple_return): Delete. > * shrink-wrap.c (get_unconverted_simple_return): Delete declaration. > (convert_to_simple_return): Ditto. >
Hi, After this patch, I've noticed that gcc.target/arm/pr43920-2.c now fails at: /* { dg-final { scan-assembler-times "pop" 2 } } */ Before the patch, the generated code was: [...] pop {r3, r4, r5, r6, r7, pc} .L4: mov r0, #-1 .L1: pop {r3, r4, r5, r6, r7, pc} it is now: [...] .L1: pop {r3, r4, r5, r6, r7, pc} .L4: mov r0, #-1 b .L1 The new version does not seem better, as it adds a branch on the path and it is not smaller. Christophe. > --- > gcc/function.c | 213 > +----------------------------------------------------- > gcc/shrink-wrap.c | 171 ++++--------------------------------------- > gcc/shrink-wrap.h | 6 -- > 3 files changed, 16 insertions(+), 374 deletions(-) > > diff --git a/gcc/function.c b/gcc/function.c > index f6eb56c..b9a6338 100644 > --- a/gcc/function.c > +++ b/gcc/function.c > @@ -5753,49 +5753,6 @@ prologue_epilogue_contains (const_rtx insn) > return 0; > } > > -/* Insert use of return register before the end of BB. */ > - > -static void > -emit_use_return_register_into_block (basic_block bb) > -{ > - start_sequence (); > - use_return_register (); > - rtx_insn *seq = get_insns (); > - end_sequence (); > - rtx_insn *insn = BB_END (bb); > - if (HAVE_cc0 && reg_mentioned_p (cc0_rtx, PATTERN (insn))) > - insn = prev_cc0_setter (insn); > - > - emit_insn_before (seq, insn); > -} > - > - > -/* Create a return pattern, either simple_return or return, depending on > - simple_p. */ > - > -static rtx_insn * > -gen_return_pattern (bool simple_p) > -{ > - return (simple_p > - ? targetm.gen_simple_return () > - : targetm.gen_return ()); > -} > - > -/* Insert an appropriate return pattern at the end of block BB. This > - also means updating block_for_insn appropriately. SIMPLE_P is > - the same as in gen_return_pattern and passed to it. */ > - > -void > -emit_return_into_block (bool simple_p, basic_block bb) > -{ > - rtx_jump_insn *jump = emit_jump_insn_after (gen_return_pattern (simple_p), > - BB_END (bb)); > - rtx pat = PATTERN (jump); > - if (GET_CODE (pat) == PARALLEL) > - pat = XVECEXP (pat, 0, 0); > - gcc_assert (ANY_RETURN_P (pat)); > - JUMP_LABEL (jump) = pat; > -} > > /* Set JUMP_LABEL for a return insn. */ > > @@ -5811,135 +5768,6 @@ set_return_jump_label (rtx_insn *returnjump) > JUMP_LABEL (returnjump) = ret_rtx; > } > > -/* Return true if there are any active insns between HEAD and TAIL. */ > -bool > -active_insn_between (rtx_insn *head, rtx_insn *tail) > -{ > - while (tail) > - { > - if (active_insn_p (tail)) > - return true; > - if (tail == head) > - return false; > - tail = PREV_INSN (tail); > - } > - return false; > -} > - > -/* LAST_BB is a block that exits, and empty of active instructions. > - Examine its predecessors for jumps that can be converted to > - (conditional) returns. */ > -vec<edge> > -convert_jumps_to_returns (basic_block last_bb, bool simple_p, > - vec<edge> unconverted ATTRIBUTE_UNUSED) > -{ > - int i; > - basic_block bb; > - edge_iterator ei; > - edge e; > - auto_vec<basic_block> src_bbs (EDGE_COUNT (last_bb->preds)); > - > - FOR_EACH_EDGE (e, ei, last_bb->preds) > - if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)) > - src_bbs.quick_push (e->src); > - > - rtx_insn *label = BB_HEAD (last_bb); > - > - FOR_EACH_VEC_ELT (src_bbs, i, bb) > - { > - rtx_insn *jump = BB_END (bb); > - > - if (!JUMP_P (jump) || JUMP_LABEL (jump) != label) > - continue; > - > - e = find_edge (bb, last_bb); > - > - /* If we have an unconditional jump, we can replace that > - with a simple return instruction. */ > - if (simplejump_p (jump)) > - { > - /* The use of the return register might be present in the exit > - fallthru block. Either: > - - removing the use is safe, and we should remove the use in > - the exit fallthru block, or > - - removing the use is not safe, and we should add it here. > - For now, we conservatively choose the latter. Either of the > - 2 helps in crossjumping. */ > - emit_use_return_register_into_block (bb); > - > - emit_return_into_block (simple_p, bb); > - delete_insn (jump); > - } > - > - /* If we have a conditional jump branching to the last > - block, we can try to replace that with a conditional > - return instruction. */ > - else if (condjump_p (jump)) > - { > - rtx dest; > - > - if (simple_p) > - dest = simple_return_rtx; > - else > - dest = ret_rtx; > - if (!redirect_jump (as_a <rtx_jump_insn *> (jump), dest, 0)) > - { > - if (targetm.have_simple_return () && simple_p) > - { > - if (dump_file) > - fprintf (dump_file, > - "Failed to redirect bb %d branch.\n", bb->index); > - unconverted.safe_push (e); > - } > - continue; > - } > - > - /* See comment in simplejump_p case above. */ > - emit_use_return_register_into_block (bb); > - > - /* If this block has only one successor, it both jumps > - and falls through to the fallthru block, so we can't > - delete the edge. */ > - if (single_succ_p (bb)) > - continue; > - } > - else > - { > - if (targetm.have_simple_return () && simple_p) > - { > - if (dump_file) > - fprintf (dump_file, > - "Failed to redirect bb %d branch.\n", bb->index); > - unconverted.safe_push (e); > - } > - continue; > - } > - > - /* Fix up the CFG for the successful change we just made. */ > - redirect_edge_succ (e, EXIT_BLOCK_PTR_FOR_FN (cfun)); > - e->flags &= ~EDGE_CROSSING; > - } > - src_bbs.release (); > - return unconverted; > -} > - > -/* Emit a return insn for the exit fallthru block. */ > -basic_block > -emit_return_for_exit (edge exit_fallthru_edge, bool simple_p) > -{ > - basic_block last_bb = exit_fallthru_edge->src; > - > - if (JUMP_P (BB_END (last_bb))) > - { > - last_bb = split_edge (exit_fallthru_edge); > - exit_fallthru_edge = single_succ_edge (last_bb); > - } > - emit_barrier_after (BB_END (last_bb)); > - emit_return_into_block (simple_p, last_bb); > - exit_fallthru_edge->flags &= ~EDGE_FALLTHRU; > - return last_bb; > -} > - > > /* Generate the prologue and epilogue RTL if the machine supports it. Thread > this into place with notes indicating where the prologue ends and where > @@ -5993,7 +5821,6 @@ void > thread_prologue_and_epilogue_insns (void) > { > bool inserted; > - vec<edge> unconverted_simple_returns = vNULL; > bitmap_head bb_flags; > rtx_insn *returnjump; > rtx_insn *epilogue_end ATTRIBUTE_UNUSED; > @@ -6088,40 +5915,8 @@ thread_prologue_and_epilogue_insns (void) > > exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR_FOR_FN > (cfun)->preds); > > - if (targetm.have_simple_return () && entry_edge != orig_entry_edge) > - exit_fallthru_edge > - = get_unconverted_simple_return (exit_fallthru_edge, bb_flags, > - &unconverted_simple_returns, > - &returnjump); > - if (targetm.have_return ()) > - { > - if (exit_fallthru_edge == NULL) > - goto epilogue_done; > - > - if (optimize) > - { > - basic_block last_bb = exit_fallthru_edge->src; > - > - if (LABEL_P (BB_HEAD (last_bb)) > - && !active_insn_between (BB_HEAD (last_bb), BB_END (last_bb))) > - convert_jumps_to_returns (last_bb, false, vNULL); > - > - if (EDGE_COUNT (last_bb->preds) != 0 > - && single_succ_p (last_bb)) > - { > - last_bb = emit_return_for_exit (exit_fallthru_edge, false); > - epilogue_end = returnjump = BB_END (last_bb); > - > - /* Emitting the return may add a basic block. > - Fix bb_flags for the added block. */ > - if (targetm.have_simple_return () > - && last_bb != exit_fallthru_edge->src) > - bitmap_set_bit (&bb_flags, last_bb->index); > - > - goto epilogue_done; > - } > - } > - } > + if (targetm.have_return () && exit_fallthru_edge == NULL) > + goto epilogue_done; > > /* A small fib -- epilogue is not yet completed, but we wish to re-use > this marker for the splits of EH_RETURN patterns, and nothing else > @@ -6229,10 +6024,6 @@ epilogue_done: > } > } > > - if (targetm.have_simple_return ()) > - convert_to_simple_return (entry_edge, orig_entry_edge, bb_flags, > - returnjump, unconverted_simple_returns); > - > /* Emit sibling epilogues before any sibling call sites. */ > for (ei = ei_start (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); (e = > ei_safe_edge > (ei)); > diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c > index fe79519..0ba1fed 100644 > --- a/gcc/shrink-wrap.c > +++ b/gcc/shrink-wrap.c > @@ -946,10 +946,9 @@ try_shrink_wrapping (edge *entry_edge, bitmap_head > *bb_with, > redirect_edge_and_branch_force (e, (basic_block) e->dest->aux); > } > > - /* Change all the exits that should get a simple_return to FAKE. > - They will be converted later. */ > + /* Make a simple_return for those exits that run without prologue. */ > > - FOR_EACH_BB_FN (bb, cfun) > + FOR_EACH_BB_REVERSE_FN (bb, cfun) > if (!bitmap_bit_p (bb_with, bb->index)) > FOR_EACH_EDGE (e, ei, bb->succs) > if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) > @@ -958,7 +957,18 @@ try_shrink_wrapping (edge *entry_edge, bitmap_head > *bb_with, > > e->flags &= ~EDGE_FALLTHRU; > if (!(e->flags & EDGE_SIBCALL)) > - e->flags |= EDGE_FAKE; > + { > + rtx_insn *ret = targetm.gen_simple_return (); > + rtx_insn *end = BB_END (e->src); > + rtx_jump_insn *start = emit_jump_insn_after (ret, end); > + JUMP_LABEL (start) = simple_return_rtx; > + e->flags &= ~EDGE_FAKE; > + > + if (dump_file) > + fprintf (dump_file, > + "Made simple_return with UID %d in bb %d\n", > + INSN_UID (start), e->src->index); > + } > > emit_barrier_after_bb (e->src); > } > @@ -996,156 +1006,3 @@ try_shrink_wrapping (edge *entry_edge, bitmap_head > *bb_with, > > free_dominance_info (CDI_DOMINATORS); > } > - > -/* If we're allowed to generate a simple return instruction, then by > - definition we don't need a full epilogue. If the last basic > - block before the exit block does not contain active instructions, > - examine its predecessors and try to emit (conditional) return > - instructions. */ > - > -edge > -get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags, > - vec<edge> *unconverted_simple_returns, > - rtx_insn **returnjump) > -{ > - if (optimize) > - { > - unsigned i, last; > - > - /* convert_jumps_to_returns may add to preds of the exit block > - (but won't remove). Stop at end of current preds. */ > - last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); > - for (i = 0; i < last; i++) > - { > - edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i); > - if (LABEL_P (BB_HEAD (e->src)) > - && !bitmap_bit_p (&bb_flags, e->src->index) > - && !active_insn_between (BB_HEAD (e->src), BB_END (e->src))) > - *unconverted_simple_returns > - = convert_jumps_to_returns (e->src, true, > - *unconverted_simple_returns); > - } > - } > - > - if (exit_fallthru_edge != NULL > - && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0 > - && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index)) > - { > - basic_block last_bb; > - > - last_bb = emit_return_for_exit (exit_fallthru_edge, true); > - *returnjump = BB_END (last_bb); > - exit_fallthru_edge = NULL; > - } > - return exit_fallthru_edge; > -} > - > -/* If there were branches to an empty LAST_BB which we tried to > - convert to conditional simple_returns, but couldn't for some > - reason, create a block to hold a simple_return insn and redirect > - those remaining edges. */ > - > -void > -convert_to_simple_return (edge entry_edge, edge orig_entry_edge, > - bitmap_head bb_flags, rtx_insn *returnjump, > - vec<edge> unconverted_simple_returns) > -{ > - edge e; > - edge_iterator ei; > - > - if (!unconverted_simple_returns.is_empty ()) > - { > - basic_block simple_return_block_hot = NULL; > - basic_block simple_return_block_cold = NULL; > - edge pending_edge_hot = NULL; > - edge pending_edge_cold = NULL; > - basic_block exit_pred; > - int i; > - > - gcc_assert (entry_edge != orig_entry_edge); > - > - /* See if we can reuse the last insn that was emitted for the > - epilogue. */ > - if (returnjump != NULL_RTX > - && JUMP_LABEL (returnjump) == simple_return_rtx) > - { > - e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN > (returnjump)); > - if (BB_PARTITION (e->src) == BB_HOT_PARTITION) > - simple_return_block_hot = e->dest; > - else > - simple_return_block_cold = e->dest; > - } > - > - /* Also check returns we might need to add to tail blocks. */ > - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > - if (EDGE_COUNT (e->src->preds) != 0 > - && (e->flags & EDGE_FAKE) != 0 > - && !bitmap_bit_p (&bb_flags, e->src->index)) > - { > - if (BB_PARTITION (e->src) == BB_HOT_PARTITION) > - pending_edge_hot = e; > - else > - pending_edge_cold = e; > - } > - > - /* Save a pointer to the exit's predecessor BB for use in > - inserting new BBs at the end of the function. Do this > - after the call to split_block above which may split > - the original exit pred. */ > - exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb; > - > - FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e) > - { > - basic_block *pdest_bb; > - edge pending; > - > - if (BB_PARTITION (e->src) == BB_HOT_PARTITION) > - { > - pdest_bb = &simple_return_block_hot; > - pending = pending_edge_hot; > - } > - else > - { > - pdest_bb = &simple_return_block_cold; > - pending = pending_edge_cold; > - } > - > - if (*pdest_bb == NULL && pending != NULL) > - { > - emit_return_into_block (true, pending->src); > - pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE); > - *pdest_bb = pending->src; > - } > - else if (*pdest_bb == NULL) > - { > - basic_block bb; > - > - bb = create_basic_block (NULL, NULL, exit_pred); > - BB_COPY_PARTITION (bb, e->src); > - rtx_insn *ret = targetm.gen_simple_return (); > - rtx_jump_insn *start = emit_jump_insn_after (ret, BB_END (bb)); > - JUMP_LABEL (start) = simple_return_rtx; > - emit_barrier_after (start); > - > - *pdest_bb = bb; > - make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); > - } > - redirect_edge_and_branch_force (e, *pdest_bb); > - } > - unconverted_simple_returns.release (); > - } > - > - if (entry_edge != orig_entry_edge) > - { > - FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > - if (EDGE_COUNT (e->src->preds) != 0 > - && (e->flags & EDGE_FAKE) != 0 > - && !bitmap_bit_p (&bb_flags, e->src->index)) > - { > - e = fix_fake_fallthrough_edge (e); > - > - emit_return_into_block (true, e->src); > - e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE); > - } > - } > -} > diff --git a/gcc/shrink-wrap.h b/gcc/shrink-wrap.h > index 6e7a4f7..4d821d7 100644 > --- a/gcc/shrink-wrap.h > +++ b/gcc/shrink-wrap.h > @@ -26,12 +26,6 @@ along with GCC; see the file COPYING3. If not see > extern bool requires_stack_frame_p (rtx_insn *, HARD_REG_SET, HARD_REG_SET); > extern void try_shrink_wrapping (edge *entry_edge, bitmap_head *bb_flags, > rtx_insn *prologue_seq); > -extern edge get_unconverted_simple_return (edge, bitmap_head, > - vec<edge> *, rtx_insn **); > -extern void convert_to_simple_return (edge entry_edge, edge orig_entry_edge, > - bitmap_head bb_flags, > - rtx_insn *returnjump, > - vec<edge> unconverted_simple_returns); > #define SHRINK_WRAPPING_ENABLED \ > (flag_shrink_wrap && targetm.have_simple_return ()) > > -- > 1.9.3 >