[SH] Add simple_return pattern
This patch implements the simple_return pattern to enable -fshrink-wrap on SH. It also clean up some redundancies for expand_epilogue (called twice from the "return" and "epilogue" patterns and the sh_expand_prologue parameter type. No regressions with sh-superh-elf and sh4-linux gcc testsuites. Thanks Christian 2012-08-29 Christian Bruel * config/sh/sh-protos.h (sh_need_epilogue): Delete. * config/sh/sh.c (sh_need_epilogue): Delete. (sh_need_epilogue_known): Delete. (sh_output_function_epilogue): Remove sh_need_epilogue_known. * config/sh/sh.md (any_return): New iterator and optab. (simple_return): Define. (return): Check epilogue_completed. (epilogue): Use inline return rtl. (sh_expand_epilogue): Cleanup parameters boolean type. Index: gcc/config/sh/sh-protos.h === --- gcc/config/sh/sh-protos.h (revision 191129) +++ gcc/config/sh/sh-protos.h (working copy) @@ -117,7 +117,6 @@ extern int sh_media_register_for_return (void); extern void sh_expand_prologue (void); extern void sh_expand_epilogue (bool); -extern bool sh_need_epilogue (void); extern void sh_set_return_address (rtx, rtx); extern int initial_elimination_offset (int, int); extern bool fldi_ok (void); Index: gcc/config/sh/sh.c === --- gcc/config/sh/sh.c (revision 191129) +++ gcc/config/sh/sh.c (working copy) @@ -7901,22 +7901,6 @@ static int sh_need_epilogue_known = 0; -bool -sh_need_epilogue (void) -{ - if (! sh_need_epilogue_known) -{ - rtx epilogue; - - start_sequence (); - sh_expand_epilogue (0); - epilogue = get_insns (); - end_sequence (); - sh_need_epilogue_known = (epilogue == NULL ? -1 : 1); -} - return sh_need_epilogue_known > 0; -} - /* Emit code to change the current function's return address to RA. TEMP is available as a scratch register, if needed. */ @@ -7996,7 +7980,6 @@ sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT size ATTRIBUTE_UNUSED) { - sh_need_epilogue_known = 0; } static rtx Index: gcc/config/sh/sh.md === --- gcc/config/sh/sh.md (revision 191129) +++ gcc/config/sh/sh.md (working copy) @@ -177,6 +177,10 @@ (UNSPECV_EH_RETURN 12) ]) +(define_code_iterator any_return [return simple_return]) +(define_code_attr optab [(return "return") + (simple_return "simple_return")]) + ;; - ;; Attributes ;; - @@ -9280,7 +9284,7 @@ [(return)] "" { - sh_expand_epilogue (1); + sh_expand_epilogue (true); if (TARGET_SHCOMPACT) { rtx insn, set; @@ -10099,9 +10103,13 @@ } [(set_attr "type" "load_media")]) +(define_expand "simple_return" + [(simple_return)] + "") + (define_expand "return" - [(return)] - "reload_completed && ! sh_need_epilogue ()" + [(simple_return)] + "reload_completed && epilogue_completed" { if (TARGET_SHMEDIA) { @@ -10117,8 +10125,8 @@ } }) -(define_insn "*return_i" - [(return)] +(define_insn "*_i" + [(any_return)] "TARGET_SH1 && ! (TARGET_SHCOMPACT && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))) @@ -10244,19 +10252,12 @@ (define_expand "prologue" [(const_int 0)] "" -{ - sh_expand_prologue (); - DONE; -}) + "sh_expand_prologue (); DONE;") (define_expand "epilogue" [(return)] "" -{ - sh_expand_epilogue (0); - emit_jump_insn (gen_return ()); - DONE; -}) + "sh_expand_epilogue (false);") (define_expand "eh_return" [(use (match_operand 0 "register_operand" ""))]
Re: [SH] Add simple_return pattern
On Mon, 2012-09-10 at 15:51 +0200, Christian Bruel wrote: > This patch implements the simple_return pattern to enable -fshrink-wrap > on SH. It also clean up some redundancies for expand_epilogue (called > twice from the "return" and "epilogue" patterns and the > sh_expand_prologue parameter type. > > No regressions with sh-superh-elf and sh4-linux gcc testsuites. > > Thanks > > Christian > Regarding the iterators, maybe it's better to put them in config/sh/iterators.md. The optab code attr is not needed in this case, "" is sufficient. How about the attached patch instead? BTW, I'm now also testing the modified attached patch and your previous newlib related patch. Cheers, Oleg Index: gcc/config/sh/sh.md === --- gcc/config/sh/sh.md (revision 191161) +++ gcc/config/sh/sh.md (working copy) @@ -9335,7 +9335,7 @@ [(return)] "" { - sh_expand_epilogue (1); + sh_expand_epilogue (true); if (TARGET_SHCOMPACT) { rtx insn, set; @@ -10154,9 +10154,12 @@ } [(set_attr "type" "load_media")]) +(define_expand "simple_return" + [(simple_return)]) + (define_expand "return" - [(return)] - "reload_completed && ! sh_need_epilogue ()" + [(simple_return)] + "reload_completed && epilogue_completed" { if (TARGET_SHMEDIA) { @@ -10172,8 +10175,8 @@ } }) -(define_insn "*return_i" - [(return)] +(define_insn "*_i" + [(RETURN)] "TARGET_SH1 && ! (TARGET_SHCOMPACT && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))) @@ -10299,19 +10302,12 @@ (define_expand "prologue" [(const_int 0)] "" -{ - sh_expand_prologue (); - DONE; -}) + "sh_expand_prologue (); DONE;") (define_expand "epilogue" [(return)] "" -{ - sh_expand_epilogue (0); - emit_jump_insn (gen_return ()); - DONE; -}) + "sh_expand_epilogue (false);") (define_expand "eh_return" [(use (match_operand 0 "register_operand" ""))] Index: gcc/config/sh/iterators.md === --- gcc/config/sh/iterators.md (revision 191161) +++ gcc/config/sh/iterators.md (working copy) @@ -34,3 +34,5 @@ (define_mode_attr disp04 [(QI "K04") (HI "K05")]) (define_mode_attr disp12 [(QI "K12") (HI "K13")]) +;; Code iterator for return codes. +(define_code_iterator RETURN [return simple_return]) Index: gcc/config/sh/sh-protos.h === --- gcc/config/sh/sh-protos.h (revision 191161) +++ gcc/config/sh/sh-protos.h (working copy) @@ -117,7 +117,6 @@ extern int sh_media_register_for_return (void); extern void sh_expand_prologue (void); extern void sh_expand_epilogue (bool); -extern bool sh_need_epilogue (void); extern void sh_set_return_address (rtx, rtx); extern int initial_elimination_offset (int, int); extern bool fldi_ok (void); Index: gcc/config/sh/sh.c === --- gcc/config/sh/sh.c (revision 191161) +++ gcc/config/sh/sh.c (working copy) @@ -7901,22 +7901,6 @@ static int sh_need_epilogue_known = 0; -bool -sh_need_epilogue (void) -{ - if (! sh_need_epilogue_known) -{ - rtx epilogue; - - start_sequence (); - sh_expand_epilogue (0); - epilogue = get_insns (); - end_sequence (); - sh_need_epilogue_known = (epilogue == NULL ? -1 : 1); -} - return sh_need_epilogue_known > 0; -} - /* Emit code to change the current function's return address to RA. TEMP is available as a scratch register, if needed. */ @@ -7996,7 +7980,6 @@ sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT size ATTRIBUTE_UNUSED) { - sh_need_epilogue_known = 0; } static rtx
Re: [SH] Add simple_return pattern
Christian Bruel wrote: > This patch implements the simple_return pattern to enable -fshrink-wrap > on SH. It also clean up some redundancies for expand_epilogue (called > twice from the "return" and "epilogue" patterns and the > sh_expand_prologue parameter type. > > No regressions with sh-superh-elf and sh4-linux gcc testsuites. With the patch + revision 191106, I've got a new failure: FAIL: gcc.dg/tree-prof/bb-reorg.c compilation, -fprofile-use -D_PROFILE_USE (internal compiler error) for sh4-unknown-linux-gnu. My testsuite/gcc/gcc.log says /exp/ldroot/dodes/xsh-gcc/gcc/xgcc -B/exp/ldroot/dodes/xsh-gcc/gcc/ /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c -fno-diagnostics-show-caret -O2 -freorder-blocks-and-partition -fprofile-use -D_PROFILE_USE -lm -o /exp/ldroot/dodes/xsh-gcc/gcc/testsuite/gcc/bb-reorg.x02 /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c: In function 'main': /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: error: EDGE_CROSSING missing across section boundary /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: internal compiler error: verify_flow_info failed Please submit a full bug report, Regards, kaz
Re: [SH] Add simple_return pattern
On 09/11/2012 12:28 AM, Oleg Endo wrote: > On Mon, 2012-09-10 at 15:51 +0200, Christian Bruel wrote: >> This patch implements the simple_return pattern to enable -fshrink-wrap >> on SH. It also clean up some redundancies for expand_epilogue (called >> twice from the "return" and "epilogue" patterns and the >> sh_expand_prologue parameter type. >> >> No regressions with sh-superh-elf and sh4-linux gcc testsuites. >> >> Thanks >> >> Christian >> > > Regarding the iterators, maybe it's better to put them in > config/sh/iterators.md. The optab code attr is not needed in this case, > "" is sufficient. How about the attached patch instead? yes, there is this new iterator.md file. I'm moving the iterator there. Will resent. Thanks Christian
Re: [SH] Add simple_return pattern
On 09/11/2012 03:05 AM, Kaz Kojima wrote: > Christian Bruel wrote: >> This patch implements the simple_return pattern to enable -fshrink-wrap >> on SH. It also clean up some redundancies for expand_epilogue (called >> twice from the "return" and "epilogue" patterns and the >> sh_expand_prologue parameter type. >> >> No regressions with sh-superh-elf and sh4-linux gcc testsuites. > > With the patch + revision 191106, I've got a new failure: > > FAIL: gcc.dg/tree-prof/bb-reorg.c compilation, -fprofile-use -D_PROFILE_USE > (internal compiler error) > > for sh4-unknown-linux-gnu. My testsuite/gcc/gcc.log says > > /exp/ldroot/dodes/xsh-gcc/gcc/xgcc -B/exp/ldroot/dodes/xsh-gcc/gcc/ > /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c > -fno-diagnostics-show-caret -O2 -freorder-blocks-and-partition -fprofile-use > -D_PROFILE_USE -lm -o /exp/ldroot/dodes/xsh-gcc/gcc/testsuite/gcc/bb-reorg.x02 > /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c: In > function 'main': > /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: > error: EDGE_CROSSING missing across section boundary > /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: > internal compiler error: verify_flow_info failed > Please submit a full bug report, > > Regards, Ugh, indeed, I forgot a SPEC file that set the release mode on my SH-Linux distri, so verify_flow_info was not called :-(. I need to test again. thanks ! Christian > kaz >
Re: [SH] Add simple_return pattern
Hi Kaz, The failure turned out to be issues with the profile count and handling or region partitioning. So, I prefer to handle those separately, For now, I disable shrink-wrap when partitioning, even if the problem seems to have disappeared with the more constrained heuristics. This is probably latent also on other targets BTW. I added a sh_can_use_simple_return_p function that makes the heuristic refinements more convenient. For instance, measured that shrink-wrap is generally not good when optimizing for size because we might introduce new return instructions or split blocks to avoid the epilogue, that is still in the code somewhere anyway. Cycle-accurate benchmarks show a few very small improvements (there and there, about max 2%. accordingly, the prologue is rarely in the critical path...) but no regression. Manual assembly peering of CSiBE show that the transformation are decent. Checked with all assertions this time, Candidate for trunk. Many thanks Christian On 09/11/2012 03:05 AM, Kaz Kojima wrote: > Christian Bruel wrote: >> This patch implements the simple_return pattern to enable -fshrink-wrap >> on SH. It also clean up some redundancies for expand_epilogue (called >> twice from the "return" and "epilogue" patterns and the >> sh_expand_prologue parameter type. >> >> No regressions with sh-superh-elf and sh4-linux gcc testsuites. > > With the patch + revision 191106, I've got a new failure: > > FAIL: gcc.dg/tree-prof/bb-reorg.c compilation, -fprofile-use -D_PROFILE_USE > (internal compiler error) > > for sh4-unknown-linux-gnu. My testsuite/gcc/gcc.log says > > /exp/ldroot/dodes/xsh-gcc/gcc/xgcc -B/exp/ldroot/dodes/xsh-gcc/gcc/ > /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c > -fno-diagnostics-show-caret -O2 -freorder-blocks-and-partition -fprofile-use > -D_PROFILE_USE -lm -o /exp/ldroot/dodes/xsh-gcc/gcc/testsuite/gcc/bb-reorg.x02 > /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c: In > function 'main': > /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: > error: EDGE_CROSSING missing across section boundary > /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: > internal compiler error: verify_flow_info failed > Please submit a full bug report, > > Regards, > kaz > 2012-09-12 Christian Bruel PR target/54546 * config/sh/sh-protos.h (sh_need_epilogue): Delete. (sh_can_use_simple_return_p): Declare. * config/sh/sh.c (sh_can_use_simple_return_p): Define. (sh_need_epilogue, sh_need_epilogue_known): Delete. (sh_output_function_epilogue): Remove sh_need_epilogue_known. * config/sh/sh.md (simple_return, return): Define. (epilogue): Use inline return rtl. (sh_expand_epilogue): Cleanup parameters boolean type. * config/sh/iterators.md (any_return): New iterator. Index: config/sh/sh-protos.h === --- config/sh/sh-protos.h (revision 191129) +++ config/sh/sh-protos.h (working copy) @@ -117,7 +117,6 @@ extern rtx get_fpscr_rtx (void); extern int sh_media_register_for_return (void); extern void sh_expand_prologue (void); extern void sh_expand_epilogue (bool); -extern bool sh_need_epilogue (void); extern void sh_set_return_address (rtx, rtx); extern int initial_elimination_offset (int, int); extern bool fldi_ok (void); @@ -155,4 +154,5 @@ extern int sh2a_get_function_vector_number (rtx); extern bool sh2a_is_function_vector_call (rtx); extern void sh_fix_range (const char *); extern bool sh_hard_regno_mode_ok (unsigned int, enum machine_mode); +extern bool sh_can_use_simple_return_p (void); #endif /* ! GCC_SH_PROTOS_H */ Index: config/sh/sh.c === --- config/sh/sh.c (revision 191129) +++ config/sh/sh.c (working copy) @@ -7899,24 +7899,6 @@ sh_expand_epilogue (bool sibcall_p) emit_use (gen_rtx_REG (SImode, PR_REG)); } -static int sh_need_epilogue_known = 0; - -bool -sh_need_epilogue (void) -{ - if (! sh_need_epilogue_known) -{ - rtx epilogue; - - start_sequence (); - sh_expand_epilogue (0); - epilogue = get_insns (); - end_sequence (); - sh_need_epilogue_known = (epilogue == NULL ? -1 : 1); -} - return sh_need_epilogue_known > 0; -} - /* Emit code to change the current function's return address to RA. TEMP is available as a scratch register, if needed. */ @@ -7996,7 +7978,6 @@ static void sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT size ATTRIBUTE_UNUSED) { - sh_need_epilogue_known = 0; } static rtx @@ -12959,4 +12940,34 @@ sh_init_sync_libfuncs (void) init_sync_libfuncs (UNITS_PER_WORD); } +/* Return true if it is appropriate to emit `ret' instructions in the + body of a function. */ + +bool +sh_can_use_simple_return_p (void) +{ + HARD_REG_SET live_regs_mask; + int d; + + if (! reload_completed || frame_pointer_needed) +
Re: [SH] Add simple_return pattern
Christian Bruel wrote: > The failure turned out to be issues with the profile count and handling > or region partitioning. So, I prefer to handle those separately, > For now, I disable shrink-wrap when partitioning, even if the problem > seems to have disappeared with the more constrained heuristics. This is > probably latent also on other targets BTW. > > I added a sh_can_use_simple_return_p function that makes the heuristic > refinements more convenient. For instance, measured that shrink-wrap is > generally not good when optimizing for size because we might introduce > new return instructions or split blocks to avoid the epilogue, that is > still in the code somewhere anyway. > > Cycle-accurate benchmarks show a few very small improvements (there and > there, about max 2%. accordingly, the prologue is rarely in the critical > path...) but no regression. Manual assembly peering of CSiBE show that > the transformation are decent. > > Checked with all assertions this time, Candidate for trunk. The patch is OK for trunk. Thanks for looking into the problem. Regards, kaz