[SH] Add simple_return pattern

2012-09-10 Thread Christian Bruel
This patch implements the simple_return pattern to enable -fshrink-wrap
on SH. It also clean up some redundancies for expand_epilogue (called
twice from the "return" and "epilogue" patterns and the
sh_expand_prologue parameter type.

No regressions with sh-superh-elf and sh4-linux gcc testsuites.

Thanks

Christian

2012-08-29  Christian Bruel  

	* config/sh/sh-protos.h (sh_need_epilogue): Delete.
	* config/sh/sh.c (sh_need_epilogue): Delete.
	(sh_need_epilogue_known): Delete.
	(sh_output_function_epilogue): Remove sh_need_epilogue_known.
	* config/sh/sh.md (any_return): New iterator and optab.
	(simple_return): Define.
	(return): Check epilogue_completed.
	(epilogue): Use inline return rtl.
	(sh_expand_epilogue): Cleanup parameters boolean type.

Index: gcc/config/sh/sh-protos.h
===
--- gcc/config/sh/sh-protos.h	(revision 191129)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -117,7 +117,6 @@
 extern int sh_media_register_for_return (void);
 extern void sh_expand_prologue (void);
 extern void sh_expand_epilogue (bool);
-extern bool sh_need_epilogue (void);
 extern void sh_set_return_address (rtx, rtx);
 extern int initial_elimination_offset (int, int);
 extern bool fldi_ok (void);
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 191129)
+++ gcc/config/sh/sh.c	(working copy)
@@ -7901,22 +7901,6 @@
 
 static int sh_need_epilogue_known = 0;
 
-bool
-sh_need_epilogue (void)
-{
-  if (! sh_need_epilogue_known)
-{
-  rtx epilogue;
-
-  start_sequence ();
-  sh_expand_epilogue (0);
-  epilogue = get_insns ();
-  end_sequence ();
-  sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
-}
-  return sh_need_epilogue_known > 0;
-}
-
 /* Emit code to change the current function's return address to RA.
TEMP is available as a scratch register, if needed.  */
 
@@ -7996,7 +7980,6 @@
 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
 			 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
 {
-  sh_need_epilogue_known = 0;
 }
 
 static rtx
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 191129)
+++ gcc/config/sh/sh.md	(working copy)
@@ -177,6 +177,10 @@
   (UNSPECV_EH_RETURN	12)
 ])
 
+(define_code_iterator any_return [return simple_return])
+(define_code_attr optab [(return "return")
+			 (simple_return "simple_return")])
+
 ;; -
 ;; Attributes
 ;; -
@@ -9280,7 +9284,7 @@
   [(return)]
   ""
 {
-  sh_expand_epilogue (1);
+  sh_expand_epilogue (true);
   if (TARGET_SHCOMPACT)
 {
   rtx insn, set;
@@ -10099,9 +10103,13 @@
 }
   [(set_attr "type" "load_media")])
 
+(define_expand "simple_return"
+  [(simple_return)]
+ "")
+
 (define_expand "return"
-  [(return)]
-  "reload_completed && ! sh_need_epilogue ()"
+  [(simple_return)]
+ "reload_completed && epilogue_completed"
 {
   if (TARGET_SHMEDIA)
 {
@@ -10117,8 +10125,8 @@
 }
 })
 
-(define_insn "*return_i"
-  [(return)]
+(define_insn "*_i"
+  [(any_return)]
   "TARGET_SH1 && ! (TARGET_SHCOMPACT
 		&& (crtl->args.info.call_cookie
 			& CALL_COOKIE_RET_TRAMP (1)))
@@ -10244,19 +10252,12 @@
 (define_expand "prologue"
   [(const_int 0)]
   ""
-{
-  sh_expand_prologue ();
-  DONE;
-})
+  "sh_expand_prologue (); DONE;")
 
 (define_expand "epilogue"
   [(return)]
   ""
-{
-  sh_expand_epilogue (0);
-  emit_jump_insn (gen_return ());
-  DONE;
-})
+  "sh_expand_epilogue (false);")
 
 (define_expand "eh_return"
   [(use (match_operand 0 "register_operand" ""))]


Re: [SH] Add simple_return pattern

2012-09-10 Thread Oleg Endo
On Mon, 2012-09-10 at 15:51 +0200, Christian Bruel wrote:
> This patch implements the simple_return pattern to enable -fshrink-wrap
> on SH. It also clean up some redundancies for expand_epilogue (called
> twice from the "return" and "epilogue" patterns and the
> sh_expand_prologue parameter type.
> 
> No regressions with sh-superh-elf and sh4-linux gcc testsuites.
> 
> Thanks
> 
> Christian
> 

Regarding the iterators, maybe it's better to put them in
config/sh/iterators.md.  The optab code attr is not needed in this case,
"" is sufficient.  How about the attached patch instead?

BTW, I'm now also testing the modified attached patch and your previous
newlib related patch.

Cheers,
Oleg
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 191161)
+++ gcc/config/sh/sh.md	(working copy)
@@ -9335,7 +9335,7 @@
   [(return)]
   ""
 {
-  sh_expand_epilogue (1);
+  sh_expand_epilogue (true);
   if (TARGET_SHCOMPACT)
 {
   rtx insn, set;
@@ -10154,9 +10154,12 @@
 }
   [(set_attr "type" "load_media")])
 
+(define_expand "simple_return"
+  [(simple_return)])
+
 (define_expand "return"
-  [(return)]
-  "reload_completed && ! sh_need_epilogue ()"
+  [(simple_return)]
+ "reload_completed && epilogue_completed"
 {
   if (TARGET_SHMEDIA)
 {
@@ -10172,8 +10175,8 @@
 }
 })
 
-(define_insn "*return_i"
-  [(return)]
+(define_insn "*_i"
+  [(RETURN)]
   "TARGET_SH1 && ! (TARGET_SHCOMPACT
 		&& (crtl->args.info.call_cookie
 			& CALL_COOKIE_RET_TRAMP (1)))
@@ -10299,19 +10302,12 @@
 (define_expand "prologue"
   [(const_int 0)]
   ""
-{
-  sh_expand_prologue ();
-  DONE;
-})
+  "sh_expand_prologue (); DONE;")
 
 (define_expand "epilogue"
   [(return)]
   ""
-{
-  sh_expand_epilogue (0);
-  emit_jump_insn (gen_return ());
-  DONE;
-})
+  "sh_expand_epilogue (false);")
 
 (define_expand "eh_return"
   [(use (match_operand 0 "register_operand" ""))]
Index: gcc/config/sh/iterators.md
===
--- gcc/config/sh/iterators.md	(revision 191161)
+++ gcc/config/sh/iterators.md	(working copy)
@@ -34,3 +34,5 @@
 (define_mode_attr disp04 [(QI "K04") (HI "K05")])
 (define_mode_attr disp12 [(QI "K12") (HI "K13")])
 
+;; Code iterator for return codes.
+(define_code_iterator RETURN [return simple_return])
Index: gcc/config/sh/sh-protos.h
===
--- gcc/config/sh/sh-protos.h	(revision 191161)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -117,7 +117,6 @@
 extern int sh_media_register_for_return (void);
 extern void sh_expand_prologue (void);
 extern void sh_expand_epilogue (bool);
-extern bool sh_need_epilogue (void);
 extern void sh_set_return_address (rtx, rtx);
 extern int initial_elimination_offset (int, int);
 extern bool fldi_ok (void);
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 191161)
+++ gcc/config/sh/sh.c	(working copy)
@@ -7901,22 +7901,6 @@
 
 static int sh_need_epilogue_known = 0;
 
-bool
-sh_need_epilogue (void)
-{
-  if (! sh_need_epilogue_known)
-{
-  rtx epilogue;
-
-  start_sequence ();
-  sh_expand_epilogue (0);
-  epilogue = get_insns ();
-  end_sequence ();
-  sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
-}
-  return sh_need_epilogue_known > 0;
-}
-
 /* Emit code to change the current function's return address to RA.
TEMP is available as a scratch register, if needed.  */
 
@@ -7996,7 +7980,6 @@
 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
 			 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
 {
-  sh_need_epilogue_known = 0;
 }
 
 static rtx


Re: [SH] Add simple_return pattern

2012-09-10 Thread Kaz Kojima
Christian Bruel  wrote:
> This patch implements the simple_return pattern to enable -fshrink-wrap
> on SH. It also clean up some redundancies for expand_epilogue (called
> twice from the "return" and "epilogue" patterns and the
> sh_expand_prologue parameter type.
> 
> No regressions with sh-superh-elf and sh4-linux gcc testsuites.

With the patch + revision 191106, I've got a new failure:

FAIL: gcc.dg/tree-prof/bb-reorg.c compilation,  -fprofile-use -D_PROFILE_USE 
(internal compiler error)

for sh4-unknown-linux-gnu.  My testsuite/gcc/gcc.log says

/exp/ldroot/dodes/xsh-gcc/gcc/xgcc -B/exp/ldroot/dodes/xsh-gcc/gcc/ 
/exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c 
-fno-diagnostics-show-caret -O2 -freorder-blocks-and-partition -fprofile-use 
-D_PROFILE_USE -lm -o /exp/ldroot/dodes/xsh-gcc/gcc/testsuite/gcc/bb-reorg.x02
/exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c: In 
function 'main':
/exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: 
error: EDGE_CROSSING missing across section boundary
/exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: 
internal compiler error: verify_flow_info failed
Please submit a full bug report,

Regards,
kaz


Re: [SH] Add simple_return pattern

2012-09-10 Thread Christian Bruel


On 09/11/2012 12:28 AM, Oleg Endo wrote:
> On Mon, 2012-09-10 at 15:51 +0200, Christian Bruel wrote:
>> This patch implements the simple_return pattern to enable -fshrink-wrap
>> on SH. It also clean up some redundancies for expand_epilogue (called
>> twice from the "return" and "epilogue" patterns and the
>> sh_expand_prologue parameter type.
>>
>> No regressions with sh-superh-elf and sh4-linux gcc testsuites.
>>
>> Thanks
>>
>> Christian
>>
> 
> Regarding the iterators, maybe it's better to put them in
> config/sh/iterators.md.  The optab code attr is not needed in this case,
> "" is sufficient.  How about the attached patch instead?


yes, there is this new iterator.md file. I'm moving the iterator there.
Will resent.

Thanks

Christian


Re: [SH] Add simple_return pattern

2012-09-11 Thread Christian Bruel

On 09/11/2012 03:05 AM, Kaz Kojima wrote:
> Christian Bruel  wrote:
>> This patch implements the simple_return pattern to enable -fshrink-wrap
>> on SH. It also clean up some redundancies for expand_epilogue (called
>> twice from the "return" and "epilogue" patterns and the
>> sh_expand_prologue parameter type.
>>
>> No regressions with sh-superh-elf and sh4-linux gcc testsuites.
> 
> With the patch + revision 191106, I've got a new failure:
> 
> FAIL: gcc.dg/tree-prof/bb-reorg.c compilation,  -fprofile-use -D_PROFILE_USE 
> (internal compiler error)
> 
> for sh4-unknown-linux-gnu.  My testsuite/gcc/gcc.log says
> 
> /exp/ldroot/dodes/xsh-gcc/gcc/xgcc -B/exp/ldroot/dodes/xsh-gcc/gcc/ 
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c 
> -fno-diagnostics-show-caret -O2 -freorder-blocks-and-partition -fprofile-use 
> -D_PROFILE_USE -lm -o /exp/ldroot/dodes/xsh-gcc/gcc/testsuite/gcc/bb-reorg.x02
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c: In 
> function 'main':
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: 
> error: EDGE_CROSSING missing across section boundary
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: 
> internal compiler error: verify_flow_info failed
> Please submit a full bug report,
> 
> Regards,

Ugh, indeed, I forgot a SPEC file that set the release mode on my
SH-Linux distri, so verify_flow_info was not called :-(. I need to test
again.

thanks !

Christian

>   kaz
> 


Re: [SH] Add simple_return pattern

2012-09-13 Thread Christian Bruel
Hi Kaz,

The failure turned out to be issues with the profile count and handling
or region partitioning. So, I prefer to handle those separately,
For now, I disable shrink-wrap when partitioning, even if the problem
seems to have disappeared with the more constrained heuristics. This is
probably latent also on other targets BTW.

I added a sh_can_use_simple_return_p function that makes the heuristic
refinements more convenient. For instance, measured that shrink-wrap is
generally not good when optimizing for size because we might introduce
new return instructions or split blocks to avoid the epilogue, that is
still in the code somewhere anyway.

Cycle-accurate benchmarks show a few very small improvements (there and
there, about max 2%. accordingly, the prologue is rarely in the critical
path...) but no regression. Manual assembly peering of CSiBE show that
the transformation are decent.

Checked with all assertions this time, Candidate for trunk.

Many thanks

Christian


On 09/11/2012 03:05 AM, Kaz Kojima wrote:
> Christian Bruel  wrote:
>> This patch implements the simple_return pattern to enable -fshrink-wrap
>> on SH. It also clean up some redundancies for expand_epilogue (called
>> twice from the "return" and "epilogue" patterns and the
>> sh_expand_prologue parameter type.
>>
>> No regressions with sh-superh-elf and sh4-linux gcc testsuites.
> 
> With the patch + revision 191106, I've got a new failure:
> 
> FAIL: gcc.dg/tree-prof/bb-reorg.c compilation,  -fprofile-use -D_PROFILE_USE 
> (internal compiler error)
> 
> for sh4-unknown-linux-gnu.  My testsuite/gcc/gcc.log says
> 
> /exp/ldroot/dodes/xsh-gcc/gcc/xgcc -B/exp/ldroot/dodes/xsh-gcc/gcc/ 
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c 
> -fno-diagnostics-show-caret -O2 -freorder-blocks-and-partition -fprofile-use 
> -D_PROFILE_USE -lm -o /exp/ldroot/dodes/xsh-gcc/gcc/testsuite/gcc/bb-reorg.x02
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c: In 
> function 'main':
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: 
> error: EDGE_CROSSING missing across section boundary
> /exp/ldroot/dodes/LOCAL/trunk/gcc/testsuite/gcc.dg/tree-prof/bb-reorg.c:38:1: 
> internal compiler error: verify_flow_info failed
> Please submit a full bug report,
> 
> Regards,
>   kaz
> 
2012-09-12  Christian Bruel  

	PR target/54546
	* config/sh/sh-protos.h (sh_need_epilogue): Delete.
	(sh_can_use_simple_return_p): Declare.
	* config/sh/sh.c (sh_can_use_simple_return_p): Define.
	(sh_need_epilogue, sh_need_epilogue_known): Delete.
	(sh_output_function_epilogue): Remove sh_need_epilogue_known.
	* config/sh/sh.md (simple_return, return): Define.
	(epilogue): Use inline return rtl.
	(sh_expand_epilogue): Cleanup parameters boolean type.
	* config/sh/iterators.md (any_return): New iterator.

Index: config/sh/sh-protos.h
===
--- config/sh/sh-protos.h	(revision 191129)
+++ config/sh/sh-protos.h	(working copy)
@@ -117,7 +117,6 @@ extern rtx get_fpscr_rtx (void);
 extern int sh_media_register_for_return (void);
 extern void sh_expand_prologue (void);
 extern void sh_expand_epilogue (bool);
-extern bool sh_need_epilogue (void);
 extern void sh_set_return_address (rtx, rtx);
 extern int initial_elimination_offset (int, int);
 extern bool fldi_ok (void);
@@ -155,4 +154,5 @@ extern int sh2a_get_function_vector_number (rtx);
 extern bool sh2a_is_function_vector_call (rtx);
 extern void sh_fix_range (const char *);
 extern bool sh_hard_regno_mode_ok (unsigned int, enum machine_mode);
+extern bool sh_can_use_simple_return_p (void);
 #endif /* ! GCC_SH_PROTOS_H */
Index: config/sh/sh.c
===
--- config/sh/sh.c	(revision 191129)
+++ config/sh/sh.c	(working copy)
@@ -7899,24 +7899,6 @@ sh_expand_epilogue (bool sibcall_p)
 emit_use (gen_rtx_REG (SImode, PR_REG));
 }
 
-static int sh_need_epilogue_known = 0;
-
-bool
-sh_need_epilogue (void)
-{
-  if (! sh_need_epilogue_known)
-{
-  rtx epilogue;
-
-  start_sequence ();
-  sh_expand_epilogue (0);
-  epilogue = get_insns ();
-  end_sequence ();
-  sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
-}
-  return sh_need_epilogue_known > 0;
-}
-
 /* Emit code to change the current function's return address to RA.
TEMP is available as a scratch register, if needed.  */
 
@@ -7996,7 +7978,6 @@ static void
 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
 			 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
 {
-  sh_need_epilogue_known = 0;
 }
 
 static rtx
@@ -12959,4 +12940,34 @@ sh_init_sync_libfuncs (void)
   init_sync_libfuncs (UNITS_PER_WORD);
 }
 
+/* Return true if it is appropriate to emit `ret' instructions in the
+   body of a function.  */
+
+bool
+sh_can_use_simple_return_p (void)
+{
+  HARD_REG_SET live_regs_mask;
+  int d;
+
+  if (! reload_completed || frame_pointer_needed)
+

Re: [SH] Add simple_return pattern

2012-09-13 Thread Kaz Kojima
Christian Bruel  wrote:
> The failure turned out to be issues with the profile count and handling
> or region partitioning. So, I prefer to handle those separately,
> For now, I disable shrink-wrap when partitioning, even if the problem
> seems to have disappeared with the more constrained heuristics. This is
> probably latent also on other targets BTW.
> 
> I added a sh_can_use_simple_return_p function that makes the heuristic
> refinements more convenient. For instance, measured that shrink-wrap is
> generally not good when optimizing for size because we might introduce
> new return instructions or split blocks to avoid the epilogue, that is
> still in the code somewhere anyway.
> 
> Cycle-accurate benchmarks show a few very small improvements (there and
> there, about max 2%. accordingly, the prologue is rarely in the critical
> path...) but no regression. Manual assembly peering of CSiBE show that
> the transformation are decent.
> 
> Checked with all assertions this time, Candidate for trunk.

The patch is OK for trunk.  Thanks for looking into the problem.

Regards,
kaz