Re: [SH][committed] Fix PR 67391

2015-09-28 Thread Oleg Endo
On Sun, 2015-09-27 at 21:03 +0900, Oleg Endo wrote:
> On Wed, 2015-09-23 at 21:04 +0900, Oleg Endo wrote:
> > Hi,
> > 
> > The attached patch fixes PR 67391.  Some additional reg overlapping were
> > added to the addsi3 patterns while making LRA on SH work, but not all of
> > them seem to be good.  Removing them, seems to be working just fine.
> > Tested on sh-elf (LRA enabled) with make -k check
> > RUNTESTFLAGS="--target_board=sh-sim
> > \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> > and by Kaz on sh4-linux.
> > 
> > Committed to trunk as r228046 and to the GCC 5 branch as r228047.
> 
> This has opened a small can of worms.  The follow up patch is attached
> and has been commited to trunk as r228176.  Tested by me on sh-elf and
> by Kaz on sh4-linux with LRA enabled/disabled.
> 
> As a positive side effect, we get some code size reduction on the CSiBE
> set: 3345527 bytes -> 3334351 bytes-11176 bytes / -0.334058 %
> 
> However, this is only when LRA is disabled because of some problems with
> LRA and its usage/handling of addsi3 insns.
> 
> Backport to GCC 5 branch will follow.

Now also committed to GCC 5 branch as r228201.  Tested on the branch as
above.

Cheers,
Oleg



Re: [SH][committed] Fix PR 67391

2015-09-27 Thread Oleg Endo
On Wed, 2015-09-23 at 21:04 +0900, Oleg Endo wrote:
> Hi,
> 
> The attached patch fixes PR 67391.  Some additional reg overlapping were
> added to the addsi3 patterns while making LRA on SH work, but not all of
> them seem to be good.  Removing them, seems to be working just fine.
> Tested on sh-elf (LRA enabled) with make -k check
> RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> and by Kaz on sh4-linux.
> 
> Committed to trunk as r228046 and to the GCC 5 branch as r228047.

This has opened a small can of worms.  The follow up patch is attached
and has been commited to trunk as r228176.  Tested by me on sh-elf and
by Kaz on sh4-linux with LRA enabled/disabled.

As a positive side effect, we get some code size reduction on the CSiBE
set: 3345527 bytes -> 3334351 bytes-11176 bytes / -0.334058 %

However, this is only when LRA is disabled because of some problems with
LRA and its usage/handling of addsi3 insns.

Backport to GCC 5 branch will follow.

Cheers,
Oleg

gcc/ChangeLog:
PR target/67391
* config/sh/sh-protos.h (sh_lra_p): Declare.
* config/sh/sh.c (sh_lra_p): Make non-static.
* config/sh/sh.md (addsi3): Use arith_reg_dest for operands[0] and
arith_reg_operand for operands[1].  Remove TARGET_SHMEDIA case.
Expand into addsi3_scr if operands[2] if needed.
(*addsi3_compact): Rename to *addsi3_compact_lra.  Use
arith_reg_operand for operands[1].  Allow it only when LRA is enabled.
(addsi3_scr, *addsi3): New insn_and_split patterns.
Index: gcc/config/sh/sh-protos.h
===
--- gcc/config/sh/sh-protos.h	(revision 228117)
+++ gcc/config/sh/sh-protos.h	(working copy)
@@ -93,6 +93,7 @@
 extern rtx sh_fsca_int2sf (void);
 
 /* Declare functions defined in sh.c and used in templates.  */
+extern bool sh_lra_p (void);
 
 extern const char *output_branch (int, rtx_insn *, rtx *);
 extern const char *output_ieee_ccmpeq (rtx_insn *, rtx *);
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c	(revision 228117)
+++ gcc/config/sh/sh.c	(working copy)
@@ -216,7 +216,6 @@
 static int sh_mode_entry (int);
 static int sh_mode_exit (int);
 static int sh_mode_priority (int entity, int n);
-static bool sh_lra_p (void);
 
 static rtx mark_constant_pool_use (rtx);
 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
@@ -14507,7 +14506,7 @@
 */
 
 /* Return true if we use LRA instead of reload pass.  */
-static bool
+bool
 sh_lra_p (void)
 {
   return sh_lra_flag;
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 228117)
+++ gcc/config/sh/sh.md	(working copy)
@@ -2122,13 +2122,19 @@
 })
 
 (define_expand "addsi3"
-  [(set (match_operand:SI 0 "arith_reg_operand" "")
-	(plus:SI (match_operand:SI 1 "arith_operand" "")
-		 (match_operand:SI 2 "arith_or_int_operand" "")))]
+  [(set (match_operand:SI 0 "arith_reg_dest")
+	(plus:SI (match_operand:SI 1 "arith_reg_operand")
+		 (match_operand:SI 2 "arith_or_int_operand")))]
   ""
 {
-  if (TARGET_SHMEDIA)
-operands[1] = force_reg (SImode, operands[1]);
+  if (TARGET_SH1 && !arith_operand (operands[2], SImode))
+{
+  if (!sh_lra_p () || reg_overlap_mentioned_p (operands[0], operands[1]))
+	{
+	  emit_insn (gen_addsi3_scr (operands[0], operands[1], operands[2]));
+	  DONE;
+	}
+}
 })
 
 (define_insn "addsi3_media"
@@ -2163,15 +2169,22 @@
 ;; copy or constant load before the actual add insn.
 ;; Use u constraint for that case to avoid the invalid value in the stack
 ;; pointer.
-(define_insn_and_split "*addsi3_compact"
+;; This also results in better code when LRA is not used.  However, we have
+;; to use different sets of patterns and the order of these patterns is
+;; important.
+;; In some cases the constant zero might end up in operands[2] of the
+;; patterns.  We have to accept that and convert it into a reg-reg move.
+(define_insn_and_split "*addsi3_compact_lra"
   [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u")
-	(plus:SI (match_operand:SI 1 "arith_operand" "%0,r")
+	(plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r")
 		 (match_operand:SI 2 "arith_or_int_operand" "rI08,rn")))]
-  "TARGET_SH1"
+  "TARGET_SH1 && sh_lra_p ()
+   && (! reg_overlap_mentioned_p (operands[0], operands[1])
+   || arith_operand (operands[2], SImode))"
   "@
 	add	%2,%0
 	#"
-  "reload_completed
+  "&& reload_completed
&& ! reg_overlap_mentioned_p (operands[0], operands[1])"
   [(set (match_dup 0) (match_dup 2))
(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))]
@@ -2182,6 +2195,58 @@
 }
   [(set_attr "type" "arith")])
 
+(define_insn_and_split "addsi3_scr"
+  [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u,&u")
+	(plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r,r")
+		 (match_operand:SI 2 "arith_or_int_operand

[SH][committed] Fix PR 67391

2015-09-23 Thread Oleg Endo
Hi,

The attached patch fixes PR 67391.  Some additional reg overlapping were
added to the addsi3 patterns while making LRA on SH work, but not all of
them seem to be good.  Removing them, seems to be working just fine.
Tested on sh-elf (LRA enabled) with make -k check
RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
and by Kaz on sh4-linux.

Committed to trunk as r228046 and to the GCC 5 branch as r228047.

Cheers,
Oleg

gcc/ChangeLog:
PR target/67391
* config/sh/sh.md (addsi3, *addsi3_compact): Don't check for overlapping
regs when matching the pattern.
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md	(revision 228020)
+++ gcc/config/sh/sh.md	(working copy)
@@ -2129,11 +2129,6 @@
 {
   if (TARGET_SHMEDIA)
 operands[1] = force_reg (SImode, operands[1]);
-  else if (! arith_operand (operands[2], SImode))
-{
-  if (reg_overlap_mentioned_p (operands[0], operands[1]))
-	FAIL;
-}
 })
 
 (define_insn "addsi3_media"
@@ -2172,10 +2167,7 @@
   [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u")
 	(plus:SI (match_operand:SI 1 "arith_operand" "%0,r")
 		 (match_operand:SI 2 "arith_or_int_operand" "rI08,rn")))]
-  "TARGET_SH1
-   && ((rtx_equal_p (operands[0], operands[1])
-&& arith_operand (operands[2], SImode))
-   || ! reg_overlap_mentioned_p (operands[0], operands[1]))"
+  "TARGET_SH1"
   "@
 	add	%2,%0
 	#"