On Wed, 2015-09-23 at 21:04 +0900, Oleg Endo wrote:
> Hi,
>
> The attached patch fixes PR 67391. Some additional reg overlapping were
> added to the addsi3 patterns while making LRA on SH work, but not all of
> them seem to be good. Removing them, seems to be working just fine.
> Tested on sh-elf (LRA enabled) with make -k check
> RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m4/-ml,-m4/-mb,-m4a/-ml,-m4a/-mb}"
> and by Kaz on sh4-linux.
>
> Committed to trunk as r228046 and to the GCC 5 branch as r228047.
This has opened a small can of worms. The follow up patch is attached
and has been commited to trunk as r228176. Tested by me on sh-elf and
by Kaz on sh4-linux with LRA enabled/disabled.
As a positive side effect, we get some code size reduction on the CSiBE
set: 3345527 bytes -> 3334351 bytes-11176 bytes / -0.334058 %
However, this is only when LRA is disabled because of some problems with
LRA and its usage/handling of addsi3 insns.
Backport to GCC 5 branch will follow.
Cheers,
Oleg
gcc/ChangeLog:
PR target/67391
* config/sh/sh-protos.h (sh_lra_p): Declare.
* config/sh/sh.c (sh_lra_p): Make non-static.
* config/sh/sh.md (addsi3): Use arith_reg_dest for operands[0] and
arith_reg_operand for operands[1]. Remove TARGET_SHMEDIA case.
Expand into addsi3_scr if operands[2] if needed.
(*addsi3_compact): Rename to *addsi3_compact_lra. Use
arith_reg_operand for operands[1]. Allow it only when LRA is enabled.
(addsi3_scr, *addsi3): New insn_and_split patterns.
Index: gcc/config/sh/sh-protos.h
===
--- gcc/config/sh/sh-protos.h (revision 228117)
+++ gcc/config/sh/sh-protos.h (working copy)
@@ -93,6 +93,7 @@
extern rtx sh_fsca_int2sf (void);
/* Declare functions defined in sh.c and used in templates. */
+extern bool sh_lra_p (void);
extern const char *output_branch (int, rtx_insn *, rtx *);
extern const char *output_ieee_ccmpeq (rtx_insn *, rtx *);
Index: gcc/config/sh/sh.c
===
--- gcc/config/sh/sh.c (revision 228117)
+++ gcc/config/sh/sh.c (working copy)
@@ -216,7 +216,6 @@
static int sh_mode_entry (int);
static int sh_mode_exit (int);
static int sh_mode_priority (int entity, int n);
-static bool sh_lra_p (void);
static rtx mark_constant_pool_use (rtx);
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
@@ -14507,7 +14506,7 @@
*/
/* Return true if we use LRA instead of reload pass. */
-static bool
+bool
sh_lra_p (void)
{
return sh_lra_flag;
Index: gcc/config/sh/sh.md
===
--- gcc/config/sh/sh.md (revision 228117)
+++ gcc/config/sh/sh.md (working copy)
@@ -2122,13 +2122,19 @@
})
(define_expand "addsi3"
- [(set (match_operand:SI 0 "arith_reg_operand" "")
- (plus:SI (match_operand:SI 1 "arith_operand" "")
- (match_operand:SI 2 "arith_or_int_operand" "")))]
+ [(set (match_operand:SI 0 "arith_reg_dest")
+ (plus:SI (match_operand:SI 1 "arith_reg_operand")
+ (match_operand:SI 2 "arith_or_int_operand")))]
""
{
- if (TARGET_SHMEDIA)
-operands[1] = force_reg (SImode, operands[1]);
+ if (TARGET_SH1 && !arith_operand (operands[2], SImode))
+{
+ if (!sh_lra_p () || reg_overlap_mentioned_p (operands[0], operands[1]))
+ {
+ emit_insn (gen_addsi3_scr (operands[0], operands[1], operands[2]));
+ DONE;
+ }
+}
})
(define_insn "addsi3_media"
@@ -2163,15 +2169,22 @@
;; copy or constant load before the actual add insn.
;; Use u constraint for that case to avoid the invalid value in the stack
;; pointer.
-(define_insn_and_split "*addsi3_compact"
+;; This also results in better code when LRA is not used. However, we have
+;; to use different sets of patterns and the order of these patterns is
+;; important.
+;; In some cases the constant zero might end up in operands[2] of the
+;; patterns. We have to accept that and convert it into a reg-reg move.
+(define_insn_and_split "*addsi3_compact_lra"
[(set (match_operand:SI 0 "arith_reg_dest" "=r,&u")
- (plus:SI (match_operand:SI 1 "arith_operand" "%0,r")
+ (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r")
(match_operand:SI 2 "arith_or_int_operand" "rI08,rn")))]
- "TARGET_SH1"
+ "TARGET_SH1 && sh_lra_p ()
+ && (! reg_overlap_mentioned_p (operands[0], operands[1])
+ || arith_operand (operands[2], SImode))"
"@
add %2,%0
#"
- "reload_completed
+ "&& reload_completed
&& ! reg_overlap_mentioned_p (operands[0], operands[1])"
[(set (match_dup 0) (match_dup 2))
(set (match_dup 0) (plus:SI (match_dup 0) (match_dup 1)))]
@@ -2182,6 +2195,58 @@
}
[(set_attr "type" "arith")])
+(define_insn_and_split "addsi3_scr"
+ [(set (match_operand:SI 0 "arith_reg_dest" "=r,&u,&u")
+ (plus:SI (match_operand:SI 1 "arith_reg_operand" "%0,r,r")
+ (match_operand:SI 2 "arith_or_int_operand