On Wed, Nov 16, 2016 at 04:15:10PM +0000, Andrew Stubbs wrote:
> On 16/11/16 13:10, Michael Meissner wrote:
> >Yeah, SFmode and DFmode should not have the TARGET_{S,D}F_FPR checks.
>
> So, I can safely resolve my initial problem by simply removing them?
> And that wouldn't break the other use of that predicate?
>
> >But a secondary problem is the early clobber in the match_scratch.
>
> So, the FPR_FUSION insn works because operands 1 and 2 cannot
> conflict, which means the early-clobber is not necessary, but the
> GPR_FUSION insn cannot work because there's no way to ensure that
> operands 1 and 2 don't conflict without also specifying that
> operands 0 and 2 don't conflict, which they commonly do.
>
> We could fix it, for now, by adding new patterns that fit both cases
> (given that the register numbers are known at peephole time).
>
> Or, we could disable the peephole in the case where this would occur
> (as my original patch does, albeit bluntly).
I'm starting to test this patch right now (it's on LE power8 stage3 right now,
and I need to build BE power8 and BE power7 versions when I get into the office
shortly, and build spec 2017 with it for PR 78101):
[gcc]
2016-11-16 Michael Meissner <[email protected]>
PR target/78101
* config/rs6000/predicates.md (fusion_addis_mem_combo_load): Add
the appropriate checks for SFmode/DFmode load/stores in GPR
registers.
(fusion_addis_mem_combo_store): Likewise.
* config/rs6000/rs6000.c (rs6000_init_hard_regno_mode_ok): Rename
fusion_fpr_* to fusion_vsx_* and add in support for ISA 3.0 scalar
d-form instructions for traditional Altivec registers.
(emit_fusion_p9_load): Likewise.
(emit_fusion_p9_store): Likewise.
* config/rs6000/rs6000.md (p9 fusion store peephole2): Remove
early clobber from scratch register. Do not match if the register
being stored is the scratch register.
(fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load): Rename fusion_fpr_*
to fusion_vsx_* and add in support for ISA 3.0 scalar d-form
instructions for traditional Altivec registers.
(fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load): Likewise.
(fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store): Likewise.
(fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store): Likewise.
[gcc/testsuite]
2016-11-16 Michael Meissner <[email protected]>
PR target/78101
* gcc.target/powerpc/fusion4.c: New test.
--
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: [email protected], phone: +1 (978) 899-4797
Index: gcc/config/rs6000/predicates.md
===================================================================
--- gcc/config/rs6000/predicates.md
(.../svn+ssh://[email protected]/svn/gcc/trunk/gcc/config/rs6000)
(revision 242456)
+++ gcc/config/rs6000/predicates.md (.../gcc/config/rs6000) (working copy)
@@ -1844,7 +1844,7 @@ (define_predicate "fusion_gpr_mem_load"
;; Match a GPR load (lbz, lhz, lwz, ld) that uses a combined address in the
;; memory field with both the addis and the memory offset. Sign extension
;; is not handled here, since lha and lwa are not fused.
-;; With extended fusion, also match a FPR load (lfd, lfs) and float_extend
+;; With P9 fusion, also match a fpr/vector load and float_extend
(define_predicate "fusion_addis_mem_combo_load"
(match_code "mem,zero_extend,float_extend")
{
@@ -1873,11 +1873,15 @@ (define_predicate "fusion_addis_mem_comb
break;
case SFmode:
- case DFmode:
if (!TARGET_P9_FUSION)
return 0;
break;
+ case DFmode:
+ if ((!TARGET_POWERPC64 && !TARGET_DF_FPR) || !TARGET_P9_FUSION)
+ return 0;
+ break;
+
default:
return 0;
}
@@ -1920,6 +1924,7 @@ (define_predicate "fusion_addis_mem_comb
case QImode:
case HImode:
case SImode:
+ case SFmode:
break;
case DImode:
@@ -1927,13 +1932,8 @@ (define_predicate "fusion_addis_mem_comb
return 0;
break;
- case SFmode:
- if (!TARGET_SF_FPR)
- return 0;
- break;
-
case DFmode:
- if (!TARGET_DF_FPR)
+ if (!TARGET_POWERPC64 && !TARGET_DF_FPR)
return 0;
break;
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c
(.../svn+ssh://[email protected]/svn/gcc/trunk/gcc/config/rs6000)
(revision 242456)
+++ gcc/config/rs6000/rs6000.c (.../gcc/config/rs6000) (working copy)
@@ -3441,28 +3441,28 @@ rs6000_init_hard_regno_mode_ok (bool glo
static const struct fuse_insns addis_insns[] = {
{ SFmode, DImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_fpr_di_sf_load,
- CODE_FOR_fusion_fpr_di_sf_store },
+ CODE_FOR_fusion_vsx_di_sf_load,
+ CODE_FOR_fusion_vsx_di_sf_store },
{ SFmode, SImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_fpr_si_sf_load,
- CODE_FOR_fusion_fpr_si_sf_store },
+ CODE_FOR_fusion_vsx_si_sf_load,
+ CODE_FOR_fusion_vsx_si_sf_store },
{ DFmode, DImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_fpr_di_df_load,
- CODE_FOR_fusion_fpr_di_df_store },
+ CODE_FOR_fusion_vsx_di_df_load,
+ CODE_FOR_fusion_vsx_di_df_store },
{ DFmode, SImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_fpr_si_df_load,
- CODE_FOR_fusion_fpr_si_df_store },
+ CODE_FOR_fusion_vsx_si_df_load,
+ CODE_FOR_fusion_vsx_si_df_store },
{ DImode, DImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_fpr_di_di_load,
- CODE_FOR_fusion_fpr_di_di_store },
+ CODE_FOR_fusion_vsx_di_di_load,
+ CODE_FOR_fusion_vsx_di_di_store },
{ DImode, SImode, RELOAD_REG_FPR,
- CODE_FOR_fusion_fpr_si_di_load,
- CODE_FOR_fusion_fpr_si_di_store },
+ CODE_FOR_fusion_vsx_si_di_load,
+ CODE_FOR_fusion_vsx_si_di_store },
{ QImode, DImode, RELOAD_REG_GPR,
CODE_FOR_fusion_gpr_di_qi_load,
@@ -3522,6 +3522,14 @@ rs6000_init_hard_regno_mode_ok (bool glo
reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load;
reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store;
+
+ if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR)
+ {
+ reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX]
+ = addis_insns[i].load;
+ reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX]
+ = addis_insns[i].store;
+ }
}
}
@@ -39817,6 +39825,15 @@ emit_fusion_p9_load (rtx reg, rtx mem, r
else
gcc_unreachable ();
}
+ else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
+ {
+ if (mode == SFmode)
+ load_string = "lxssp";
+ else if (mode == DFmode || mode == DImode)
+ load_string = "lxsd";
+ else
+ gcc_unreachable ();
+ }
else if (INT_REGNO_P (r))
{
switch (mode)
@@ -39895,6 +39912,15 @@ emit_fusion_p9_store (rtx mem, rtx reg,
else
gcc_unreachable ();
}
+ else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
+ {
+ if (mode == SFmode)
+ store_string = "stxssp";
+ else if (mode == DFmode || mode == DImode)
+ store_string = "stxsd";
+ else
+ gcc_unreachable ();
+ }
else if (INT_REGNO_P (r))
{
switch (mode)
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md
(.../svn+ssh://[email protected]/svn/gcc/trunk/gcc/config/rs6000)
(revision 242456)
+++ gcc/config/rs6000/rs6000.md (.../gcc/config/rs6000) (working copy)
@@ -13438,7 +13438,8 @@ (define_peephole2
(set (match_operand:SFDF 2 "offsettable_mem_operand" "")
(match_operand:SFDF 3 "toc_fusion_or_p9_reg_operand" ""))]
"TARGET_P9_FUSION && peep2_reg_dead_p (2, operands[0])
- && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])"
+ && fusion_p9_p (operands[0], operands[1], operands[2], operands[3])
+ && !rtx_equal_p (operands[0], operands[3])"
[(const_int 0)]
{
expand_fusion_p9_store (operands);
@@ -13496,7 +13497,7 @@ (define_insn "fusion_gpr_<P:mode>_<GPR_F
(unspec:GPR_FUSION
[(match_operand:GPR_FUSION 1 "int_reg_operand" "r")]
UNSPEC_FUSION_P9))
- (clobber (match_operand:P 2 "base_reg_operand" "=&b"))]
+ (clobber (match_operand:P 2 "base_reg_operand" "=b"))]
"TARGET_P9_FUSION"
{
return emit_fusion_p9_store (operands[0], operands[1], operands[2]);
@@ -13504,8 +13505,8 @@ (define_insn "fusion_gpr_<P:mode>_<GPR_F
[(set_attr "type" "store")
(set_attr "length" "8")])
-(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_load"
- [(set (match_operand:FPR_FUSION 0 "fpr_reg_operand" "=d")
+(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_load"
+ [(set (match_operand:FPR_FUSION 0 "vsx_register_operand" "=dwb")
(unspec:FPR_FUSION
[(match_operand:FPR_FUSION 1 "fusion_addis_mem_combo_load" "wF")]
UNSPEC_FUSION_P9))
@@ -13517,10 +13518,10 @@ (define_insn "fusion_fpr_<P:mode>_<FPR_F
[(set_attr "type" "fpload")
(set_attr "length" "8")])
-(define_insn "fusion_fpr_<P:mode>_<FPR_FUSION:mode>_store"
+(define_insn "fusion_vsx_<P:mode>_<FPR_FUSION:mode>_store"
[(set (match_operand:FPR_FUSION 0 "fusion_addis_mem_combo_store" "=wF")
(unspec:FPR_FUSION
- [(match_operand:FPR_FUSION 1 "fpr_reg_operand" "d")]
+ [(match_operand:FPR_FUSION 1 "vsx_register_operand" "dwb")]
UNSPEC_FUSION_P9))
(clobber (match_operand:P 2 "base_reg_operand" "=b"))]
"TARGET_P9_FUSION"
Index: gcc/testsuite/gcc.target/powerpc/fusion4.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/fusion4.c
(.../svn+ssh://[email protected]/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)
(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/fusion4.c
(.../gcc/testsuite/gcc.target/powerpc) (revision 242499)
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { powerpc*-*-* && ilp32 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } {
"-mcpu=power7" } } */
+/* { dg-options "-mcpu=power7 -mtune=power9 -O3 -msoft-float -m32" } */
+
+#define LARGE 0x12345
+
+float fusion_float_read (float *p){ return p[LARGE]; }
+
+void fusion_float_write (float *p, float f){ p[LARGE] = f; }
+
+/* { dg-final { scan-assembler "store fusion, type SF" } } */