On Wed, 2020-11-04 at 12:12 -0600, Aaron Sawdey via Gcc-patches wrote: > Ping. > > Aaron Sawdey, Ph.D. saw...@linux.ibm.com > IBM Linux on POWER Toolchain > > > > On Oct 26, 2020, at 4:44 PM, acsaw...@linux.ibm.com wrote: > > > > From: Aaron Sawdey <acsaw...@linux.ibm.com> > >
Hi, > > This patch adds the first couple patterns to support p10 fusion. These > > will allow combine to create a single insn for a pair of instructions > > that that power10 can fuse and execute. These particular ones have the [Will: that the power10; s/particular ones/particular insns/] > > requirement that only cr0 can be used when fusing a load with a compare > > immediate of -1/0/1, so we want combine to put that requirement in, and > > if it doesn't work out later the splitter can get used. > > > > This also adds option -mpower10-fusion which defaults on for power10 and > > will gate all these fusion patterns. In addition I have added an > > undocumented option -mpower10-fusion-ld-cmpi (which may be removed later) > > that just controls the load+compare-immediate patterns. [Will: ok] > > I have make [Will: made] > > these default on for power10 but they are not disallowed for earlier [Will: to on] > > processors because it is still valid code. This allows us to test the > > correctness of fusion code generation by turning it on explicitly. > > > > The intention is to work through more patterns of this style to support > > the rest of the power10 fusion pairs. > > > > Bootstrap and regtest looks good on ppc64le power9 with these patterns > > enabled in stage2/stage3 and for regtest. Ok for trunk? > > > > gcc/ChangeLog: > > > > * config/rs6000/predicates.md: Add const_m1_to_1_operand. > > * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and > > OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER. [Will: to ... and OTHER_P9_VECTOR_MASKS] > > * config/rs6000/rs6000-protos.h (address_ok_for_form): Add > > prototype. > > * config/rs6000/rs6000.c (rs6000_option_override_internal): > > automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi > > if target is power10. (rs6000_opt_masks): Allow -mpower10-fusion > > in function attributes. (address_ok_for_form): New function. [Will: ok] > > * config/rs6000/rs6000.h: Add MASK_P10_FUSION. 
> > * config/rs6000/rs6000.md (*ld_cmpi_cr0): New > > define_insn_and_split. > > (*lwa_cmpdi_cr0): New define_insn_and_split. > > (*lwa_cmpwi_cr0): New define_insn_and_split. > > * config/rs6000/rs6000.opt: Add -mpower10-fusion > > and -mpower10-fusion-ld-cmpi. > > --- > > gcc/config/rs6000/predicates.md | 5 +++ > > gcc/config/rs6000/rs6000-cpus.def | 6 ++- > > gcc/config/rs6000/rs6000-protos.h | 2 + > > gcc/config/rs6000/rs6000.c | 34 ++++++++++++++++ > > gcc/config/rs6000/rs6000.h | 1 + > > gcc/config/rs6000/rs6000.md | 68 +++++++++++++++++++++++++++++++ > > gcc/config/rs6000/rs6000.opt | 8 ++++ > > 7 files changed, 123 insertions(+), 1 deletion(-) > > > > diff --git a/gcc/config/rs6000/predicates.md > > b/gcc/config/rs6000/predicates.md > > index 4c2fe7fa312..b75c1ddfb69 100644 > > --- a/gcc/config/rs6000/predicates.md > > +++ b/gcc/config/rs6000/predicates.md > > @@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand" > > (and (match_code "const_int") > > (match_test "IN_RANGE (INTVAL (op), 0, 1)"))) > > > > +;; Match op = -1, op = 0, or op = 1. > > +(define_predicate "const_m1_to_1_operand" > > + (and (match_code "const_int") > > + (match_test "IN_RANGE (INTVAL (op), -1, 1)"))) > > + > > ;; Match op = 0..3. > > (define_predicate "const_0_to_3_operand" > > (and (match_code "const_int") ok > > diff --git a/gcc/config/rs6000/rs6000-cpus.def > > b/gcc/config/rs6000/rs6000-cpus.def > > index 8d2c1ffd6cf..3e65289d8df 100644 > > --- a/gcc/config/rs6000/rs6000-cpus.def > > +++ b/gcc/config/rs6000/rs6000-cpus.def > > @@ -82,7 +82,9 @@ > > > > #define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER > > \ > > | OPTION_MASK_POWER10 \ > > - | OTHER_POWER10_MASKS) > > + | OTHER_POWER10_MASKS \ > > + | OPTION_MASK_P10_FUSION \ > > + | OPTION_MASK_P10_FUSION_LD_CMPI) > > > > /* Flags that need to be turned off if -mno-power9-vector. 
*/ > > #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW > > \ > > @@ -129,6 +131,8 @@ > > | OPTION_MASK_FLOAT128_KEYWORD \ > > | OPTION_MASK_FPRND \ > > | OPTION_MASK_POWER10 \ > > + | OPTION_MASK_P10_FUSION \ > > + | OPTION_MASK_P10_FUSION_LD_CMPI \ > > | OPTION_MASK_HTM \ > > | OPTION_MASK_ISEL \ > > | OPTION_MASK_MFCRF \ ok > > diff --git a/gcc/config/rs6000/rs6000-protos.h > > b/gcc/config/rs6000/rs6000-protos.h > > index 25fa5dd57cd..d8a344245e6 100644 > > --- a/gcc/config/rs6000/rs6000-protos.h > > +++ b/gcc/config/rs6000/rs6000-protos.h > > @@ -190,6 +190,8 @@ enum non_prefixed_form { > > > > extern enum insn_form address_to_insn_form (rtx, machine_mode, > > enum non_prefixed_form); > > +extern bool address_ok_for_form (rtx, machine_mode, > > + enum non_prefixed_form); > > extern bool prefixed_load_p (rtx_insn *); > > extern bool prefixed_store_p (rtx_insn *); > > extern bool prefixed_paddi_p (rtx_insn *); > > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > > index 4d528a39a37..b8de318a0bc 100644 > > --- a/gcc/config/rs6000/rs6000.c > > +++ b/gcc/config/rs6000/rs6000.c > > @@ -4406,6 +4406,12 @@ rs6000_option_override_internal (bool global_init_p) > > if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0) > > rs6000_isa_flags |= OPTION_MASK_MMA; > > > > + if (TARGET_POWER10 && (rs6000_isa_flags_explicit & > > OPTION_MASK_P10_FUSION) == 0) > > + rs6000_isa_flags |= OPTION_MASK_P10_FUSION; > > + > > + if (TARGET_POWER10 && (rs6000_isa_flags_explicit & > > OPTION_MASK_P10_FUSION_LD_CMPI) == 0) > > + rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI; > > + > > /* Turn off vector pair/mma options on non-power10 systems. 
*/ > > else if (!TARGET_POWER10 && TARGET_MMA) > > { > > @@ -23391,6 +23397,7 @@ static struct rs6000_opt_mask const > > rs6000_opt_masks[] = > > { "power9-minmax", OPTION_MASK_P9_MINMAX, false, > > true }, > > { "power9-misc", OPTION_MASK_P9_MISC, false, true }, > > { "power9-vector", OPTION_MASK_P9_VECTOR, false, > > true }, > > + { "power10-fusion", OPTION_MASK_P10_FUSION, false, > > true }, > > { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, > > true }, > > { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, > > true }, > > { "prefixed", OPTION_MASK_PREFIXED, false, > > true }, > > @@ -25482,6 +25489,33 @@ address_to_insn_form (rtx addr, > > return INSN_FORM_BAD; > > } > > > > +bool > > +address_ok_for_form (rtx addr, > > + machine_mode mode, > > + enum non_prefixed_form non_prefixed_format) > > +{ > > + enum insn_form result_form; > > + > > + result_form = address_to_insn_form (addr, mode, non_prefixed_format); > > + > > + switch (non_prefixed_format) > > + { > > + case NON_PREFIXED_DS: > > + switch (result_form) > > + { > > + case INSN_FORM_DS: > > + case INSN_FORM_BASE_REG: > > + return true; > > + default: > > + break; > > + } > > + break; > > + default: > > + break; > > + } > > + return false; > > +} > > + > > /* Helper function to see if we're potentially looking at lfs/stfs. 
> > - PARALLEL containing a SET and a CLOBBER > > - stfs: > > diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h > > index bbd8060e143..884452fc6d9 100644 > > --- a/gcc/config/rs6000/rs6000.h > > +++ b/gcc/config/rs6000/rs6000.h > > @@ -539,6 +539,7 @@ extern int rs6000_vector_align[]; > > #define MASK_UPDATE OPTION_MASK_UPDATE > > #define MASK_VSX OPTION_MASK_VSX > > #define MASK_POWER10 OPTION_MASK_POWER10 > > +#define MASK_P10_FUSION OPTION_MASK_P10_FUSION > > > > #ifndef IN_LIBGCC2 > > #define MASK_POWERPC64 OPTION_MASK_POWERPC64 > > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > > index dc060143104..bbcc6abe0f9 100644 > > --- a/gcc/config/rs6000/rs6000.md > > +++ b/gcc/config/rs6000/rs6000.md > > @@ -1896,6 +1896,74 @@ (define_insn_and_split "*add<mode>3_imm_dot2" > > (set_attr "dot" "yes") > > (set_attr "length" "4,8")]) > > > > +;; Define an insn for ld+cmpi so we can force it to use CR0 on p10 > > +;; immediate has to be -1/0/1 > > +(define_insn_and_split "*ld_cmpi_cr0" > > + [(set (match_operand:CC 2 "cc_reg_operand" "=x") > > + (compare:CC (match_operand:DI 1 "memory_operand" "m") > > + (match_operand:GPR 3 "const_m1_to_1_operand" "n"))) > > + (set (match_operand:DI 0 "gpc_reg_operand" "=r") > > + (match_dup 1)) > > + ] > > + "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" > > + "ld %0,%1\;cmpi 0,1,%0,%3" > > + "&& reload_completed > > + && (cc_reg_not_cr0_operand (operands[2], CCmode) > > + || !address_ok_for_form (XEXP (operands[1],0), DImode, > > NON_PREFIXED_DS))" > > + [(set (match_dup 0) (match_dup 1)) > > + (set (match_dup 2) > > + (compare:CC (match_dup 0) > > + (match_dup 3)))] > > + "" > > + [(set_attr "type" "load") > > + (set_attr "length" "8")]) > > + > > +;; Define an insn for lwa+cmpdi so we can force it to use CR0 on p10 > > +;; immediate is -1/0/1 > > +(define_insn_and_split "*lwa_cmpdi_cr0" > > + [(set (match_operand:CC 2 "cc_reg_operand" "=x") > > + (compare:CC (sign_extend:DI 
(match_operand:SI 1 "memory_operand" "m")) > > + (match_operand:GPR 3 "const_m1_to_1_operand" "n"))) > > + (set (match_operand:DI 0 "gpc_reg_operand" "=r") > > + (sign_extend:DI (match_dup 1))) > > + ] > > + "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" > > + "lwa %0,%1\;cmpdi %0,%3" > > + "&& reload_completed > > + && (cc_reg_not_cr0_operand (operands[2], CCmode) > > + || !address_ok_for_form (XEXP (operands[1],0), DImode, > > NON_PREFIXED_DS))" > > + [(set (match_dup 0) > > + (sign_extend:DI (match_dup 1))) > > + (set (match_dup 2) > > + (compare:CC (match_dup 0) > > + (match_dup 3)))] > > + "" > > + [(set_attr "type" "load") > > + (set_attr "length" "8")]) > > + > > +;; Define an insn for lwa+cmpwi so we can force it to use CR0 on p10 > > +;; immediate is -1/0/1 > > +(define_insn_and_split "*lwa_cmpwi_cr0" > > + [(set (match_operand:CC 2 "cc_reg_operand" "=x") > > + (compare:CC (match_operand:SI 1 "memory_operand" "m") > > + (match_operand:SI 3 "const_m1_to_1_operand" "n"))) > > + (set (match_operand:DI 0 "gpc_reg_operand" "=r") > > + (sign_extend:DI (match_dup 1))) > > + ] > > + "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" > > + "lwa %0,%1\;cmpwi %0,%3" > > + "&& reload_completed > > + && (cc_reg_not_cr0_operand (operands[2], CCmode) > > + || !address_ok_for_form (XEXP (operands[1],0), DImode, > > NON_PREFIXED_DS))" > > + [(set (match_dup 0) > > + (sign_extend:DI (match_dup 1))) > > + (set (match_dup 2) > > + (compare:CC (match_dup 0) > > + (match_dup 3)))] > > + "" > > + [(set_attr "type" "load") > > + (set_attr "length" "8")]) > > + > > ;; Split an add that we can't do in one insn into two insns, each of which > > ;; does one 16-bit part. This is used by combine. Note that the low-order > > ;; add should be last in case the result gets used in an address. skimmed, no comments on the .md parts. 
> > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt > > index b2a70e88ca8..63457efb607 100644 > > --- a/gcc/config/rs6000/rs6000.opt > > +++ b/gcc/config/rs6000/rs6000.opt > > @@ -479,6 +479,14 @@ mpower8-vector > > Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags) > > Use vector and scalar instructions added in ISA 2.07. > > > > +mpower10-fusion > > +Target Report Mask(P10_FUSION) Var(rs6000_isa_flags) > > +Fuse certain integer operations together for better performance on power10. > > + > > +mpower10-fusion-ld-cmpi > > +Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags) > > +Fuse certain integer operations together for better performance on power10. > > + > > mcrypto > > Target Report Mask(CRYPTO) Var(rs6000_isa_flags) > > Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions. > > -- > > 2.18.4 > > > > ok thanks -Will