On Wed, 2020-11-04 at 12:12 -0600, Aaron Sawdey via Gcc-patches wrote:
> Ping.
> 
> Aaron Sawdey, Ph.D. saw...@linux.ibm.com
> IBM Linux on POWER Toolchain
>  
> 
> > On Oct 26, 2020, at 4:44 PM, acsaw...@linux.ibm.com wrote:
> > 
> > From: Aaron Sawdey <acsaw...@linux.ibm.com>
> > 

Hi, 

> > This patch adds the first couple patterns to support p10 fusion. These
> > will allow combine to create a single insn for a pair of instructions
> > that that power10 can fuse and execute. These particular ones have the

s/that that power10/that the power10/

s/particular ones/particular insns/ 

> > requirement that only cr0 can be used when fusing a load with a compare
> > immediate of -1/0/1, so we want combine to put that requirement in, and
> > if it doesn't work out later the splitter can get used.
> > 
> > This also adds option -mpower10-fusion which defaults on for power10 and
> > will gate all these fusion patterns. In addition I have added an
> > undocumented option -mpower10-fusion-ld-cmpi (which may be removed later)
> > that just controls the load+compare-immediate patterns.

ok

> >                                                              I have make

s/make/made/

> > these default on for power10 but they are not disallowed for earlier

s/default on/default to on/

> > processors because it is still valid code. This allows us to test the
> > correctness of fusion code generation by turning it on explicitly.
> > 
> > The intention is to work through more patterns of this style to support
> > the rest of the power10 fusion pairs.
> > 
> > Bootstrap and regtest looks good on ppc64le power9 with these patterns
> > enabled in stage2/stage3 and for regtest. Ok for trunk?
> > 
> > gcc/ChangeLog:
> > 
> >     * config/rs6000/predicates.md: Add const_m1_to_1_operand.
> >     * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and
> >     OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER.

to ... and OTHER_P9_VECTOR_MASKS

> >     * config/rs6000/rs6000-protos.h (address_ok_for_form): Add
> >     prototype.



> >     * config/rs6000/rs6000.c (rs6000_option_override_internal):
> >     automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi
> >     if target is power10.  (rs6000_opt_masks): Allow -mpower10-fusion
> >     in function attributes.  (address_ok_for_form): New function.

ok


> >     * config/rs6000/rs6000.h: Add MASK_P10_FUSION.

> >     * config/rs6000/rs6000.md (*ld_cmpi_cr0): New
> >     define_insn_and_split.
> >     (*lwa_cmpdi_cr0): New define_insn_and_split.
> >     (*lwa_cmpwi_cr0): New define_insn_and_split.


> >     * config/rs6000/rs6000.opt: Add -mpower10-fusion
> >     and -mpower10-fusion-ld-cmpi.
> > ---
> > gcc/config/rs6000/predicates.md   |  5 +++
> > gcc/config/rs6000/rs6000-cpus.def |  6 ++-
> > gcc/config/rs6000/rs6000-protos.h |  2 +
> > gcc/config/rs6000/rs6000.c        | 34 ++++++++++++++++
> > gcc/config/rs6000/rs6000.h        |  1 +
> > gcc/config/rs6000/rs6000.md       | 68 +++++++++++++++++++++++++++++++
> > gcc/config/rs6000/rs6000.opt      |  8 ++++
> > 7 files changed, 123 insertions(+), 1 deletion(-)
> > 
> > diff --git a/gcc/config/rs6000/predicates.md 
> > b/gcc/config/rs6000/predicates.md
> > index 4c2fe7fa312..b75c1ddfb69 100644
> > --- a/gcc/config/rs6000/predicates.md
> > +++ b/gcc/config/rs6000/predicates.md
> > @@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand"
> >   (and (match_code "const_int")
> >        (match_test "IN_RANGE (INTVAL (op), 0, 1)")))
> > 
> > +;; Match op = -1, op = 0, or op = 1.
> > +(define_predicate "const_m1_to_1_operand"
> > +  (and (match_code "const_int")
> > +       (match_test "IN_RANGE (INTVAL (op), -1, 1)")))
> > +
> > ;; Match op = 0..3.
> > (define_predicate "const_0_to_3_operand"
> >   (and (match_code "const_int")

ok

> > diff --git a/gcc/config/rs6000/rs6000-cpus.def 
> > b/gcc/config/rs6000/rs6000-cpus.def
> > index 8d2c1ffd6cf..3e65289d8df 100644
> > --- a/gcc/config/rs6000/rs6000-cpus.def
> > +++ b/gcc/config/rs6000/rs6000-cpus.def
> > @@ -82,7 +82,9 @@
> > 
> > #define ISA_3_1_MASKS_SERVER        (ISA_3_0_MASKS_SERVER                   
> > \
> >                              | OPTION_MASK_POWER10                  \
> > -                            | OTHER_POWER10_MASKS)
> > +                            | OTHER_POWER10_MASKS                  \
> > +                            | OPTION_MASK_P10_FUSION               \
> > +                            | OPTION_MASK_P10_FUSION_LD_CMPI)
> > 
> > /* Flags that need to be turned off if -mno-power9-vector.  */
> > #define OTHER_P9_VECTOR_MASKS       (OPTION_MASK_FLOAT128_HW                
> > \
> > @@ -129,6 +131,8 @@
> >                              | OPTION_MASK_FLOAT128_KEYWORD         \
> >                              | OPTION_MASK_FPRND                    \
> >                              | OPTION_MASK_POWER10                  \
> > +                            | OPTION_MASK_P10_FUSION               \
> > +                            | OPTION_MASK_P10_FUSION_LD_CMPI       \
> >                              | OPTION_MASK_HTM                      \
> >                              | OPTION_MASK_ISEL                     \
> >                              | OPTION_MASK_MFCRF                    \

ok

> > diff --git a/gcc/config/rs6000/rs6000-protos.h 
> > b/gcc/config/rs6000/rs6000-protos.h
> > index 25fa5dd57cd..d8a344245e6 100644
> > --- a/gcc/config/rs6000/rs6000-protos.h
> > +++ b/gcc/config/rs6000/rs6000-protos.h
> > @@ -190,6 +190,8 @@ enum non_prefixed_form {
> > 
> > extern enum insn_form address_to_insn_form (rtx, machine_mode,
> >                                         enum non_prefixed_form);
> > +extern bool address_ok_for_form (rtx, machine_mode,
> > +                            enum non_prefixed_form);
> > extern bool prefixed_load_p (rtx_insn *);
> > extern bool prefixed_store_p (rtx_insn *);
> > extern bool prefixed_paddi_p (rtx_insn *);
> > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> > index 4d528a39a37..b8de318a0bc 100644
> > --- a/gcc/config/rs6000/rs6000.c
> > +++ b/gcc/config/rs6000/rs6000.c
> > @@ -4406,6 +4406,12 @@ rs6000_option_override_internal (bool global_init_p)
> >   if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
> >     rs6000_isa_flags |= OPTION_MASK_MMA;
> > 
> > +  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & 
> > OPTION_MASK_P10_FUSION) == 0)
> > +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
> > +
> > +  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & 
> > OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
> > +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
> > +
> >   /* Turn off vector pair/mma options on non-power10 systems.  */
> >   else if (!TARGET_POWER10 && TARGET_MMA)
> >     {
> > @@ -23391,6 +23397,7 @@ static struct rs6000_opt_mask const 
> > rs6000_opt_masks[] =
> >   { "power9-minmax",                OPTION_MASK_P9_MINMAX,          false, 
> > true  },
> >   { "power9-misc",          OPTION_MASK_P9_MISC,            false, true  },
> >   { "power9-vector",                OPTION_MASK_P9_VECTOR,          false, 
> > true  },
> > +  { "power10-fusion",              OPTION_MASK_P10_FUSION,         false, 
> > true  },
> >   { "powerpc-gfxopt",               OPTION_MASK_PPC_GFXOPT,         false, 
> > true  },
> >   { "powerpc-gpopt",                OPTION_MASK_PPC_GPOPT,          false, 
> > true  },
> >   { "prefixed",                     OPTION_MASK_PREFIXED,           false, 
> > true  },
> > @@ -25482,6 +25489,33 @@ address_to_insn_form (rtx addr,
> >   return INSN_FORM_BAD;
> > }
> > 
> > +bool
> > +address_ok_for_form (rtx addr,
> > +                machine_mode mode,
> > +                enum non_prefixed_form non_prefixed_format)
> > +{
> > +  enum insn_form result_form;
> > +
> > +  result_form = address_to_insn_form (addr, mode, non_prefixed_format);
> > +
> > +  switch (non_prefixed_format)
> > +    {
> > +    case NON_PREFIXED_DS:
> > +      switch (result_form)
> > +   {
> > +   case INSN_FORM_DS:
> > +   case INSN_FORM_BASE_REG:
> > +     return true;
> > +   default:
> > +     break;
> > +   }
> > +      break;
> > +    default:
> > +      break;
> > +    }
> > +  return false;
> > +}
> > +
> > /* Helper function to see if we're potentially looking at lfs/stfs.
> >    - PARALLEL containing a SET and a CLOBBER
> >    - stfs:
> > diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> > index bbd8060e143..884452fc6d9 100644
> > --- a/gcc/config/rs6000/rs6000.h
> > +++ b/gcc/config/rs6000/rs6000.h
> > @@ -539,6 +539,7 @@ extern int rs6000_vector_align[];
> > #define MASK_UPDATE                 OPTION_MASK_UPDATE
> > #define MASK_VSX                    OPTION_MASK_VSX
> > #define MASK_POWER10                        OPTION_MASK_POWER10
> > +#define MASK_P10_FUSION                    OPTION_MASK_P10_FUSION
> > 
> > #ifndef IN_LIBGCC2
> > #define MASK_POWERPC64                      OPTION_MASK_POWERPC64
> > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> > index dc060143104..bbcc6abe0f9 100644
> > --- a/gcc/config/rs6000/rs6000.md
> > +++ b/gcc/config/rs6000/rs6000.md
> > @@ -1896,6 +1896,74 @@ (define_insn_and_split "*add<mode>3_imm_dot2"
> >    (set_attr "dot" "yes")
> >    (set_attr "length" "4,8")])
> > 
> > +;; Define an insn for ld+cmpi so we can force it to use CR0 on p10
> > +;; immediate has to be -1/0/1
> > +(define_insn_and_split "*ld_cmpi_cr0"
> > +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> > +   (compare:CC (match_operand:DI 1 "memory_operand" "m")
> > +               (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
> > +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> > +   (match_dup 1))
> > +   ]
> > +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> > +  "ld %0,%1\;cmpi 0,1,%0,%3"
> > +  "&& reload_completed
> > +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> > +       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
> > NON_PREFIXED_DS))"
> > +  [(set (match_dup 0) (match_dup 1))
> > +   (set (match_dup 2)
> > +        (compare:CC (match_dup 0)
> > +               (match_dup 3)))]
> > +  ""
> > +  [(set_attr "type" "load")
> > +   (set_attr "length" "8")])
> > +
> > +;; Define an insn for lwa+cmpdi so we can force it to use CR0 on p10
> > +;; immediate is -1/0/1
> > +(define_insn_and_split "*lwa_cmpdi_cr0"
> > +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> > +   (compare:CC (sign_extend:DI (match_operand:SI 1 "memory_operand" "m"))
> > +               (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
> > +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> > +   (sign_extend:DI (match_dup 1)))
> > +   ]
> > +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> > +  "lwa %0,%1\;cmpdi %0,%3"
> > +  "&& reload_completed
> > +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> > +       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
> > NON_PREFIXED_DS))"
> > +  [(set (match_dup 0)
> > +   (sign_extend:DI (match_dup 1)))
> > +   (set (match_dup 2)
> > +        (compare:CC (match_dup 0)
> > +               (match_dup 3)))]
> > +  ""
> > +  [(set_attr "type" "load")
> > +   (set_attr "length" "8")])
> > +
> > +;; Define an insn for lwa+cmpwi so we can force it to use CR0 on p10
> > +;; immediate is -1/0/1
> > +(define_insn_and_split "*lwa_cmpwi_cr0"
> > +  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
> > +   (compare:CC (match_operand:SI 1 "memory_operand" "m")
> > +               (match_operand:SI 3 "const_m1_to_1_operand" "n")))
> > +   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
> > +   (sign_extend:DI (match_dup 1)))
> > +   ]
> > +  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
> > +  "lwa %0,%1\;cmpwi %0,%3"
> > +  "&& reload_completed
> > +   && (cc_reg_not_cr0_operand (operands[2], CCmode)
> > +       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
> > NON_PREFIXED_DS))"
> > +  [(set (match_dup 0)
> > +   (sign_extend:DI (match_dup 1)))
> > +   (set (match_dup 2)
> > +        (compare:CC (match_dup 0)
> > +               (match_dup 3)))]
> > +  ""
> > +  [(set_attr "type" "load")
> > +   (set_attr "length" "8")])
> > +
> > ;; Split an add that we can't do in one insn into two insns, each of which
> > ;; does one 16-bit part.  This is used by combine.  Note that the low-order
> > ;; add should be last in case the result gets used in an address.

skimmed, no comments on the .md parts.

> > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> > index b2a70e88ca8..63457efb607 100644
> > --- a/gcc/config/rs6000/rs6000.opt
> > +++ b/gcc/config/rs6000/rs6000.opt
> > @@ -479,6 +479,14 @@ mpower8-vector
> > Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
> > Use vector and scalar instructions added in ISA 2.07.
> > 
> > +mpower10-fusion
> > +Target Report Mask(P10_FUSION) Var(rs6000_isa_flags)
> > +Fuse certain integer operations together for better performance on power10.
> > +
> > +mpower10-fusion-ld-cmpi
> > +Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags)
> > +Fuse certain integer operations together for better performance on power10.
> > +
> > mcrypto
> > Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
> > Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
> > -- 
> > 2.18.4
> > 
> 
> 

ok

thanks
-Will



Reply via email to