Hi Mike, Thanks for this patch!
On 11/19/21 8:53 AM, Michael Meissner wrote: > Add power10 zero cycle moves for switches. > > Power10 will fuse adjacenet 'mtctr' and 'bctr' instructions to form zero > cycle moves. This code exploits this fusion opportunity. > > I have built bootstrapped compilers with this patch on little endian power9 > and > power10 systems with no regressions. Can I install this into the master > branch? > > 2021-11-19 Michael Meissner <meiss...@the-meissners.org> > > * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add > support for -mpower10-fusion-zero-cycle. > (POWERPC_MASKS): Likewise. > * config/rs6000/rs6000.c (rs6000_option_override_internal): > Likewise. > * config/rs6000/rs6000.md (indirect_jump): Support zero cycle > moves. > (indirect_jump<mode>_zero_cycle): New insns. > (tablejump<mode>_normal): Likewise. > (tablejump<mode>_absolute): Likewise. > (tablejump<mode>_insn_zero_cycle): New insn. > * config/rs6000/rs6000.opt (-mpower10-fusion-zero-cycle): New > debug switch. > --- > gcc/config/rs6000/rs6000-cpus.def | 4 ++- > gcc/config/rs6000/rs6000.c | 4 +++ > gcc/config/rs6000/rs6000.md | 52 ++++++++++++++++++++++++++++--- > gcc/config/rs6000/rs6000.opt | 4 +++ > 4 files changed, 59 insertions(+), 5 deletions(-) > > diff --git a/gcc/config/rs6000/rs6000-cpus.def > b/gcc/config/rs6000/rs6000-cpus.def > index f5812da0184..cc072ee94ea 100644 > --- a/gcc/config/rs6000/rs6000-cpus.def > +++ b/gcc/config/rs6000/rs6000-cpus.def > @@ -91,7 +91,8 @@ > | OPTION_MASK_P10_FUSION_LOGADD \ > | OPTION_MASK_P10_FUSION_ADDLOG \ > | OPTION_MASK_P10_FUSION_2ADD \ > - | OPTION_MASK_P10_FUSION_2STORE) > + | OPTION_MASK_P10_FUSION_2STORE \ > + | OPTION_MASK_P10_FUSION_ZERO_CYCLE) I guess it's fine to introduce one more for now, but ultimately we want all these to get collapsed down to one. No worries from me. > > /* Flags that need to be turned off if -mno-power9-vector. */ > #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW > \ > @@ -145,6 +146,7 @@ > | OPTION_MASK_P10_FUSION_ADDLOG \ > | OPTION_MASK_P10_FUSION_2ADD \ > | OPTION_MASK_P10_FUSION_2STORE \ > + | OPTION_MASK_P10_FUSION_ZERO_CYCLE \ > | OPTION_MASK_HTM \ > | OPTION_MASK_ISEL \ > | OPTION_MASK_MFCRF \ > diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c > index e4843eb0f1c..6780304a5eb 100644 > --- a/gcc/config/rs6000/rs6000.c > +++ b/gcc/config/rs6000/rs6000.c > @@ -4497,6 +4497,10 @@ rs6000_option_override_internal (bool global_init_p) > && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0) > rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE; > > + if (TARGET_POWER10 > + && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == > 0) > + rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE; > + > /* Turn off vector pair/mma options on non-power10 systems. */ > else if (!TARGET_POWER10 && TARGET_MMA) > { > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 6bec2bddbde..ea41eb4ada3 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump" > emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg)); > DONE; > } > + if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE) > + { > + emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0])); > + DONE; > + } > }) > > (define_insn "*indirect_jump<mode>" > [(set (pc) > (match_operand:P 0 "register_operand" "c,*l"))] > - "rs6000_speculate_indirect_jumps" > + "rs6000_speculate_indirect_jumps > + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)" > "b%T0" > [(set_attr "type" "jmpreg")]) > > +(define_insn "@indirect_jump<mode>_zero_cycle" I don't know why this is an "@" pattern, but honestly I don't know why @indirect_jump<mode>_nospec is an "@" pattern either. The documentation for such things is hard for me to understand, so I'm probably just missing something obvious, but I don't immediately see why we would need the @ here. > + [(set (pc) > + (match_operand:P 0 "register_operand" "r,r,!cl")) > + (clobber (match_scratch:P 1 "=c,*l,X"))] Do we need the *l and X alternatives if we're only doing this for mtctr/bctr? > + "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION > + && TARGET_P10_FUSION_ZERO_CYCLE" > + "@ > + mt%T1 %0\;b%T1 > + mt%T1 %0\;b%T1 > + b%T0" > + [(set_attr "type" "jmpreg") > + (set_attr "length" "8,8,4")]) > + > (define_insn "@indirect_jump<mode>_nospec" > [(set (pc) (match_operand:P 0 "register_operand" "c,*l")) > (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))] > @@ -13050,7 +13069,11 @@ (define_expand "@tablejump<mode>_normal" > rtx addr = gen_reg_rtx (Pmode); > > emit_insn (gen_add<mode>3 (addr, off, lab)); > - emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1])); > + rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE > + ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1]) > + : gen_tablejump_insn_normal (Pmode, addr, operands[1])); > + > + emit_jump_insn (insn); > DONE; > }) > > @@ -13062,7 +13085,11 @@ (define_expand "@tablejump<mode>_absolute" > rtx addr = gen_reg_rtx (Pmode); > emit_move_insn (addr, operands[0]); > > - emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1])); > + rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE > + ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1]) > + : gen_tablejump_insn_normal (Pmode, addr, operands[1])); > + > + emit_jump_insn (insn); > DONE; > }) > > @@ -13107,10 +13134,27 @@ (define_insn "@tablejump<mode>_insn_normal" > [(set (pc) > (match_operand:P 0 "register_operand" "c,*l")) > (use (label_ref (match_operand 1)))] > - "rs6000_speculate_indirect_jumps" > + "rs6000_speculate_indirect_jumps > + && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)" > "b%T0" > [(set_attr "type" "jmpreg")]) > > +;; Version of indirect jump that fuses the mtctr to bctr to achieve 0 cycle > +;; moves on Power10. > +(define_insn "@tablejump<mode>_insn_zero_cycle" Same question about @. > + [(set (pc) > + (match_operand:P 0 "register_operand" "r,r,!cl")) > + (use (label_ref (match_operand 1))) > + (clobber (match_scratch:P 2 "=c,*l,X"))] Same question about 2nd and 3rd alternatives. Otherwise LGTM... over to the maintainers. :) Thanks! Bill > + "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION > + && TARGET_P10_FUSION_ZERO_CYCLE" > + "@ > + mt%T2 %0\;b%T2 > + mt%T2 %0\;b%T2 > + b%T0" > + [(set_attr "type" "jmpreg") > + (set_attr "length" "8,8,4")]) > + > (define_insn "@tablejump<mode>_insn_nospec" > [(set (pc) > (match_operand:P 0 "register_operand" "c,*l")) > diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt > index 9d7878f144a..ba674947557 100644 > --- a/gcc/config/rs6000/rs6000.opt > +++ b/gcc/config/rs6000/rs6000.opt > @@ -518,6 +518,10 @@ mpower10-fusion-2store > Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags) > Fuse certain store operations together for better performance on power10. > > +mpower10-fusion-zero-cycle > +Target Undocumented Mask(P10_FUSION_ZERO_CYCLE) Var(rs6000_isa_flags) > +Fuse move to special register and jump for better performance on power10. > + > mcrypto > Target Mask(CRYPTO) Var(rs6000_isa_flags) > Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.