Hi Mike,

Thanks for this patch!

On 11/19/21 8:53 AM, Michael Meissner wrote:
> Add power10 zero cycle moves for switches.
>
> Power10 will fuse adjacenet 'mtctr' and 'bctr' instructions to form zero
> cycle moves.  This code exploits this fusion opportunity.
>
> I have built bootstrapped compilers with this patch on little endian power9 
> and
> power10 systems with no regressions.  Can I install this into the master
> branch?
>
> 2021-11-19  Michael Meissner  <meiss...@the-meissners.org>
>
>       * config/rs6000/rs6000-cpus.def (ISA_3_1_MASKS_SERVER): Add
>       support for -mpower10-fusion-zero-cycle.
>       (POWERPC_MASKS): Likewise.
>       * config/rs6000/rs6000.c (rs6000_option_override_internal):
>       Likewise.
>       * config/rs6000/rs6000.md (indirect_jump): Support zero cycle
>       moves.
>       (indirect_jump<mode>_zero_cycle): New insns.
>       (tablejump<mode>_normal): Likewise.
>       (tablejump<mode>_absolute): Likewise.
>       (tablejump<mode>_insn_zero_cycle): New insn.
>       * config/rs6000/rs6000.opt (-mpower10-fusion-zero-cycle): New
>       debug switch.
> ---
>  gcc/config/rs6000/rs6000-cpus.def |  4 ++-
>  gcc/config/rs6000/rs6000.c        |  4 +++
>  gcc/config/rs6000/rs6000.md       | 52 ++++++++++++++++++++++++++++---
>  gcc/config/rs6000/rs6000.opt      |  4 +++
>  4 files changed, 59 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/config/rs6000/rs6000-cpus.def 
> b/gcc/config/rs6000/rs6000-cpus.def
> index f5812da0184..cc072ee94ea 100644
> --- a/gcc/config/rs6000/rs6000-cpus.def
> +++ b/gcc/config/rs6000/rs6000-cpus.def
> @@ -91,7 +91,8 @@
>                                | OPTION_MASK_P10_FUSION_LOGADD        \
>                                | OPTION_MASK_P10_FUSION_ADDLOG        \
>                                | OPTION_MASK_P10_FUSION_2ADD          \
> -                              | OPTION_MASK_P10_FUSION_2STORE)
> +                              | OPTION_MASK_P10_FUSION_2STORE        \
> +                              | OPTION_MASK_P10_FUSION_ZERO_CYCLE)

I guess it's fine to introduce one more for now, but ultimately we want
all these to get collapsed down to one.  No worries from me.

>  
>  /* Flags that need to be turned off if -mno-power9-vector.  */
>  #define OTHER_P9_VECTOR_MASKS        (OPTION_MASK_FLOAT128_HW                
> \
> @@ -145,6 +146,7 @@
>                                | OPTION_MASK_P10_FUSION_ADDLOG        \
>                                | OPTION_MASK_P10_FUSION_2ADD          \
>                                | OPTION_MASK_P10_FUSION_2STORE        \
> +                              | OPTION_MASK_P10_FUSION_ZERO_CYCLE    \
>                                | OPTION_MASK_HTM                      \
>                                | OPTION_MASK_ISEL                     \
>                                | OPTION_MASK_MFCRF                    \
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index e4843eb0f1c..6780304a5eb 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4497,6 +4497,10 @@ rs6000_option_override_internal (bool global_init_p)
>        && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0)
>      rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE;
>  
> +  if (TARGET_POWER10
> +      && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ZERO_CYCLE) == 
> 0)
> +    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ZERO_CYCLE;
> +
>    /* Turn off vector pair/mma options on non-power10 systems.  */
>    else if (!TARGET_POWER10 && TARGET_MMA)
>      {
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 6bec2bddbde..ea41eb4ada3 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -12988,15 +12988,34 @@ (define_expand "indirect_jump"
>      emit_jump_insn (gen_indirect_jump_nospec (Pmode, operands[0], ccreg));
>      DONE;
>    }
> +  if (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)
> +    {
> +      emit_jump_insn (gen_indirect_jump_zero_cycle (Pmode, operands[0]));
> +      DONE;
> +    }
>  })
>  
>  (define_insn "*indirect_jump<mode>"
>    [(set (pc)
>       (match_operand:P 0 "register_operand" "c,*l"))]
> -  "rs6000_speculate_indirect_jumps"
> +  "rs6000_speculate_indirect_jumps
> +   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
>    "b%T0"
>    [(set_attr "type" "jmpreg")])
>  
> +(define_insn "@indirect_jump<mode>_zero_cycle"

I don't know why this is an "@" pattern, but honestly I don't
know why @indirect_jump<mode>_nospec is an "@" pattern either.
The documentation for such things is hard for me to understand,
so I'm probably just missing something obvious, but I don't
immediately see why we would need the @ here.

> +  [(set (pc)
> +     (match_operand:P 0 "register_operand" "r,r,!cl"))
> +   (clobber (match_scratch:P 1 "=c,*l,X"))]

Do we need the *l and X alternatives if we're only doing this for
mtctr/bctr?

> +  "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
> +   && TARGET_P10_FUSION_ZERO_CYCLE"
> +  "@
> +   mt%T1 %0\;b%T1
> +   mt%T1 %0\;b%T1
> +   b%T0"
> +  [(set_attr "type" "jmpreg")
> +   (set_attr "length" "8,8,4")])
> +
>  (define_insn "@indirect_jump<mode>_nospec"
>    [(set (pc) (match_operand:P 0 "register_operand" "c,*l"))
>     (clobber (match_operand:CC 1 "cc_reg_operand" "=y,y"))]
> @@ -13050,7 +13069,11 @@ (define_expand "@tablejump<mode>_normal"
>    rtx addr = gen_reg_rtx (Pmode);
>  
>    emit_insn (gen_add<mode>3 (addr, off, lab));
> -  emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +  rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE
> +           ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1])
> +           : gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +
> +  emit_jump_insn (insn);
>    DONE;
>  })
>  
> @@ -13062,7 +13085,11 @@ (define_expand "@tablejump<mode>_absolute"
>    rtx addr = gen_reg_rtx (Pmode);
>    emit_move_insn (addr, operands[0]);
>  
> -  emit_jump_insn (gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +  rtx insn = (TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE
> +           ? gen_tablejump_insn_zero_cycle (Pmode, addr, operands[1])
> +           : gen_tablejump_insn_normal (Pmode, addr, operands[1]));
> +
> +  emit_jump_insn (insn);
>    DONE;
>  })
>  
> @@ -13107,10 +13134,27 @@ (define_insn "@tablejump<mode>_insn_normal"
>    [(set (pc)
>       (match_operand:P 0 "register_operand" "c,*l"))
>     (use (label_ref (match_operand 1)))]
> -  "rs6000_speculate_indirect_jumps"
> +  "rs6000_speculate_indirect_jumps
> +   && !(TARGET_P10_FUSION && TARGET_P10_FUSION_ZERO_CYCLE)"
>    "b%T0"
>    [(set_attr "type" "jmpreg")])
>  
> +;; Version of indirect jump that fuses the mtctr to bctr to achieve 0 cycle
> +;; moves on Power10.
> +(define_insn "@tablejump<mode>_insn_zero_cycle"

Same question about @.

> +  [(set (pc)
> +     (match_operand:P 0 "register_operand" "r,r,!cl"))
> +   (use (label_ref (match_operand 1)))
> +   (clobber (match_scratch:P 2 "=c,*l,X"))]

Same question about 2nd and 3rd alternatives.

Otherwise LGTM... over to the maintainers. :)

Thanks!
Bill

> +  "rs6000_speculate_indirect_jumps && TARGET_P10_FUSION
> +   && TARGET_P10_FUSION_ZERO_CYCLE"
> +  "@
> +   mt%T2 %0\;b%T2
> +   mt%T2 %0\;b%T2
> +   b%T0"
> +  [(set_attr "type" "jmpreg")
> +   (set_attr "length" "8,8,4")])
> +
>  (define_insn "@tablejump<mode>_insn_nospec"
>    [(set (pc)
>       (match_operand:P 0 "register_operand" "c,*l"))
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index 9d7878f144a..ba674947557 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -518,6 +518,10 @@ mpower10-fusion-2store
>  Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags)
>  Fuse certain store operations together for better performance on power10.
>  
> +mpower10-fusion-zero-cycle
> +Target Undocumented Mask(P10_FUSION_ZERO_CYCLE) Var(rs6000_isa_flags)
> +Fuse move to special register and jump for better performance on power10.
> +
>  mcrypto
>  Target Mask(CRYPTO) Var(rs6000_isa_flags)
>  Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.

Reply via email to