Hello Segher: Please review and let me know your feedback to submit in trunk.
Thanks & Regards Ajit On 25/02/23 3:20 pm, Ajit Agarwal via Gcc-patches wrote: > Hello All: > > Here is the patch that uses xxlor instead of fmr where possible. > Performance results shows that fmr is better in power9 and > power10 architectures whereas xxlor is better in power7 and > power 8 architectures. fmr is the only option before p7. > > Bootstrapped and regtested on powerpc64-linux-gnu > > Thanks & Regards > Ajit > > rs6000: Use xxlor instead of fmr where possible > > Replaces fmr with xxlor instruction for power7 and power8 > architectures whereas for power9 and power10 keep fmr > instruction. > > Perf measurement results: > > Power9 fmr: 201,847,661 cycles. > Power9 xxlor: 201,877,78 cycles. > Power8 fmr: 200,901,043 cycles. > Power8 xxlor: 201,020,518 cycles. > Power7 fmr: 201,059,524 cycles. > Power7 xxlor: 201,042,851 cycles. > > 2023-02-25 Ajit Kumar Agarwal <aagar...@linux.ibm.com> > > gcc/ChangeLog: > > * config/rs6000/rs6000.md (*movdf_hardfloat64): Use xxlor for power7 > and power8 and fmr for power9 and power10. > --- > gcc/config/rs6000/rs6000.md | 44 +++++++++++++++++++++++-------------- > 1 file changed, 28 insertions(+), 16 deletions(-) > > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 81bffb04ceb..e101f7f5fc1 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -354,7 +354,7 @@ (define_attr "cpu" > (const (symbol_ref "(enum attr_cpu) rs6000_tune"))) > > ;; The ISA we implement. > -(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10" > +(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p7p8v,p9,p9v,p9kf,p9tf,p10" > (const_string "any")) > > ;; Is this alternative enabled for the current CPU/ISA/etc.? > @@ -402,6 +402,11 @@ (define_attr "enabled" "" > (and (eq_attr "isa" "p10") > (match_test "TARGET_POWER10")) > (const_int 1) > + > + (and (eq_attr "isa" "p7p8v") > + (match_test "TARGET_VSX && !TARGET_P9_VECTOR")) > + (const_int 1) > + > ] (const_int 0))) > > ;; If this instruction is microcoded on the CELL processor > @@ -8436,27 +8441,29 @@ (define_insn "*mov<mode>_softfloat32" > > (define_insn "*mov<mode>_hardfloat64" > [(set (match_operand:FMOVE64 0 "nonimmediate_operand" > - "=m, d, d, <f64_p9>, wY, > - <f64_av>, Z, <f64_vsx>, <f64_vsx>, !r, > + "=m, d, <f64_vsx>, <f64_p9>, wY, > + <f64_av>, Z, wa, <f64_vsx>, !r, > YZ, r, !r, *c*l, !r, > - *h, r, <f64_dm>, wa") > + *h, r, <f64_dm>, d, wn, > + wa") > (match_operand:FMOVE64 1 "input_operand" > - "d, m, d, wY, <f64_p9>, > - Z, <f64_av>, <f64_vsx>, <zero_fp>, <zero_fp>, > + "d, m, <f64_vsx>, wY, <f64_p9>, > + Z, <f64_av>, wa, <zero_fp>, <zero_fp>, > r, YZ, r, r, *h, > - 0, <f64_dm>, r, eP"))] > + 0, <f64_dm>, r, d, wn, > + eP"))] > "TARGET_POWERPC64 && TARGET_HARD_FLOAT > && (gpc_reg_operand (operands[0], <MODE>mode) > || gpc_reg_operand (operands[1], <MODE>mode))" > "@ > stfd%U0%X0 %1,%0 > lfd%U1%X1 %0,%1 > - fmr %0,%1 > + xxlor %x0,%x1,%x1 > lxsd %0,%1 > stxsd %1,%0 > lxsdx %x0,%y1 > stxsdx %x1,%y0 > - xxlor %x0,%x1,%x1 > + fmr %0,%1 > xxlxor %x0,%x0,%x0 > li %0,0 > std%U0%X0 %1,%0 > @@ -8467,23 +8474,28 @@ (define_insn "*mov<mode>_hardfloat64" > nop > mfvsrd %0,%x1 > mtvsrd %x0,%1 > + fmr %0,%1 > + fmr %0,%1 > #" > [(set_attr "type" > - "fpstore, fpload, fpsimple, fpload, fpstore, > + "fpstore, fpload, veclogical, fpload, fpstore, > fpload, fpstore, veclogical, veclogical, integer, > store, load, *, mtjmpr, mfjmpr, > - *, mfvsr, mtvsr, vecperm") > + *, mfvsr, mtvsr, fpsimple, fpsimple, > + vecperm") > (set_attr "size" "64") > (set_attr "isa" > - "*, *, *, p9v, p9v, > - p7v, p7v, *, *, *, > - *, *, *, *, *, > - *, p8v, p8v, p10") > + "*, *, p7p8v, p9v, p9v, > + p7v, p7v, *, *, *, > + *, *, *, *, *, > + *, p8v, p8v, *, *, > + p10") > (set_attr "prefixed" > "*, *, *, *, *, > *, *, *, *, *, > *, *, *, *, *, > - *, *, *, *")]) > + *, *, *, *, *, > + *")]) > > ;; STD LD MR MT<SPR> MF<SPR> G-const > ;; H-const F-const Special