On Wed, 2021-09-01 at 11:13 -0500, Bill Schmidt via Gcc-patches wrote: > Peter Bergner recently added two new builtins __builtin_vsx_lxvp and > __builtin_vsx_stxvp. These happened to break a pattern in MMA builtins that > I had been using to automate gimple folding of MMA builtins. Previously, > every MMA function that could be folded had an associated internal function > that it was folded into. The LXVP/STXVP builtins are just folded directly > into memory operations. > > Instead of relying on this pattern, this patch adds a new attribute to > builtins called "mmaint," which is set for all MMA builtins that have an > associated internal builtin. The naming convention that adds _INTERNAL to > the builtin index name remains. > > The rest of the patch is just duplicating Peter's patch, using the new > builtin infrastructure. > > 2021-08-23 Bill Schmidt <wschm...@linux.ibm.com> > > gcc/ > * config/rs6000/rs6000-builtin-new.def (ASSEMBLE_ACC): Add mmaint flag. > (ASSEMBLE_PAIR): Likewise. > (BUILD_ACC): Likewise. > (DISASSEMBLE_ACC): Likewise. > (DISASSEMBLE_PAIR): Likewise. > (PMXVBF16GER2): Likewise. > (PMXVBF16GER2NN): Likewise. > (PMXVBF16GER2NP): Likewise. > (PMXVBF16GER2PN): Likewise. > (PMXVBF16GER2PP): Likewise. > (PMXVF16GER2): Likewise. > (PMXVF16GER2NN): Likewise. > (PMXVF16GER2NP): Likewise. > (PMXVF16GER2PN): Likewise. > (PMXVF16GER2PP): Likewise. > (PMXVF32GER): Likewise. > (PMXVF32GERNN): Likewise. > (PMXVF32GERNP): Likewise. > (PMXVF32GERPN): Likewise. > (PMXVF32GERPP): Likewise. > (PMXVF64GER): Likewise. > (PMXVF64GERNN): Likewise. > (PMXVF64GERNP): Likewise. > (PMXVF64GERPN): Likewise. > (PMXVF64GERPP): Likewise. > (PMXVI16GER2): Likewise. > (PMXVI16GER2PP): Likewise. > (PMXVI16GER2S): Likewise. > (PMXVI16GER2SPP): Likewise. > (PMXVI4GER8): Likewise. > (PMXVI4GER8PP): Likewise. > (PMXVI8GER4): Likewise. > (PMXVI8GER4PP): Likewise. > (PMXVI8GER4SPP): Likewise. > (XVBF16GER2): Likewise. > (XVBF16GER2NN): Likewise. > (XVBF16GER2NP): Likewise. 
> (XVBF16GER2PN): Likewise. > (XVBF16GER2PP): Likewise. > (XVF16GER2): Likewise. > (XVF16GER2NN): Likewise. > (XVF16GER2NP): Likewise. > (XVF16GER2PN): Likewise. > (XVF16GER2PP): Likewise. > (XVF32GER): Likewise. > (XVF32GERNN): Likewise. > (XVF32GERNP): Likewise. > (XVF32GERPN): Likewise. > (XVF32GERPP): Likewise. > (XVF64GER): Likewise. > (XVF64GERNN): Likewise. > (XVF64GERNP): Likewise. > (XVF64GERPN): Likewise. > (XVF64GERPP): Likewise. > (XVI16GER2): Likewise. > (XVI16GER2PP): Likewise. > (XVI16GER2S): Likewise. > (XVI16GER2SPP): Likewise. > (XVI4GER8): Likewise. > (XVI4GER8PP): Likewise. > (XVI8GER4): Likewise. > (XVI8GER4PP): Likewise. > (XVI8GER4SPP): Likewise. > (XXMFACC): Likewise. > (XXMTACC): Likewise. > (XXSETACCZ): Likewise. > (ASSEMBLE_PAIR_V): Likewise. > (BUILD_PAIR): Likewise. > (DISASSEMBLE_PAIR_V): Likewise. > (LXVP): New. > (STXVP): New.
ok > * config/rs6000/rs6000-call.c > (rs6000_gimple_fold_new_mma_builtin): Handle RS6000_BIF_LXVP and > RS6000_BIF_STXVP. > * config/rs6000/rs6000-gen-builtins.c (attrinfo): Add ismmaint. > (parse_bif_attrs): Handle ismmaint. > (write_decls): Add bif_mmaint_bit and bif_is_mmaint. > (write_bif_static_init): Handle ismmaint. ok > --- > gcc/config/rs6000/rs6000-builtin-new.def | 145 ++++++++++++----------- > gcc/config/rs6000/rs6000-call.c | 38 +++++- > gcc/config/rs6000/rs6000-gen-builtins.c | 38 +++--- > 3 files changed, 135 insertions(+), 86 deletions(-) > > diff --git a/gcc/config/rs6000/rs6000-builtin-new.def > b/gcc/config/rs6000/rs6000-builtin-new.def > index a8c6b9e988f..1966516551e 100644 > --- a/gcc/config/rs6000/rs6000-builtin-new.def > +++ b/gcc/config/rs6000/rs6000-builtin-new.def > @@ -129,6 +129,7 @@ > ; mma Needs special handling for MMA > ; quad MMA instruction using a register quad as an input operand > ; pair MMA instruction using a register pair as an input operand > +; mmaint MMA instruction expanding to internal call at GIMPLE time > ; no32bit Not valid for TARGET_32BIT > ; 32bit Requires different handling for TARGET_32BIT > ; cpu This is a "cpu_is" or "cpu_supports" builtin > @@ -3584,415 +3585,421 @@ > > [mma] > void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc); > - ASSEMBLE_ACC nothing {mma} > + ASSEMBLE_ACC nothing {mma,mmaint} > > v512 __builtin_mma_assemble_acc_internal (vuc, vuc, vuc, vuc); > ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma} > > void __builtin_mma_assemble_pair (v256 *, vuc, vuc); > - ASSEMBLE_PAIR nothing {mma} > + ASSEMBLE_PAIR nothing {mma,mmaint} > > v256 __builtin_mma_assemble_pair_internal (vuc, vuc); > ASSEMBLE_PAIR_INTERNAL vsx_assemble_pair {mma} > > void __builtin_mma_build_acc (v512 *, vuc, vuc, vuc, vuc); > - BUILD_ACC nothing {mma} > + BUILD_ACC nothing {mma,mmaint} > > v512 __builtin_mma_build_acc_internal (vuc, vuc, vuc, vuc); > BUILD_ACC_INTERNAL mma_assemble_acc {mma} > > void 
__builtin_mma_disassemble_acc (void *, v512 *); > - DISASSEMBLE_ACC nothing {mma,quad} > + DISASSEMBLE_ACC nothing {mma,quad,mmaint} > > vuc __builtin_mma_disassemble_acc_internal (v512, const int<2>); > DISASSEMBLE_ACC_INTERNAL mma_disassemble_acc {mma} > > void __builtin_mma_disassemble_pair (void *, v256 *); > - DISASSEMBLE_PAIR nothing {mma,pair} > + DISASSEMBLE_PAIR nothing {mma,pair,mmaint} > > vuc __builtin_mma_disassemble_pair_internal (v256, const int<2>); > DISASSEMBLE_PAIR_INTERNAL vsx_disassemble_pair {mma} > > void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVBF16GER2 nothing {mma} > + PMXVBF16GER2 nothing {mma,mmaint} > > v512 __builtin_mma_pmxvbf16ger2_internal (vuc, vuc, const int<4>, const > int<4>, const int<2>); > PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma} > > void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVBF16GER2NN nothing {mma,quad} > + PMXVBF16GER2NN nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvbf16ger2nn_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad} > > void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVBF16GER2NP nothing {mma,quad} > + PMXVBF16GER2NP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvbf16ger2np_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad} > > void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVBF16GER2PN nothing {mma,quad} > + PMXVBF16GER2PN nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvbf16ger2pn_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad} > > void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - 
PMXVBF16GER2PP nothing {mma,quad} > + PMXVBF16GER2PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvbf16ger2pp_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad} > > void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVF16GER2 nothing {mma} > + PMXVF16GER2 nothing {mma,mmaint} > > v512 __builtin_mma_pmxvf16ger2_internal (vuc, vuc, const int<4>, const > int<4>, const int<2>); > PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma} > > void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVF16GER2NN nothing {mma,quad} > + PMXVF16GER2NN nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvf16ger2nn_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad} > > void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVF16GER2NP nothing {mma,quad} > + PMXVF16GER2NP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvf16ger2np_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad} > > void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVF16GER2PN nothing {mma,quad} > + PMXVF16GER2PN nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvf16ger2pn_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad} > > void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVF16GER2PP nothing {mma,quad} > + PMXVF16GER2PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvf16ger2pp_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad} > > void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, 
const > int<4>); > - PMXVF32GER nothing {mma} > + PMXVF32GER nothing {mma,mmaint} > > v512 __builtin_mma_pmxvf32ger_internal (vuc, vuc, const int<4>, const > int<4>); > PMXVF32GER_INTERNAL mma_pmxvf32ger {mma} > > void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const > int<4>); > - PMXVF32GERNN nothing {mma,quad} > + PMXVF32GERNN nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvf32gernn_internal (v512, vuc, vuc, const int<4>, > const int<4>); > PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad} > > void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const > int<4>); > - PMXVF32GERNP nothing {mma,quad} > + PMXVF32GERNP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvf32gernp_internal (v512, vuc, vuc, const int<4>, > const int<4>); > PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad} > > void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const > int<4>); > - PMXVF32GERPN nothing {mma,quad} > + PMXVF32GERPN nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvf32gerpn_internal (v512, vuc, vuc, const int<4>, > const int<4>); > PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad} > > void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const > int<4>); > - PMXVF32GERPP nothing {mma,quad} > + PMXVF32GERPP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvf32gerpp_internal (v512, vuc, vuc, const int<4>, > const int<4>); > PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad} > > void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const > int<2>); > - PMXVF64GER nothing {mma,pair} > + PMXVF64GER nothing {mma,pair,mmaint} > > v512 __builtin_mma_pmxvf64ger_internal (v256, vuc, const int<4>, const > int<2>); > PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair} > > void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const > int<2>); > - PMXVF64GERNN nothing {mma,pair,quad} > + PMXVF64GERNN nothing {mma,pair,quad,mmaint} > > v512 __builtin_mma_pmxvf64gernn_internal (v512, v256, vuc, const int<4>, > 
const int<2>); > PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad} > > void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const > int<2>); > - PMXVF64GERNP nothing {mma,pair,quad} > + PMXVF64GERNP nothing {mma,pair,quad,mmaint} > > v512 __builtin_mma_pmxvf64gernp_internal (v512, v256, vuc, const int<4>, > const int<2>); > PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad} > > void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const > int<2>); > - PMXVF64GERPN nothing {mma,pair,quad} > + PMXVF64GERPN nothing {mma,pair,quad,mmaint} > > v512 __builtin_mma_pmxvf64gerpn_internal (v512, v256, vuc, const int<4>, > const int<2>); > PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad} > > void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const > int<2>); > - PMXVF64GERPP nothing {mma,pair,quad} > + PMXVF64GERPP nothing {mma,pair,quad,mmaint} > > v512 __builtin_mma_pmxvf64gerpp_internal (v512, v256, vuc, const int<4>, > const int<2>); > PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad} > > void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVI16GER2 nothing {mma} > + PMXVI16GER2 nothing {mma,mmaint} > > v512 __builtin_mma_pmxvi16ger2_internal (vuc, vuc, const int<4>, const > int<4>, const int<2>); > PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma} > > void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVI16GER2PP nothing {mma,quad} > + PMXVI16GER2PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvi16ger2pp_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad} > > void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVI16GER2S nothing {mma} > + PMXVI16GER2S nothing {mma,mmaint} > > v512 __builtin_mma_pmxvi16ger2s_internal (vuc, vuc, const int<4>, const > int<4>, const int<2>); > 
PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma} > > void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<2>); > - PMXVI16GER2SPP nothing {mma,quad} > + PMXVI16GER2SPP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvi16ger2spp_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<2>); > PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad} > > void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<8>); > - PMXVI4GER8 nothing {mma} > + PMXVI4GER8 nothing {mma,mmaint} > > v512 __builtin_mma_pmxvi4ger8_internal (vuc, vuc, const int<4>, const > int<4>, const int<8>); > PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma} > > void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<4>); > - PMXVI4GER8PP nothing {mma,quad} > + PMXVI4GER8PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvi4ger8pp_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<4>); > PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad} > > void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<4>); > - PMXVI8GER4 nothing {mma} > + PMXVI8GER4 nothing {mma,mmaint} > > v512 __builtin_mma_pmxvi8ger4_internal (vuc, vuc, const int<4>, const > int<4>, const int<4>); > PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma} > > void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<4>); > - PMXVI8GER4PP nothing {mma,quad} > + PMXVI8GER4PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvi8ger4pp_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<4>); > PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad} > > void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const > int<4>, const int<4>); > - PMXVI8GER4SPP nothing {mma,quad} > + PMXVI8GER4SPP nothing {mma,quad,mmaint} > > v512 __builtin_mma_pmxvi8ger4spp_internal (v512, vuc, vuc, const int<4>, > const int<4>, const int<4>); > PMXVI8GER4SPP_INTERNAL 
mma_pmxvi8ger4spp {mma,quad} > > void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc); > - XVBF16GER2 nothing {mma} > + XVBF16GER2 nothing {mma,mmaint} > > v512 __builtin_mma_xvbf16ger2_internal (vuc, vuc); > XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma} > > void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc); > - XVBF16GER2NN nothing {mma,quad} > + XVBF16GER2NN nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvbf16ger2nn_internal (v512, vuc, vuc); > XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad} > > void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc); > - XVBF16GER2NP nothing {mma,quad} > + XVBF16GER2NP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvbf16ger2np_internal (v512, vuc, vuc); > XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad} > > void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc); > - XVBF16GER2PN nothing {mma,quad} > + XVBF16GER2PN nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvbf16ger2pn_internal (v512, vuc, vuc); > XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad} > > void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc); > - XVBF16GER2PP nothing {mma,quad} > + XVBF16GER2PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvbf16ger2pp_internal (v512, vuc, vuc); > XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad} > > void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc); > - XVF16GER2 nothing {mma} > + XVF16GER2 nothing {mma,mmaint} > > v512 __builtin_mma_xvf16ger2_internal (vuc, vuc); > XVF16GER2_INTERNAL mma_xvf16ger2 {mma} > > void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc); > - XVF16GER2NN nothing {mma,quad} > + XVF16GER2NN nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvf16ger2nn_internal (v512, vuc, vuc); > XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad} > > void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc); > - XVF16GER2NP nothing {mma,quad} > + XVF16GER2NP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvf16ger2np_internal (v512, vuc, vuc); > XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad} > > void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc); 
> - XVF16GER2PN nothing {mma,quad} > + XVF16GER2PN nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvf16ger2pn_internal (v512, vuc, vuc); > XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad} > > void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc); > - XVF16GER2PP nothing {mma,quad} > + XVF16GER2PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvf16ger2pp_internal (v512, vuc, vuc); > XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad} > > void __builtin_mma_xvf32ger (v512 *, vuc, vuc); > - XVF32GER nothing {mma} > + XVF32GER nothing {mma,mmaint} > > v512 __builtin_mma_xvf32ger_internal (vuc, vuc); > XVF32GER_INTERNAL mma_xvf32ger {mma} > > void __builtin_mma_xvf32gernn (v512 *, vuc, vuc); > - XVF32GERNN nothing {mma,quad} > + XVF32GERNN nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvf32gernn_internal (v512, vuc, vuc); > XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad} > > void __builtin_mma_xvf32gernp (v512 *, vuc, vuc); > - XVF32GERNP nothing {mma,quad} > + XVF32GERNP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvf32gernp_internal (v512, vuc, vuc); > XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad} > > void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc); > - XVF32GERPN nothing {mma,quad} > + XVF32GERPN nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvf32gerpn_internal (v512, vuc, vuc); > XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad} > > void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc); > - XVF32GERPP nothing {mma,quad} > + XVF32GERPP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvf32gerpp_internal (v512, vuc, vuc); > XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad} > > void __builtin_mma_xvf64ger (v512 *, v256, vuc); > - XVF64GER nothing {mma,pair} > + XVF64GER nothing {mma,pair,mmaint} > > v512 __builtin_mma_xvf64ger_internal (v256, vuc); > XVF64GER_INTERNAL mma_xvf64ger {mma,pair} > > void __builtin_mma_xvf64gernn (v512 *, v256, vuc); > - XVF64GERNN nothing {mma,pair,quad} > + XVF64GERNN nothing {mma,pair,quad,mmaint} > > v512 __builtin_mma_xvf64gernn_internal 
(v512, v256, vuc); > XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad} > > void __builtin_mma_xvf64gernp (v512 *, v256, vuc); > - XVF64GERNP nothing {mma,pair,quad} > + XVF64GERNP nothing {mma,pair,quad,mmaint} > > v512 __builtin_mma_xvf64gernp_internal (v512, v256, vuc); > XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad} > > void __builtin_mma_xvf64gerpn (v512 *, v256, vuc); > - XVF64GERPN nothing {mma,pair,quad} > + XVF64GERPN nothing {mma,pair,quad,mmaint} > > v512 __builtin_mma_xvf64gerpn_internal (v512, v256, vuc); > XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad} > > void __builtin_mma_xvf64gerpp (v512 *, v256, vuc); > - XVF64GERPP nothing {mma,pair,quad} > + XVF64GERPP nothing {mma,pair,quad,mmaint} > > v512 __builtin_mma_xvf64gerpp_internal (v512, v256, vuc); > XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad} > > void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc); > - XVI16GER2 nothing {mma} > + XVI16GER2 nothing {mma,mmaint} > > v512 __builtin_mma_xvi16ger2_internal (vuc, vuc); > XVI16GER2_INTERNAL mma_xvi16ger2 {mma} > > void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc); > - XVI16GER2PP nothing {mma,quad} > + XVI16GER2PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvi16ger2pp_internal (v512, vuc, vuc); > XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad} > > void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc); > - XVI16GER2S nothing {mma} > + XVI16GER2S nothing {mma,mmaint} > > v512 __builtin_mma_xvi16ger2s_internal (vuc, vuc); > XVI16GER2S_INTERNAL mma_xvi16ger2s {mma} > > void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc); > - XVI16GER2SPP nothing {mma,quad} > + XVI16GER2SPP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvi16ger2spp_internal (v512, vuc, vuc); > XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad} > > void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc); > - XVI4GER8 nothing {mma} > + XVI4GER8 nothing {mma,mmaint} > > v512 __builtin_mma_xvi4ger8_internal (vuc, vuc); > XVI4GER8_INTERNAL mma_xvi4ger8 {mma} > > void __builtin_mma_xvi4ger8pp 
(v512 *, vuc, vuc); > - XVI4GER8PP nothing {mma,quad} > + XVI4GER8PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvi4ger8pp_internal (v512, vuc, vuc); > XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad} > > void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc); > - XVI8GER4 nothing {mma} > + XVI8GER4 nothing {mma,mmaint} > > v512 __builtin_mma_xvi8ger4_internal (vuc, vuc); > XVI8GER4_INTERNAL mma_xvi8ger4 {mma} > > void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc); > - XVI8GER4PP nothing {mma,quad} > + XVI8GER4PP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvi8ger4pp_internal (v512, vuc, vuc); > XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad} > > void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc); > - XVI8GER4SPP nothing {mma,quad} > + XVI8GER4SPP nothing {mma,quad,mmaint} > > v512 __builtin_mma_xvi8ger4spp_internal (v512, vuc, vuc); > XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad} > > void __builtin_mma_xxmfacc (v512 *); > - XXMFACC nothing {mma,quad} > + XXMFACC nothing {mma,quad,mmaint} > > v512 __builtin_mma_xxmfacc_internal (v512); > XXMFACC_INTERNAL mma_xxmfacc {mma,quad} > > void __builtin_mma_xxmtacc (v512 *); > - XXMTACC nothing {mma,quad} > + XXMTACC nothing {mma,quad,mmaint} > > v512 __builtin_mma_xxmtacc_internal (v512); > XXMTACC_INTERNAL mma_xxmtacc {mma,quad} > > void __builtin_mma_xxsetaccz (v512 *); > - XXSETACCZ nothing {mma} > + XXSETACCZ nothing {mma,mmaint} > > v512 __builtin_mma_xxsetaccz_internal (); > XXSETACCZ_INTERNAL mma_xxsetaccz {mma} > > void __builtin_vsx_assemble_pair (v256 *, vuc, vuc); > - ASSEMBLE_PAIR_V nothing {mma} > + ASSEMBLE_PAIR_V nothing {mma,mmaint} > > v256 __builtin_vsx_assemble_pair_internal (vuc, vuc); > ASSEMBLE_PAIR_V_INTERNAL vsx_assemble_pair {mma} > > void __builtin_vsx_build_pair (v256 *, vuc, vuc); > - BUILD_PAIR nothing {mma} > + BUILD_PAIR nothing {mma,mmaint} > > v256 __builtin_vsx_build_pair_internal (vuc, vuc); > BUILD_PAIR_INTERNAL vsx_assemble_pair {mma} > > void __builtin_vsx_disassemble_pair (void *, v256 
*); > - DISASSEMBLE_PAIR_V nothing {mma,pair} > + DISASSEMBLE_PAIR_V nothing {mma,pair,mmaint} > > vuc __builtin_vsx_disassemble_pair_internal (v256, const int<2>); > DISASSEMBLE_PAIR_V_INTERNAL vsx_disassemble_pair {mma} > + > + v256 __builtin_vsx_lxvp (unsigned long, const v256 *); > + LXVP nothing {mma} > + > + void __builtin_vsx_stxvp (v256, unsigned long, const v256 *); > + STXVP nothing {mma,pair} > diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c > index eae4e15df1e..558f06cfd6c 100644 > --- a/gcc/config/rs6000/rs6000-call.c > +++ b/gcc/config/rs6000/rs6000-call.c > @@ -13072,8 +13072,10 @@ rs6000_gimple_fold_new_mma_builtin > (gimple_stmt_iterator *gsi, > > /* Each call that can be gimple-expanded has an associated built-in > function that it will expand into. If this one doesn't, we have > - already expanded it! */ > - if (rs6000_builtin_info_x[fncode].assoc_bif == RS6000_BIF_NONE) > + already expanded it! Exceptions: lxvp and stxvp. */ > + if (rs6000_builtin_info_x[fncode].assoc_bif == RS6000_BIF_NONE > + && fncode != RS6000_BIF_LXVP > + && fncode != RS6000_BIF_STXVP) > return false; > > bifdata *bd = &rs6000_builtin_info_x[fncode]; > @@ -13150,6 +13152,38 @@ rs6000_gimple_fold_new_mma_builtin > (gimple_stmt_iterator *gsi, > gsi_replace_with_seq (gsi, new_seq, true); > return true; > } > + else if (fncode == RS6000_BIF_LXVP) > + { > + push_gimplify_context (true); > + tree offset = gimple_call_arg (stmt, 0); > + tree ptr = gimple_call_arg (stmt, 1); > + tree lhs = gimple_call_lhs (stmt); > + if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node) > + ptr = build1 (VIEW_CONVERT_EXPR, > + build_pointer_type (vector_pair_type_node), ptr); > + tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, > + TREE_TYPE (ptr), ptr, offset)); > + gimplify_assign (lhs, mem, &new_seq); > + pop_gimplify_context (NULL); > + gsi_replace_with_seq (gsi, new_seq, true); > + return true; > + } > + else if (fncode == RS6000_BIF_STXVP) > + { > 
+ push_gimplify_context (true); > + tree src = gimple_call_arg (stmt, 0); > + tree offset = gimple_call_arg (stmt, 1); > + tree ptr = gimple_call_arg (stmt, 2); > + if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node) > + ptr = build1 (VIEW_CONVERT_EXPR, > + build_pointer_type (vector_pair_type_node), ptr); > + tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, > + TREE_TYPE (ptr), ptr, offset)); > + gimplify_assign (mem, src, &new_seq); > + pop_gimplify_context (NULL); > + gsi_replace_with_seq (gsi, new_seq, true); > + return true; > + } > ok > /* Convert this built-in into an internal version that uses pass-by-value > arguments. The internal built-in is found in the assoc_bif field. */ > diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c > b/gcc/config/rs6000/rs6000-gen-builtins.c > index f65932e1cd5..7f711210aff 100644 > --- a/gcc/config/rs6000/rs6000-gen-builtins.c > +++ b/gcc/config/rs6000/rs6000-gen-builtins.c > @@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. 
If not see > mma Needs special handling for MMA instructions > quad MMA instruction using a register quad as an input operand > pair MMA instruction using a register pair as an input operand > + mmaint MMA instruction expanding to internal call at GIMPLE time > no32bit Not valid for TARGET_32BIT > 32bit Requires different handling for TARGET_32BIT > cpu This is a "cpu_is" or "cpu_supports" builtin > @@ -369,6 +370,7 @@ struct attrinfo > bool ismma; > bool isquad; > bool ispair; > + bool ismmaint; > bool isno32bit; > bool is32bit; > bool iscpu; > @@ -1363,6 +1365,8 @@ parse_bif_attrs (attrinfo *attrptr) > attrptr->isquad = 1; > else if (!strcmp (attrname, "pair")) > attrptr->ispair = 1; > + else if (!strcmp (attrname, "mmaint")) > + attrptr->ismmaint = 1; > else if (!strcmp (attrname, "no32bit")) > attrptr->isno32bit = 1; > else if (!strcmp (attrname, "32bit")) > @@ -1409,15 +1413,15 @@ parse_bif_attrs (attrinfo *attrptr) > (*diag) ("attribute set: init = %d, set = %d, extract = %d, nosoft = %d, " > "ldvec = %d, stvec = %d, reve = %d, pred = %d, htm = %d, " > "htmspr = %d, htmcr = %d, mma = %d, quad = %d, pair = %d, " > - "no32bit = %d, 32bit = %d, cpu = %d, ldstmask = %d, lxvrse = %d, " > - "lxvrze = %d, endian = %d.\n", > + "mmaint = %d, no32bit = %d, 32bit = %d, cpu = %d, ldstmask = %d, " > + "lxvrse = %d, lxvrze = %d, endian = %d.\n", > attrptr->isinit, attrptr->isset, attrptr->isextract, > attrptr->isnosoft, attrptr->isldvec, attrptr->isstvec, > attrptr->isreve, attrptr->ispred, attrptr->ishtm, attrptr->ishtmspr, > attrptr->ishtmcr, attrptr->ismma, attrptr->isquad, attrptr->ispair, > - attrptr->isno32bit, attrptr->is32bit, attrptr->iscpu, > - attrptr->isldstmask, attrptr->islxvrse, attrptr->islxvrze, > - attrptr->isendian); > + attrptr->ismmaint, attrptr->isno32bit, attrptr->is32bit, > + attrptr->iscpu, attrptr->isldstmask, attrptr->islxvrse, > + attrptr->islxvrze, attrptr->isendian); > #endif > > return PC_OK; > @@ -2223,13 +2227,14 @@ write_decls (void) > 
fprintf (header_file, "#define bif_mma_bit\t\t(0x00000800)\n"); > fprintf (header_file, "#define bif_quad_bit\t\t(0x00001000)\n"); > fprintf (header_file, "#define bif_pair_bit\t\t(0x00002000)\n"); > - fprintf (header_file, "#define bif_no32bit_bit\t\t(0x00004000)\n"); > - fprintf (header_file, "#define bif_32bit_bit\t\t(0x00008000)\n"); > - fprintf (header_file, "#define bif_cpu_bit\t\t(0x00010000)\n"); > - fprintf (header_file, "#define bif_ldstmask_bit\t(0x00020000)\n"); > - fprintf (header_file, "#define bif_lxvrse_bit\t\t(0x00040000)\n"); > - fprintf (header_file, "#define bif_lxvrze_bit\t\t(0x00080000)\n"); > - fprintf (header_file, "#define bif_endian_bit\t\t(0x00100000)\n"); > + fprintf (header_file, "#define bif_mmaint_bit\t\t(0x00004000)\n"); > + fprintf (header_file, "#define bif_no32bit_bit\t\t(0x00008000)\n"); > + fprintf (header_file, "#define bif_32bit_bit\t\t(0x00010000)\n"); > + fprintf (header_file, "#define bif_cpu_bit\t\t(0x00020000)\n"); > + fprintf (header_file, "#define bif_ldstmask_bit\t(0x00040000)\n"); > + fprintf (header_file, "#define bif_lxvrse_bit\t\t(0x00080000)\n"); > + fprintf (header_file, "#define bif_lxvrze_bit\t\t(0x00100000)\n"); > + fprintf (header_file, "#define bif_endian_bit\t\t(0x00200000)\n"); > fprintf (header_file, "\n"); ok > fprintf (header_file, > "#define bif_is_init(x)\t\t((x).bifattrs & bif_init_bit)\n"); > @@ -2259,6 +2264,8 @@ write_decls (void) > "#define bif_is_quad(x)\t\t((x).bifattrs & bif_quad_bit)\n"); > fprintf (header_file, > "#define bif_is_pair(x)\t\t((x).bifattrs & bif_pair_bit)\n"); > + fprintf (header_file, > + "#define bif_is_mmaint(x)\t\t((x).bifattrs & bif_mmaint_bit)\n"); > fprintf (header_file, > "#define bif_is_no32bit(x)\t((x).bifattrs & bif_no32bit_bit)\n"); > fprintf (header_file, > @@ -2491,6 +2498,8 @@ write_bif_static_init (void) > fprintf (init_file, " | bif_quad_bit"); > if (bifp->attrs.ispair) > fprintf (init_file, " | bif_pair_bit"); > + if (bifp->attrs.ismmaint) > + fprintf 
(init_file, " | bif_mmaint_bit"); > if (bifp->attrs.isno32bit) > fprintf (init_file, " | bif_no32bit_bit"); > if (bifp->attrs.is32bit) > @@ -2537,10 +2546,9 @@ write_bif_static_init (void) > : (bifp->kind == FNK_PURE ? "= pure" > : (bifp->kind == FNK_FPMATH ? "= fp, const" > : "")))); > - bool no_icode = !strcmp (bifp->patname, "nothing"); > fprintf (init_file, " /* assoc_bif */\tRS6000_BIF_%s%s\n", > - bifp->attrs.ismma && no_icode ? bifp->idname : "NONE", > - bifp->attrs.ismma && no_icode ? "_INTERNAL" : ""); > + bifp->attrs.ismmaint ? bifp->idname : "NONE", > + bifp->attrs.ismmaint ? "_INTERNAL" : ""); Ok. (I did look; it does appear that there are other references to .ismma, so this is simply a different flag rather than a replacement/rename.) > fprintf (init_file, " },\n"); > } > fprintf (init_file, " };\n\n"); lgtm, thanks -Will