https://gcc.gnu.org/g:42e629408136542b17fc0faf2638231a6b6c4509
commit 42e629408136542b17fc0faf2638231a6b6c4509 Author: Michael Meissner <[email protected]> Date: Fri Feb 13 04:47:03 2026 -0500 Add support for dense math registers #3. This patch completes support for the dense math registes with 512-bit types. A future path will add support for 1,024-bit dense registers. gcc/ 2026-02-13 Michael Meissner <[email protected]> * config/rs6000/mma.md (mma_<acc>): Add support for dense registers. Document which instructions are generated by each insn. (mma_<vv>): Likewise. (mma_<avv>): Likewise. (mma_<pv>): Likewise. (mma_<apv>): Likewise. (mma_<vvi4i4i8>): Likewise. (mma_<avvi4i4i8>): Likewise. (mma_<vvi4i4i2>): Likewise. (mma_<avvi4i4i2): Likewise. (mma_<vvi4i4>): Likewise. (mma_<avvi4i4>): Likewise. (mma_<pvi4i2>): Likewise. (mma_<apvi4i2>): Likewise. (mma_<vvi4i4i4>): Likewise. (mma_<avvi4i4i4>): Likewise. * config/rs6000/rs6000-builtin.cc (rs6000_gimple_fold_mma_builtin): Do not issue a xxmfacc instruction if we support dense math registers. * config/rs6000/rs6000-cpu.def (FUTURE_MASKS_SERVER): If -mcpu=future, turn on -mdense-math. (POWERPC_MASKS): Mark -mdense-math as being set by -mcpu=<xxx> options. Diff: --- gcc/config/rs6000/mma.md | 91 +++++++++++++++++++++++++++---------- gcc/config/rs6000/rs6000-builtin.cc | 5 +- gcc/config/rs6000/rs6000-cpus.def | 2 + 3 files changed, 71 insertions(+), 27 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 6f05f5a35811..9855b446570e 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -488,15 +488,18 @@ DONE; }) -;; MMA instructions that do not use their accumulators as an input, still -;; must not allow their vector operands to overlap the registers used by -;; the accumulator. We enforce this by marking the output as early clobber. +;; If dense math registers are not available, MMA instructions that do +;; not use their accumulators that overlap with FPR registers as an +;; input, still must not allow their vector operands to overlap the +;; registers used by the accumulator. We enforce this by marking the +;; output as early clobber. The prime and de-prime instructions are +;; not needed on systems with dense math registers. (define_insn "mma_<acc>" [(set (match_operand:XO 0 "fpr_reg_operand" "=&d") (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")] MMA_ACC))] - "TARGET_MMA" + "TARGET_MMA && !TARGET_DENSE_MATH" "<acc> %A0" [(set_attr "type" "mma")]) @@ -540,8 +543,12 @@ ;; If dense math registers are not available, these operations will use ;; accumulators that are overlaid on top of the FPR registers. +;; Instructions: +;; xvi4ger8 xvi8ger4 xvi16ger2 xvi16ger2s xvf16ger2 +;; xvbf16ger2 xvf32ger + (define_insn "mma_<vv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_VV))] @@ -549,9 +556,15 @@ "<vv> %A0,%x1,%x2" [(set_attr "type" "mma")]) +;; Instructions: +;; xvi4ger8pp xvi8ger4pp xvi8ger4spp xvi16ger2pp xvi16ger2spp +;; xvf16ger2pp xvf16ger2pn xvf16ger2np xvf16ger2nn xvbf16ger2pp +;; xvbf16ger2pn xvbf16ger2np xvbf16ger2nn xvf32gerpp xvf32gerpn +;; xvf32gernp xvf32gernn + (define_insn "mma_<avv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_AVV))] @@ -559,8 +572,10 @@ "<avv> %A0,%x2,%x3" [(set_attr "type" "mma")]) +;; Instruction: xvf64ger + (define_insn "mma_<pv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")] MMA_PV))] @@ -568,9 +583,11 @@ "<pv> %A0,%x1,%x2" [(set_attr "type" "mma")]) +;; Instructions: xvf64gerpp xvf64gerpn xvf64gernp xvf64gernn + (define_insn "mma_<apv>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")] MMA_APV))] @@ -578,8 +595,10 @@ "<apv> %A0,%x2,%x3" [(set_attr "type" "mma")]) +;; Instruction: pmxvi4ger8 + (define_insn "mma_<vvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -591,9 +610,11 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instruction: pmxvi4ger8pp + (define_insn "mma_<avvi4i4i8>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -605,8 +626,11 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: +;; pmxvi16ger2 pmxvi16ger2s pmxvf16ger2 pmxvbf16ger2 + (define_insn "mma_<vvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -618,9 +642,14 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: +;; pmxvi16ger2pp pmxvi16ger2spp pmxvf16ger2pp pmxvf16ger2pn +;; pmxvf16ger2np pmxvf16ger2nn pmxvbf16ger2pp pmxvbf16ger2pn +;; pmxvbf16ger2np pmxvbf16ger2nn + (define_insn "mma_<avvi4i4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -632,8 +661,10 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instruction: pmxvf32ger + (define_insn "mma_<vvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -644,9 +675,11 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: pmxvf32gerpp pmxvf32gerpn pmxvf32gernp pmxvf32gernn + (define_insn "mma_<avvi4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -657,8 +690,10 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instruction: pmxvf64ger + (define_insn "mma_<pvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -669,9 +704,11 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: pmxvf64gerpp pmxvf64gerpn pmxvf64gernp pmxvf64gernn + (define_insn "mma_<apvi4i2>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:OO 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") @@ -682,8 +719,10 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instruction: pmxvi8ger4 + (define_insn "mma_<vvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:SI 3 "const_0_to_15_operand" "n,n") @@ -695,9 +734,11 @@ [(set_attr "type" "mma") (set_attr "prefixed" "yes")]) +;; Instructions: pmxvi8ger4pp pmxvi8ger4spp + (define_insn "mma_<avvi4i4i4>" - [(set (match_operand:XO 0 "fpr_reg_operand" "=&d,&d") - (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0,0") + [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD") + (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0") (match_operand:V16QI 2 "vsx_register_operand" "v,?wa") (match_operand:V16QI 3 "vsx_register_operand" "v,?wa") (match_operand:SI 4 "const_0_to_15_operand" "n,n") diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 45c88fe063b1..084eaab5b96a 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -1125,8 +1125,9 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi, } /* If we're disassembling an accumulator into a different type, we need - to emit a xxmfacc instruction now, since we cannot do it later. */ - if (fncode == RS6000_BIF_DISASSEMBLE_ACC) + to emit a xxmfacc instruction now, since we cannot do it later. If we + have dense math registers, we don't need to do this. */ + if (fncode == RS6000_BIF_DISASSEMBLE_ACC && !TARGET_DENSE_MATH) { new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL]; new_call = gimple_build_call (new_decl, 1, src); diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index dc67e287672e..3e51848481f4 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -91,6 +91,7 @@ will be fixed in potential future machines. */ #define FUTURE_MASKS_SERVER (POWER11_MASKS_SERVER \ | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \ + | OPTION_MASK_DENSE_MATH \ | OPTION_MASK_FUTURE) /* Flags that need to be turned off if -mno-vsx. */ @@ -124,6 +125,7 @@ | OPTION_MASK_BLOCK_OPS_VECTOR_PAIR \ | OPTION_MASK_CMPB \ | OPTION_MASK_CRYPTO \ + | OPTION_MASK_DENSE_MATH \ | OPTION_MASK_DFP \ | OPTION_MASK_DLMZB \ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
