https://gcc.gnu.org/g:e280d42199f9fffb196fc4fa51ca3801dffa15e2
commit e280d42199f9fffb196fc4fa51ca3801dffa15e2 Author: Surya Kumari Jangala <jskum...@linux.ibm.com> Date: Mon Jul 14 07:05:58 2025 -0500 MMA+: Add support for dmr disassemble builtins Add support for __builtin_mma_dmr_extract512 and __builtin_mma_disassemble_dmr. Diff: --- gcc/config/rs6000/mma.md | 1 + gcc/config/rs6000/rs6000-builtin.cc | 73 +++++++++++++++++++++- gcc/config/rs6000/rs6000-builtins.def | 9 +++ .../gcc.target/powerpc/dmf-disassemble-dmr.c | 22 +++++++ gcc/testsuite/gcc.target/powerpc/dmf-extract512.c | 18 ++++++ 5 files changed, 121 insertions(+), 2 deletions(-) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index 14f33724d69c..6de45acd4175 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -95,6 +95,7 @@ UNSPEC_DM_INSERT512_LOWER UNSPEC_DM_INSERT1024 UNSPEC_DM_EXTRACT512 + UNSPEC_DM_EXTRACT1024 UNSPEC_DMR_RELOAD_FROM_MEMORY UNSPEC_DMR_RELOAD_TO_MEMORY UNSPEC_DMF_DMXOR diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 00c1a6687101..a27499d20a65 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -1107,11 +1107,12 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi, /* Each call that can be gimple-expanded has an associated built-in function that it will expand into. If this one doesn't, we have - already expanded it! Exceptions: lxvp and stxvp. */ + already expanded it! Exceptions: lxvp, stxvp and disassemble_dmr. 
*/ if (rs6000_builtin_info[fncode].assoc_bif == RS6000_BIF_NONE && fncode != RS6000_BIF_LXVP && fncode != RS6000_BIF_STXVP - && fncode != RS6000_BIF_DMMR) + && fncode != RS6000_BIF_DMMR + && fncode != RS6000_BIF_DISASSEMBLE_DMR) return false; bifdata *bd = &rs6000_builtin_info[fncode]; @@ -1119,6 +1120,74 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi, gimple *new_call; tree new_decl; + if (fncode == RS6000_BIF_DM_EXTRACT512 + || fncode == RS6000_BIF_DISASSEMBLE_DMR) + { + unsigned num_extract512; + push_gimplify_context (true); + tree dst_ptr = gimple_call_arg (stmt, 0); + tree src_ptr = gimple_call_arg (stmt, 1); + tree src_type = build_pointer_type (dmr_type_node); + + if (TREE_TYPE (src_ptr) != src_type) + src_ptr = build1 (NOP_EXPR, src_type, src_ptr); + + // the following code will ensure we are sending *src as parameter + tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type)); + gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq); + + // now we should call the internal builtin RS6000_BIF_DM_EXTRACT512_INTERNAL + if (fncode == RS6000_BIF_DISASSEMBLE_DMR) + num_extract512 = 2; + else + num_extract512 = 1; + + tree extract_decl = rs6000_builtin_decls[RS6000_BIF_DM_EXTRACT512_INTERNAL]; + + for (unsigned i = 0; i < num_extract512; i++) + { + tree const_arg; + if (fncode == RS6000_BIF_DISASSEMBLE_DMR) + const_arg = build_int_cstu (uint16_type_node, i); + else + const_arg = gimple_call_arg (stmt, 2); + + // create call + new_call = gimple_build_call (extract_decl, 2, src, const_arg); + // create a tmp reg to denote lhs of call + tree lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node); + + // lhs = new_call + gimple_call_set_lhs (new_call, lhs); + + // add gimple stmt to gimple sequence + gimple_seq_add_stmt (&new_seq, new_call); + + // Now lhs contains the 512-bit value in vector_quad. 
We have to now + // split up the vector_quad into individual vectors + + new_decl = rs6000_builtin_decls[RS6000_BIF_DISASSEMBLE_ACC_INTERNAL]; + tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node, + ptr_mode, true); + + tree dst_base = build1 (NOP_EXPR, dst_type, dst_ptr); + for (unsigned j = 0; j < 4; j++) + { + tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base, + build_int_cst (dst_type, j * 16 + i * 64)); + tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node); + new_call = gimple_build_call (new_decl, 2, lhs, + build_int_cstu (uint16_type_node, j)); + gimple_call_set_lhs (new_call, dstssa); + gimple_seq_add_stmt (&new_seq, new_call); + gimplify_assign (dst, dstssa, &new_seq); + } + } + pop_gimplify_context (NULL); + gsi_replace_with_seq (gsi, new_seq, true); + return true; + } + /* Compatibility built-ins; we used to call these __builtin_mma_{dis,}assemble_pair, but now we call them __builtin_vsx_{dis,}assemble_pair. Handle the old versions. 
*/ diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index 07ead4b9ffee..7ba1715b89cd 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -3935,6 +3935,12 @@ dm1024 __builtin_mma_dmsetdmrz_internal (); DMSETDMRZ_INTERNAL mma_dmsetdmrz {dm} + void __builtin_mma_dmr_extract512 (void *, dm1024 *, const int<2>); + DM_EXTRACT512 nothing {dm,dmint} + + v512 __builtin_mma_dmr_extract512_internal (dm1024, const int<2>); + DM_EXTRACT512_INTERNAL movtdo_extract512 {dm} + void __builtin_mma_dmmr (dm1024 *, dm1024 *); DMMR nothing {dm,dmint} @@ -3953,6 +3959,9 @@ dm1024 __builtin_mma_build_dmr_internal (vuc, vuc, vuc, vuc, vuc, vuc, vuc, vuc); BUILD_DMR_INTERNAL mma_build_dmr {dm} + void __builtin_mma_disassemble_dmr (void *, dm1024 *); + DISASSEMBLE_DMR nothing {dm} + void __builtin_mma_dmxvi8gerx4 (dm1024 *, v256, vuc); DMXVI8GERX4 nothing {dm,dmint} diff --git a/gcc/testsuite/gcc.target/powerpc/dmf-disassemble-dmr.c b/gcc/testsuite/gcc.target/powerpc/dmf-disassemble-dmr.c new file mode 100644 index 000000000000..b1406ec380e1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/dmf-disassemble-dmr.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=future -O2" } */ + +typedef unsigned char vec_t __attribute__((vector_size(16))); + +void +bar (vec_t *dst, __dmr *src) +{ + vec_t res[8]; /* Room for the 8 16-byte vectors (2 x 512 bits) the builtin stores.  */ + __builtin_mma_disassemble_dmr (res, src); + dst[0] = res[0]; + dst[2] = res[1]; + dst[4] = res[2]; + dst[6] = res[3]; + dst[8] = res[4]; + dst[10] = res[5]; + dst[12] = res[6]; + dst[14] = res[7]; +} + +/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstxv\M} 8 } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/dmf-extract512.c b/gcc/testsuite/gcc.target/powerpc/dmf-extract512.c new file mode 100644 index 000000000000..35c1cfaff509 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/dmf-extract512.c @@ -0,0 +1,18 @@ +/* { 
dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=future -O2" } */
+
+typedef unsigned char vec_t __attribute__((vector_size(16)));
+
+void
+bar (vec_t *dst, __dmr *src)
+{
+  vec_t res[4]; /* One 512-bit part = 4 16-byte vectors.  */
+  __builtin_mma_dmr_extract512 (res, src, 0); /* 3rd arg: constant index of the 512-bit part to extract.  */
+  dst[0] = res[0];
+  dst[2] = res[1];
+  dst[4] = res[2];
+  dst[6] = res[3];
+}
+
+/* { dg-final { scan-assembler-times {\mdmxxextfdmr512\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mstxv\M} 4 } } */