================ @@ -662,4 +662,152 @@ def XeGPU_UpdateOffsetOp: XeGPU_Op<"update_offset", }]; } +def XeGPU_DpasOp : XeGPU_Op<"dpas", [Pure, AllElementTypesMatch<["lhs", "rhs"]>]> { + let summary = "It performs mma computation"; + + let description = [{DPAS performs matrix multiplication on matrix A of `mxk` + size, B of `kxn` size, and accumulate on matrix C of `mxn` to the same size + matrix , `m=8`, `n=16` and `k=8 * 32/bit_width_of_elem_type`. So for fp16 + data type, the matrices are `A: vector<8x16xf16>`, `B: vector<16x16xf16>`, + and `C/D: vector<8x16xf32>`. Besides the matrix size requirements, DPAS + also requires A and B to be loaded with the required data layout. Specially, + VNNI layout is required for B operand. It is achieved via setting `vnni_axis = 0` + of the corresponding `load_nd` operator. To keep both operands as 3D vector, + operand A is loaded via setting `vnni_axis = 1` without impacting the + physical layouts change in register. Due to the VNNI transformation, A and B operands + are represented as 3D vector, with the last dimension representing the VNNI factor, + which is computed as `32/bit_width_of_elem_type`. Therefore, `A: vector<8x16xf16>` + is represented as `A: vector<4x8x2xf16>`, and `B:vector<16x16xf16>` is ---------------- adam-smnk wrote:
```suggestion
    is represented as `A: vector<8x8x2xf16>`, and `B: vector<16x16xf16>` is
```

https://github.com/llvm/llvm-project/pull/88439
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits