Author: Chinmay Deshpande
Date: 2026-03-18T13:52:34-07:00
New Revision: e044c4ad81f09bba5d31463b2b63f0c27cfc1aca

URL: 
https://github.com/llvm/llvm-project/commit/e044c4ad81f09bba5d31463b2b63f0c27cfc1aca
DIFF: 
https://github.com/llvm/llvm-project/commit/e044c4ad81f09bba5d31463b2b63f0c27cfc1aca.diff

LOG: [AMDGPU] Add target features for SWMMAC instructions (#185785)

Introduce the `swmmac-gfx1200-insts` and `swmmac-gfx1250-insts` target
features and use them to gate the SWMMAC builtins and instruction patterns.

Added: 
    clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-gfx1250-err.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32-gfx10-err.cl
    clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64-gfx10-err.cl

Modified: 
    clang/include/clang/Basic/BuiltinsAMDGPU.td
    llvm/lib/Target/AMDGPU/AMDGPU.td
    llvm/lib/Target/AMDGPU/VOP3PInstructions.td
    llvm/lib/TargetParser/TargetParser.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.td 
b/clang/include/clang/Basic/BuiltinsAMDGPU.td
index acd0a34a79253..20e2af6aaf700 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.td
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.td
@@ -828,29 +828,29 @@ def __builtin_amdgcn_wmma_i32_16x16x32_iu4_w64_gfx12 : 
AMDGPUBuiltin<"_ExtVector
   let ArgNames = ["a_sign", "a", "b_sign", "b", "c", "clamp"];
 }
 
-def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, __fp16>, _ExtVector<16, 
__fp16>, _ExtVector<8, float>, int)", [Const], 
"wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<16, 
short>, _ExtVector<8, float>, int)", [Const], 
"wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : 
AMDGPUBuiltin<"_ExtVector<8, __fp16>(_ExtVector<8, __fp16>, _ExtVector<16, 
__fp16>, _ExtVector<8, __fp16>, int)", [Const], 
"wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : 
AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<16, 
short>, _ExtVector<8, short>, int)", [Const], 
"wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant 
bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], 
"wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : 
AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, 
_ExtVector<2, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], 
"wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : 
AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant 
bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], 
"wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, 
_ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, 
_ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, 
_ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, 
_ExtVector<8, float>, int)", [Const], "wmma-128b-insts,wavefrontsize32">;
-
-def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, __fp16>, _ExtVector<8, 
__fp16>, _ExtVector<4, float>, int)", [Const], 
"wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<8, short>, 
_ExtVector<4, float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : 
AMDGPUBuiltin<"_ExtVector<4, __fp16>(_ExtVector<4, __fp16>, _ExtVector<8, 
__fp16>, _ExtVector<4, __fp16>, int)", [Const], 
"wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : 
AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<8, short>, 
_ExtVector<4, short>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, 
_ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], 
"wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : 
AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, 
_ExtVector<4, int>, int, _Constant bool)", [Const], 
"wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : 
AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, 
_ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], 
"wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, 
float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, 
float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, 
float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
-def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, 
float>, int)", [Const], "wmma-128b-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, __fp16>, _ExtVector<16, 
__fp16>, _ExtVector<8, float>, int)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, short>, _ExtVector<16, 
short>, _ExtVector<8, float>, int)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32 : 
AMDGPUBuiltin<"_ExtVector<8, __fp16>(_ExtVector<8, __fp16>, _ExtVector<16, 
__fp16>, _ExtVector<8, __fp16>, int)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32 : 
AMDGPUBuiltin<"_ExtVector<8, short>(_ExtVector<8, short>, _ExtVector<16, 
short>, _ExtVector<8, short>, int)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant 
bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32 : 
AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, int, _Constant bool, 
_ExtVector<2, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32 : 
AMDGPUBuiltin<"_ExtVector<8, int>(_Constant bool, _ExtVector<2, int>, _Constant 
bool, _ExtVector<4, int>, _ExtVector<8, int>, int, _Constant bool)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, 
_ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, 
_ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, 
_ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<2, int>, _ExtVector<4, int>, 
_ExtVector<8, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize32">;
+
+def __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, __fp16>, _ExtVector<8, 
__fp16>, _ExtVector<4, float>, int)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(_ExtVector<4, short>, _ExtVector<8, short>, 
_ExtVector<4, float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64 : 
AMDGPUBuiltin<"_ExtVector<4, __fp16>(_ExtVector<4, __fp16>, _ExtVector<8, 
__fp16>, _ExtVector<4, __fp16>, int)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64 : 
AMDGPUBuiltin<"_ExtVector<4, short>(_ExtVector<4, short>, _ExtVector<8, short>, 
_ExtVector<4, short>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, 
_ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64 : 
AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, int, 
_ExtVector<4, int>, int, _Constant bool)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64 : 
AMDGPUBuiltin<"_ExtVector<4, int>(_Constant bool, int, _Constant bool, 
_ExtVector<2, int>, _ExtVector<4, int>, int, _Constant bool)", [Const], 
"swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, 
float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, 
float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, 
float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
+def __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64 : 
AMDGPUBuiltin<"_ExtVector<4, float>(int, _ExtVector<2, int>, _ExtVector<4, 
float>, int)", [Const], "swmmac-gfx1200-insts,wavefrontsize64">;
 
 def __builtin_amdgcn_prng_b32 : AMDGPUBuiltin<"unsigned int(unsigned int)", 
[Const], "prng-inst">;
 def __builtin_amdgcn_cvt_scalef32_pk32_fp6_f16 : AMDGPUBuiltin<"_ExtVector<6, 
unsigned int>(_ExtVector<32, _Float16>, float)", [Const], 
"f16bf16-to-fp6bf6-cvt-scale-insts">;
@@ -1170,20 +1170,20 @@ def __builtin_amdgcn_wmma_scale16_f32_32x16x128_f4 : 
AMDGPUBuiltin<"_ExtVector<1
   let Documentation = [DocWMMA_scale16_GFX1250];
   let ArgNames = ["a", "b", "c_mod", "c", "matrix_a_scale", 
"matrix_a_scale_fmt", "matrix_a_scale_exp", "matrix_b_scale", 
"matrix_b_scale_fmt", "matrix_b_scale_exp", "matrix_a_reuse", "matrix_b_reuse"];
 }
-def __builtin_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, 
float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, 
__bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], 
"gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_bf16_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, 
__bf16>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, 
__bf16>, _ExtVector<8, __bf16>, int, _Constant bool, _Constant bool)", [Const], 
"gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, 
_Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant 
bool, _Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : 
AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : 
AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : 
AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : 
AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUBuiltin<"_ExtVector<8, 
int>(_Constant bool, _ExtVector<8, int>, _Constant bool, _ExtVector<16, int>, 
_ExtVector<8, int>, _ExtVector<2, int>, _Constant bool, _Constant bool, ...)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, 
float>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, 
_Float16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", 
[Const], "gfx1250-insts,wavefrontsize32">;
-def __builtin_amdgcn_swmmac_f16_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, 
_Float16>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, 
_ExtVector<32, _Float16>, _ExtVector<8, _Float16>, int, _Constant bool, 
_Constant bool)", [Const], "gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, 
float>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, 
__bf16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", [Const], 
"swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_bf16_16x16x64_bf16 : AMDGPUBuiltin<"_ExtVector<8, 
__bf16>(_Constant bool, _ExtVector<16, __bf16>, _Constant bool, _ExtVector<32, 
__bf16>, _ExtVector<8, __bf16>, int, _Constant bool, _Constant bool)", [Const], 
"swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_Constant bool, _ExtVector<16, __bf16>, 
_Constant bool, _ExtVector<32, __bf16>, _ExtVector<8, float>, int, _Constant 
bool, _Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8 : 
AMDGPUBuiltin<"_ExtVector<8, float>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, float>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8 : 
AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8 : 
AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8 : 
AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8 : 
AMDGPUBuiltin<"_ExtVector<8, _Float16>(_ExtVector<8, int>, _ExtVector<16, int>, 
_ExtVector<8, _Float16>, _ExtVector<2, int>, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_i32_16x16x128_iu8 : AMDGPUBuiltin<"_ExtVector<8, 
int>(_Constant bool, _ExtVector<8, int>, _Constant bool, _ExtVector<16, int>, 
_ExtVector<8, int>, _ExtVector<2, int>, _Constant bool, _Constant bool, ...)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f32_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, 
float>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, _ExtVector<32, 
_Float16>, _ExtVector<8, float>, int, _Constant bool, _Constant bool)", 
[Const], "swmmac-gfx1250-insts,wavefrontsize32">;
+def __builtin_amdgcn_swmmac_f16_16x16x64_f16 : AMDGPUBuiltin<"_ExtVector<8, 
_Float16>(_Constant bool, _ExtVector<16, _Float16>, _Constant bool, 
_ExtVector<32, _Float16>, _ExtVector<8, _Float16>, int, _Constant bool, 
_Constant bool)", [Const], "swmmac-gfx1250-insts,wavefrontsize32">;
 
 // GFX12.5 128B cooperative atomics
 def __builtin_amdgcn_cooperative_atomic_load_32x4B : AMDGPUBuiltin<"int(int *, 
_Constant int, char const *)", [Const], "gfx1250-insts,wavefrontsize32">;

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-gfx1250-err.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-gfx1250-err.cl
new file mode 100644
index 0000000000000..641a43426efb4
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-gfx1250-err.cl
@@ -0,0 +1,36 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \
+// RUN:   -verify -S -o - %s
+
+typedef float  v8f    __attribute__((ext_vector_type(8)));
+typedef half   v8h    __attribute__((ext_vector_type(8)));
+typedef half   v16h   __attribute__((ext_vector_type(16)));
+typedef half   v32h   __attribute__((ext_vector_type(32)));
+typedef __bf16 v8bf16  __attribute__((ext_vector_type(8)));
+typedef __bf16 v16bf16 __attribute__((ext_vector_type(16)));
+typedef __bf16 v32bf16 __attribute__((ext_vector_type(32)));
+typedef int    v2i    __attribute__((ext_vector_type(2)));
+typedef int    v8i    __attribute__((ext_vector_type(8)));
+typedef int    v16i   __attribute__((ext_vector_type(16)));
+
+void test_amdgcn_swmmac_gfx1250(global v8f* out8f, global v8h* out8h, global 
v8bf16* out8bf16, global v8i* out8i,
+                                  v16bf16 a16bf16, v16h a16h, v8i a8i,
+                                  v32bf16 b32bf16, v32h b32h, v16i b16i,
+                                  v8f c8f, v8bf16 c8bf16, v8h c8h, v8i c8i,
+                                  int index, v2i index2)
+{
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x64_bf16(0, a16bf16, 0, b32bf16, 
c8f, index, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x64_bf16' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8bf16 = __builtin_amdgcn_swmmac_bf16_16x16x64_bf16(0, a16bf16, 0, 
b32bf16, c8bf16, index, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x64_bf16' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16(0, a16bf16, 0, 
b32bf16, c8f, index, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_bf16f32_16x16x64_bf16' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8(a8i, b16i, c8f, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_fp8_fp8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8(a8i, b16i, c8f, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_fp8_bf8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8(a8i, b16i, c8f, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_bf8_fp8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8(a8i, b16i, c8f, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x128_bf8_bf8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8h = __builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8(a8i, b16i, c8h, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_fp8_fp8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8h = __builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8(a8i, b16i, c8h, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_fp8_bf8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8h = __builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8(a8i, b16i, c8h, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_bf8_fp8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8h = __builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8(a8i, b16i, c8h, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x128_bf8_bf8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8i = __builtin_amdgcn_swmmac_i32_16x16x128_iu8(0, a8i, 0, b16i, c8i, 
index2, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x128_iu8' needs target 
feature swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x64_f16(0, a16h, 0, b32h, c8f, 
index, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x64_f16' needs target feature 
swmmac-gfx1250-insts,wavefrontsize32}}
+  *out8h = __builtin_amdgcn_swmmac_f16_16x16x64_f16(0, a16h, 0, b32h, c8h, 
index, false, true); // 
expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x64_f16' needs target feature 
swmmac-gfx1250-insts,wavefrontsize32}}
+}

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32-gfx10-err.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32-gfx10-err.cl
new file mode 100644
index 0000000000000..fa4f3b5e40233
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w32-gfx10-err.cl
@@ -0,0 +1,31 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \
+// RUN:   -verify -S -o - %s
+
+typedef int    v2i   __attribute__((ext_vector_type(2)));
+typedef int    v4i   __attribute__((ext_vector_type(4)));
+typedef float  v8f   __attribute__((ext_vector_type(8)));
+typedef half   v8h   __attribute__((ext_vector_type(8)));
+typedef short  v8s   __attribute__((ext_vector_type(8)));
+typedef int    v8i   __attribute__((ext_vector_type(8)));
+typedef half  v16h   __attribute__((ext_vector_type(16)));
+typedef short v16s   __attribute__((ext_vector_type(16)));
+
+void test_amdgcn_swmmac_w32(global v8f* out8f, global v8h* out8h, global v8s* 
out8s, global v8i* out8i,
+                             v8h a8h, v8s a8s, v2i a2i, int ai,
+                             v16h b16h, v16s b16s, v4i b4i, v2i b2i,
+                             v8f c8f, v8h c8h, v8s c8s, v8i c8i,
+                             int index)
+{
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w32(a8h, b16h, c8f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_f16_w32' needs 
target feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32(a8s, b16s, c8f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w32' 
needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8h = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w32(a8h, b16h, c8h, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x32_f16_w32' needs 
target feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8s = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32(a8s, b16s, c8s, 
index); // expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w32' 
needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8i = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32(true, a2i, true, b4i, 
c8i, index, true); // 
expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w32' needs target 
feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8i = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32(true, ai, true, b2i, 
c8i, index, true); // 
expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w32' needs target 
feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8i = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32(true, a2i, true, b4i, 
c8i, index, true); // 
expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w32' needs target 
feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32(a2i, b4i, c8f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w32' 
needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32(a2i, b4i, c8f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w32' 
needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32(a2i, b4i, c8f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w32' 
needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
+  *out8f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32(a2i, b4i, c8f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w32' 
needs target feature swmmac-gfx1200-insts,wavefrontsize32}}
+}

diff  --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64-gfx10-err.cl 
b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64-gfx10-err.cl
new file mode 100644
index 0000000000000..4cde9d78abf2f
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-swmmac-w64-gfx10-err.cl
@@ -0,0 +1,30 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 \
+// RUN:   -verify -S -o - %s
+
+typedef int    v2i   __attribute__((ext_vector_type(2)));
+typedef float  v4f   __attribute__((ext_vector_type(4)));
+typedef half   v4h   __attribute__((ext_vector_type(4)));
+typedef short  v4s   __attribute__((ext_vector_type(4)));
+typedef int    v4i   __attribute__((ext_vector_type(4)));
+typedef half   v8h   __attribute__((ext_vector_type(8)));
+typedef short  v8s   __attribute__((ext_vector_type(8)));
+
+void test_amdgcn_swmmac_w64(global v4f* out4f, global v4h* out4h, global v4s* 
out4s, global v4i* out4i,
+                             v4h a4h, v4s a4s, int ai,
+                             v8h b8h, v8s b8s, v2i b2i, int bi,
+                             v4f c4f, v4h c4h, v4s c4s, v4i c4i,
+                             int index)
+{
+  *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_f16_w64(a4h, b8h, c4f, index); 
// expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_f16_w64' needs target 
feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64(a4s, b8s, c4f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf16_w64' 
needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4h = __builtin_amdgcn_swmmac_f16_16x16x32_f16_w64(a4h, b8h, c4h, index); 
// expected-error{{'__builtin_amdgcn_swmmac_f16_16x16x32_f16_w64' needs target 
feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4s = __builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64(a4s, b8s, c4s, 
index); // expected-error{{'__builtin_amdgcn_swmmac_bf16_16x16x32_bf16_w64' 
needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4i = __builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64(true, ai, true, b2i, 
c4i, index, true); // 
expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu8_w64' needs target 
feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4i = __builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64(true, ai, true, bi, 
c4i, index, true); // 
expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x32_iu4_w64' needs target 
feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4i = __builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64(true, ai, true, b2i, 
c4i, index, true); // 
expected-error{{'__builtin_amdgcn_swmmac_i32_16x16x64_iu4_w64' needs target 
feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64(ai, b2i, c4f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_fp8_w64' 
needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64(ai, b2i, c4f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_fp8_bf8_w64' 
needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64(ai, b2i, c4f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_fp8_w64' 
needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
+  *out4f = __builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64(ai, b2i, c4f, 
index); // expected-error{{'__builtin_amdgcn_swmmac_f32_16x16x32_bf8_bf8_w64' 
needs target feature swmmac-gfx1200-insts,wavefrontsize64}}
+}

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPU.td 
b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 616effeb5b9f2..d87e612cedd54 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -814,6 +814,14 @@ defm WMMA128bInsts : 
AMDGPUSubtargetFeature<"wmma-128b-insts",
   "Has WMMA instructions where A and B matrices do not have duplicated data"
 >;
 
+defm SWMMACGfx1200Insts : AMDGPUSubtargetFeature<"swmmac-gfx1200-insts",
+  "Has GFX1200 SWMMAC instructions"
+>;
+
+defm SWMMACGfx1250Insts : AMDGPUSubtargetFeature<"swmmac-gfx1250-insts",
+  "Has GFX1250 SWMMAC instructions"
+>;
+
 defm PkFmacF16Inst : AMDGPUSubtargetFeature<"pk-fmac-f16-inst",
   "Has v_pk_fmac_f16 instruction"
 >;
@@ -1950,6 +1958,7 @@ def FeatureISAVersion11_7_0 : FeatureSet<
      FeatureFP8ConversionInsts,
      FeatureDot11Insts,
      FeatureWMMA128bInsts,
+     FeatureSWMMACGfx1200Insts,
      FeatureIEEEMinimumMaximumInsts,
      FeatureMinimum3Maximum3F32,
      FeatureMinimum3Maximum3F16])>;
@@ -1983,6 +1992,7 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureExtendedImageInsts,
    FeatureFP8ConversionInsts,
    FeatureWMMA128bInsts,
+   FeatureSWMMACGfx1200Insts,
    FeatureIEEEMinimumMaximumInsts,
    FeaturePackedTID,
    FeatureVcmpxPermlaneHazard,
@@ -2080,7 +2090,9 @@ def FeatureISAVersion12_50_Common : FeatureSet<
    FeatureXNACK,
    FeatureClusters,
    FeatureD16Writes32BitVgpr,
-   FeatureMcastLoadInsts
+   FeatureMcastLoadInsts,
+   FeatureSWMMACGfx1200Insts,
+   FeatureSWMMACGfx1250Insts
 ]>;
 
 def FeatureISAVersion12_50 : FeatureSet<

diff  --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 333240e0f7ac2..992c375069e77 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -2061,7 +2061,9 @@ let WaveSizePredicate = isWave32, SubtargetPredicate = isGFX11PlusNot12_50, Othe
   defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w32>;
   defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w32", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w32>;
   defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w32",     int_amdgcn_wmma_i32_16x16x32_iu4,     I32_IU4X32_WMMA_w32>;
+}
 
+let WaveSizePredicate = isWave32, SubtargetPredicate = HasSWMMACGfx1200Insts in {
   def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w32_twoaddr,     int_amdgcn_swmmac_f32_16x16x32_f16,     F32_F16_SWMMAC_w32>;
   def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w32_twoaddr,    int_amdgcn_swmmac_f32_16x16x32_bf16,    F32_BF16_SWMMAC_w32>;
   def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w32_twoaddr,     int_amdgcn_swmmac_f16_16x16x32_f16,     F16_F16_SWMMAC_w32>;
@@ -2088,7 +2090,9 @@ let WaveSizePredicate = isWave64, SubtargetPredicate = isGFX11PlusNot12_50, Othe
   defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_FP8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_fp8, F32_FP8BF8_WMMA_w64>;
   defm : WMMAPat<"V_WMMA_F32_16X16X16_BF8_BF8_w64", int_amdgcn_wmma_f32_16x16x16_bf8_bf8, F32_FP8BF8_WMMA_w64>;
   defm : WMMAPat<"V_WMMA_I32_16X16X32_IU4_w64",     int_amdgcn_wmma_i32_16x16x32_iu4,     I32_IU4X32_WMMA_w64>;
+}
 
+let WaveSizePredicate = isWave64, SubtargetPredicate = HasSWMMACGfx1200Insts in {
   def : SWMMACPat<V_SWMMAC_F32_16X16X32_F16_w64_twoaddr,     int_amdgcn_swmmac_f32_16x16x32_f16,     F32_F16_SWMMAC_w64>;
   def : SWMMACPat<V_SWMMAC_F32_16X16X32_BF16_w64_twoaddr,    int_amdgcn_swmmac_f32_16x16x32_bf16,    F32_BF16_SWMMAC_w64>;
   def : SWMMACPat<V_SWMMAC_F16_16X16X32_F16_w64_twoaddr,     int_amdgcn_swmmac_f16_16x16x32_f16,     F16_F16_SWMMAC_w64>;

diff  --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index d335f9174b150..3664711d387bc 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -249,10 +249,13 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
                                  StringMap<bool> &Features) {
   AMDGPU::GPUKind Kind = parseArchAMDGCN(GPU);
   switch (Kind) {
-  case GK_GFX1310:
   case GK_GFX1251:
   case GK_GFX1250:
   case GK_GFX12_5_GENERIC:
+    Features["swmmac-gfx1200-insts"] = true;
+    Features["swmmac-gfx1250-insts"] = true;
+    [[fallthrough]];
+  case GK_GFX1310:
     Features["ci-insts"] = true;
     Features["dot7-insts"] = true;
     Features["dot8-insts"] = true;
@@ -333,6 +336,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["cvt-pknorm-vop2-insts"] = true;
     Features["fp8-conversion-insts"] = true;
     Features["wmma-128b-insts"] = true;
+    Features["swmmac-gfx1200-insts"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     break;
   case GK_GFX1170:
@@ -361,6 +365,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T,
     Features["dot11-insts"] = true;
     Features["fp8-conversion-insts"] = true;
     Features["wmma-128b-insts"] = true;
+    Features["swmmac-gfx1200-insts"] = true;
     Features["atomic-fmin-fmax-global-f32"] = true;
     break;
   case GK_GFX1153:


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to