llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-codegen Author: Shilei Tian (shiltian) <details> <summary>Changes</summary> Co-authored-by: Mekhanoshin, Stanislav <Stanislav.Mekhanoshin@<!-- -->amd.com> --- Patch is 66.99 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149355.diff 23 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+1) - (modified) clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp (+1) - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (+19) - (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+2) - (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll (+33) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s (+45) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s (+48) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s (+56) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s (+60) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s (+12) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8.s (+16) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s (+45) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s (+48) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16-fake16.s (+56) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp16.s (+60) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8-fake16.s (+16) - (modified) llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1_dpp8.s (+20) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1.txt (+63) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp16.txt (+59) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop1_dpp8.txt (+15) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt (+64) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp16.txt (+60) - (modified) llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1_dpp8.txt (+20) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index e7a45f0e4300d..3b6ad7d90be3c 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -675,6 +675,7 @@ TARGET_BUILTIN(__builtin_amdgcn_rsq_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_log_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_exp2_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_sin_bf16, "yy", "nc", "bf16-trans-insts") +TARGET_BUILTIN(__builtin_amdgcn_cos_bf16, "yy", "nc", "bf16-trans-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_fp8, "hiIi", "nc", "gfx1250-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_f16_bf8, "hiIi", "nc", "gfx1250-insts") diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index 32cf622f20605..9f48149354255 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -433,6 +433,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_sin); case AMDGPU::BI__builtin_amdgcn_cosf: case AMDGPU::BI__builtin_amdgcn_cosh: + case AMDGPU::BI__builtin_amdgcn_cos_bf16: return emitBuiltinWithOneOverloadedType<1>(*this, E, Intrinsic::amdgcn_cos); case AMDGPU::BI__builtin_amdgcn_dispatch_ptr: return EmitAMDGPUDispatchPtr(*this, E); diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl index 748b6455103ec..a1f984c129276 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -156,6 +156,25 @@ void test_sin_bf16(global __bf16* out, __bf16 a) *out = __builtin_amdgcn_sin_bf16(a); } +// CHECK-LABEL: @test_cos_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) +// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr +// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr +// CHECK-NEXT: store ptr addrspace(1) [[OUT:%.*]], ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store bfloat [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR_ASCAST]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = call bfloat @llvm.amdgcn.cos.bf16(bfloat [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8 +// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(1) [[TMP2]], align 2 +// CHECK-NEXT: ret void +// +void test_cos_bf16(global __bf16* out, __bf16 a) +{ + *out = __builtin_amdgcn_cos_bf16(a); +} + // CHECK-LABEL: @test_cvt_f16_fp8( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5) diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index c91319eae7218..ff89b8badeed0 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -535,6 +535,7 @@ defm V_RSQ_BF16 : VOP1Inst_t16 <"v_rsq_bf16", VOP_BF16_BF16, AMDGPUrsq>; defm V_LOG_BF16 : VOP1Inst_t16 <"v_log_bf16", VOP_BF16_BF16, AMDGPUlogf16>; defm V_EXP_BF16 : VOP1Inst_t16 <"v_exp_bf16", VOP_BF16_BF16, AMDGPUexpf16>; defm V_SIN_BF16 : VOP1Inst_t16 <"v_sin_bf16", VOP_BF16_BF16, AMDGPUsin>; +defm V_COS_BF16 : VOP1Inst_t16 <"v_cos_bf16", VOP_BF16_BF16, AMDGPUcos>; } } // End TRANS = 1, SchedRW = [WriteTrans32] defm V_FREXP_MANT_F16 : VOP1Inst_t16 <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>; @@ -1149,6 +1150,7 @@ defm V_RSQ_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07b>; defm V_LOG_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07c>; defm V_EXP_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07d>; defm V_SIN_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07e>; +defm V_COS_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x07f>; //===----------------------------------------------------------------------===// // GFX10. diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll new file mode 100644 index 0000000000000..091859f3c9bf3 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.bf16.ll @@ -0,0 +1,33 @@ +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN %s +; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefix=GCN %s + +; FIXME: GlobalISel does not work with bf16 + +declare bfloat @llvm.amdgcn.cos.bf16(bfloat) #0 + +; GCN-LABEL: {{^}}cos_bf16: +; GCN: v_cos_bf16_e32 {{v[0-9]+}}, {{s[0-9]+}} +define amdgpu_kernel void @cos_bf16(ptr addrspace(1) %out, bfloat %src) #1 { + %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat %src) #0 + store bfloat %cos, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}cos_bf16_constant_4 +; GCN: v_cos_bf16_e32 v0, 4.0 +define amdgpu_kernel void @cos_bf16_constant_4(ptr addrspace(1) %out) #1 { + %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 4.0) #0 + store bfloat %cos, ptr addrspace(1) %out, align 2 + ret void +} + +; GCN-LABEL: {{^}}cos_bf16_constant_100 +; GCN: v_cos_bf16_e32 {{v[0-9]+}}, 0x42c8 +define amdgpu_kernel void @cos_bf16_constant_100(ptr addrspace(1) %out) #1 { + %cos = call bfloat @llvm.amdgcn.cos.bf16(bfloat 100.0) #0 + store bfloat %cos, ptr addrspace(1) %out, align 2 + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s index f51d709a594a0..4b61064815ed5 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1-fake16.s @@ -343,6 +343,51 @@ v_sin_bf16 v5, src_scc v_sin_bf16 v127, 0x8000 // GFX1250: v_sin_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfc,0xfe,0x7e,0x00,0x80,0x00,0x00] +v_cos_bf16 v5, v1 +// GFX1250: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e] + +v_cos_bf16 v5, v127 +// GFX1250: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e] + +v_cos_bf16 v5, s1 +// GFX1250: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, s105 +// GFX1250: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, vcc_lo +// GFX1250: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, vcc_hi +// GFX1250: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, ttmp15 +// GFX1250: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, m0 +// GFX1250: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, exec_lo +// GFX1250: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, exec_hi +// GFX1250: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, null +// GFX1250: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, -1 +// GFX1250: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, 0.5 +// GFX1250: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, src_scc +// GFX1250: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e] + +v_cos_bf16 v127, 0x8000 +// GFX1250: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s index 39fc73d70cab2..40901618fce95 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1.s @@ -364,6 +364,54 @@ v_sin_bf16 v127, 0x8000 v_sin_bf16 v5.h, v1.h // GFX1250: v_sin_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xfd,0x0a,0x7f] +v_cos_bf16 v5, v1 +// GFX1250: v_cos_bf16_e32 v5, v1 ; encoding: [0x01,0xff,0x0a,0x7e] + +v_cos_bf16 v5, v127 +// GFX1250: v_cos_bf16_e32 v5, v127 ; encoding: [0x7f,0xff,0x0a,0x7e] + +v_cos_bf16 v5, s1 +// GFX1250: v_cos_bf16_e32 v5, s1 ; encoding: [0x01,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, s105 +// GFX1250: v_cos_bf16_e32 v5, s105 ; encoding: [0x69,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, vcc_lo +// GFX1250: v_cos_bf16_e32 v5, vcc_lo ; encoding: [0x6a,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, vcc_hi +// GFX1250: v_cos_bf16_e32 v5, vcc_hi ; encoding: [0x6b,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, ttmp15 +// GFX1250: v_cos_bf16_e32 v5, ttmp15 ; encoding: [0x7b,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, m0 +// GFX1250: v_cos_bf16_e32 v5, m0 ; encoding: [0x7d,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, exec_lo +// GFX1250: v_cos_bf16_e32 v5, exec_lo ; encoding: [0x7e,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, exec_hi +// GFX1250: v_cos_bf16_e32 v5, exec_hi ; encoding: [0x7f,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, null +// GFX1250: v_cos_bf16_e32 v5, null ; encoding: [0x7c,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, -1 +// GFX1250: v_cos_bf16_e32 v5, -1 ; encoding: [0xc1,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, 0.5 +// GFX1250: v_cos_bf16_e32 v5, 0.5 ; encoding: [0xf0,0xfe,0x0a,0x7e] + +v_cos_bf16 v5, src_scc +// GFX1250: v_cos_bf16_e32 v5, src_scc ; encoding: [0xfd,0xfe,0x0a,0x7e] + +v_cos_bf16 v127, 0x8000 +// GFX1250: v_cos_bf16_e32 v127, 0x8000 ; encoding: [0xff,0xfe,0xfe,0x7e,0x00,0x80,0x00,0x00] + +v_cos_bf16 v5.h, v1.h +// GFX1250: v_cos_bf16_e32 v5.h, v1.h ; encoding: [0x81,0xff,0x0a,0x7f] + v_cvt_f32_bf16 v5, v1 // GFX1250: v_cvt_f32_bf16_e32 v5, v1 ; encoding: [0x01,0xe5,0x0a,0x7e] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s index 97058eb2e7c9f..ab5d55fad49ac 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16-fake16.s @@ -394,6 +394,62 @@ v_sin_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi // GFX1250: v_sin_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfc,0xfe,0x7e,0x7f,0x6f,0x35,0x30] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_mirror +// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_half_mirror +// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shl:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shl:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shr:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shr:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_ror:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_ror:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s index 6a293c19a79a4..dcb613c09a62d 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp16.s @@ -422,6 +422,66 @@ v_sin_bf16 v5.h, v1.h quad_perm:[3,2,1,0] // GFX1250: v_sin_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfc,0x0a,0x7f,0x81,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16 v5, v1 quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 quad_perm:[0,1,2,3] +// GFX1250: v_cos_bf16_dpp v5, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0xe4,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_mirror +// GFX1250: v_cos_bf16_dpp v5, v1 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x40,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_half_mirror +// GFX1250: v_cos_bf16_dpp v5, v1 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x41,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shl:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x01,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shl:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x0f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shr:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x11,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_shr:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x1f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_ror:1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x21,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_ror:15 +// GFX1250: v_cos_bf16_dpp v5, v1 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x2f,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_share:0 row_mask:0xf bank_mask:0xf +// GFX1250: v_cos_bf16_dpp v5, v1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x50,0x01,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX1250: v_cos_bf16_dpp v5, v1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x5f,0x01,0x01] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX1250: v_cos_bf16_dpp v5, v1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0xfa,0xfe,0x0a,0x7e,0x01,0x60,0x09,0x13] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 +// GFX1250: v_cos_bf16_dpp v127, -|v127| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xfa,0xfe,0xfe,0x7e,0x7f,0x6f,0x35,0x30] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5.h, v1.h quad_perm:[3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5.h, v1.h quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xfe,0x0a,0x7f,0x81,0x1b,0x00,0xff] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + v_cvt_f32_bf16 v5, v1 quad_perm:[3,2,1,0] // GFX1250: v_cvt_f32_bf16_dpp v5, v1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0xe4,0x0a,0x7e,0x01,0x1b,0x00,0xff] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s index d1f53c7b2065c..4b37d648a928c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_dpp8-fake16.s @@ -86,6 +86,18 @@ v_sin_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX1250: v_sin_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfc,0xfe,0x7e,0x7f,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU +v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] +// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0xe9,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX1250: v_cos_bf16_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1 ; encoding: [0xea,0xfe,0x0a,0x7e,0x01,0x77,0x39,0x05] +// GFX12-ERR: :[[@LINE-2]]:1: error: instruction not supported on this GPU + +v_cos_bf16 v127, v127 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX1250: v_cos_bf16_dpp v127, v127 dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xe9,0xfe,0xfe,0x7e,0x7f,0x00,0x00,0x00] +// GFX... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/149355 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits