https://github.com/broxigarchen updated https://github.com/llvm/llvm-project/pull/119750
>From dc1cc19d8d3cb2c41ca05a131f67bb576effb614 Mon Sep 17 00:00:00 2001 From: guochen2 <guoch...@amd.com> Date: Thu, 12 Dec 2024 13:33:14 -0500 Subject: [PATCH 1/2] True16 for v_alignbyte_b32 in MC --- clang/lib/CodeGen/CGBuiltin.cpp | 8 ++++ llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +++- llvm/test/MC/AMDGPU/gfx11_asm_vop3.s | 11 +++-- llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s | 42 +++++++++++++------ llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s | 17 ++++++-- llvm/test/MC/AMDGPU/gfx12_asm_vop3.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s | 3 ++ llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 3 ++ .../Disassembler/AMDGPU/gfx11_dasm_vop3.txt | 16 ++++++- .../AMDGPU/gfx11_dasm_vop3_dpp16.txt | 31 +++++++++++--- .../AMDGPU/gfx11_dasm_vop3_dpp8.txt | 16 ++++++- .../Disassembler/AMDGPU/gfx12_dasm_vop3.txt | 16 ++++++- .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 36 +++++++++++++--- .../AMDGPU/gfx12_dasm_vop3_dpp8.txt | 21 ++++++++-- 15 files changed, 190 insertions(+), 43 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index c2e983eebebc10..2c359f67680e3f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,6 +19567,14 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { + case AMDGPU::BI__builtin_amdgcn_alignbyte: { + llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); + llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); + llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); + llvm::Function *F = + CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); + return Builder.CreateCall(F, {Src0, Src1, Src2}); + } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 92418b9104ad14..a3f2d3df3f5276 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2354,7 +2354,7 @@ def int_amdgcn_writelane : >; def int_amdgcn_alignbyte : ClangBuiltin<"__builtin_amdgcn_alignbyte">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable] >; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 8a9f8aa3d16d3a..804a15c94d4728 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -212,7 +212,11 @@ defm V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGP defm V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>; defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>; defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>; -defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>; +defm V_ALIGNBYTE_B32 : VOP3Inst_t16_with_profiles <"v_alignbyte_b32", + VOP3_Profile<VOP_I32_I32_I32_I32>, + VOP3_Profile_True16<VOP_I32_I32_I32_I16, VOP3_OPSEL>, + VOP3_Profile_Fake16<VOP_I32_I32_I32_I16, VOP3_OPSEL>, + int_amdgcn_alignbyte>; // XXX - No FPException seems suspect but manual doesn't say it does let mayRaiseFPException = 0 in { @@ -1676,7 +1680,7 @@ defm V_FMA_F32 : VOP3_Realtriple_gfx11_gfx12<0x213>; defm V_FMA_F64 : VOP3_Real_Base_gfx11_gfx12<0x214>; defm V_LERP_U8 : VOP3_Realtriple_gfx11_gfx12<0x215>; defm V_ALIGNBIT_B32 : VOP3_Realtriple_gfx11_gfx12<0x216>; -defm V_ALIGNBYTE_B32 : VOP3_Realtriple_gfx11_gfx12<0x217>; +defm V_ALIGNBYTE_B32 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x217, "v_alignbyte_b32">; defm V_MULLIT_F32 : VOP3_Realtriple_gfx11_gfx12<0x218>; defm V_MIN3_F32 : VOP3_Realtriple_gfx11<0x219>; defm V_MIN3_I32 : VOP3_Realtriple_gfx11_gfx12<0x21a>; diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s index b649bab532f262..94cd631590ae97 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s @@ -461,11 +461,11 @@ v_alignbyte_b32 v5, s1, v255, s3 v_alignbyte_b32 v5, s105, s105, s105 // GFX11: v_alignbyte_b32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] -v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 -// GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l +// GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] -v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 -// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l +// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 // GFX11: v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01] @@ -494,6 +494,9 @@ v_alignbyte_b32 v5, src_scc, vcc_lo, -1 v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null // GFX11: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h +// GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + v_and_b16 v5, v1, v2 // GFX11: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s index e6f868d2b40e7e..27a9c10a1a4187 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp16.s @@ -363,22 +363,22 @@ v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bou v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf // GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 @@ -387,7 +387,7 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 // GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf // GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 @@ -405,6 +405,24 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_mirror +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, s3 row_half_mirror +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:1 +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_shl:15 +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, m0 row_ror:1 +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h row_mirror +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX11: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s index 160863b19012dc..cd61b8babd1fd2 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3_dpp8.s @@ -187,11 +187,11 @@ v_alignbit_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x16,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] -v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] @@ -220,6 +220,15 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_alignbyte_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, m0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xf6,0x01,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX11: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s index c7cd88e81583f8..584f37c4499c2d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s @@ -452,6 +452,9 @@ v_alignbyte_b32 v5, src_scc, vcc_lo, -1 v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null // GFX12: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h +// GFX12: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + v_and_b16 v5, v1, v2 // GFX12: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index 894acc5e94e1d3..120adb5f6eee64 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -485,6 +485,9 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bo v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 // GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h row_mirror +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index a5bfec80d8039e..180169ccf19a4c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -288,6 +288,9 @@ v_alignbyte_b32_e64_dpp v5, v1, v2, -1 dpp8:[7,6,5,4,3,2,1,0] fi:1 v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] ; encoding: [0xff,0x00,0x17,0xd6,0xe9,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h dpp8:[7,6,5,4,3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt index 60d213f1ff9372..d2062d9edc4d2e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt @@ -508,10 +508,16 @@ # GFX11: v_alignbyte_b32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] 0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX11: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX11: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] 0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01 # GFX11: v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01] @@ -540,6 +546,12 @@ 0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf # GFX11: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# W32-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00 # W32-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] # W32-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt index 35ad673de75db4..7c29a67e2be82d 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp16.txt @@ -228,19 +228,34 @@ # GFX11: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff -# GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff -# GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff # GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] @@ -269,6 +284,12 @@ 0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 # GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff # W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt index 3a37a19d6d3af9..125ea943f838a2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3_dpp8.txt @@ -135,10 +135,16 @@ # GFX11: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 # GFX11: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] @@ -167,6 +173,12 @@ 0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 # GFX11: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + 0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt index c3bab320b0ba2d..ab3118cd3e96c5 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt @@ -472,10 +472,16 @@ # GFX12: v_alignbyte_b32 v5, s105, s105, s105 ; encoding: [0x05,0x00,0x17,0xd6,0x69,0xd2,0xa4,0x01] 0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04 -# GFX12: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-REAL16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-REAL16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3.l ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_lo, ttmp15, v3 ; encoding: [0x05,0x00,0x17,0xd6,0x6a,0xf6,0x0c,0x04] 0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf -# GFX12: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.l ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 ; encoding: [0x05,0x00,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] 0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01 # GFX12: v_alignbyte_b32 v5, ttmp15, src_scc, ttmp15 ; encoding: [0x05,0x00,0x17,0xd6,0x7b,0xfa,0xed,0x01] @@ -504,6 +510,12 @@ 0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf # GFX12: v_alignbyte_b32 v255, 0xaf123456, vcc_hi, null ; encoding: [0xff,0x00,0x17,0xd6,0xff,0xd6,0xf0,0x01,0x56,0x34,0x12,0xaf] +0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf +# W32-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W32-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-REAL16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255.h op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] +# W64-FAKE16: v_alignbyte_b32 v5, vcc_hi, 0xaf123456, v255 op_sel:[0,0,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0x6b,0xfe,0xfd,0x07,0x56,0x34,0x12,0xaf] + 0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00 # W32-REAL16: v_and_b16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] # W32-FAKE16: v_and_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x62,0xd7,0x01,0x05,0x02,0x00] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 232ed8d23c9c6e..560be3aba528b6 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -240,22 +240,40 @@ # GFX12: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] @@ -284,6 +302,12 @@ 0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 # GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + 0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff # W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt index 469b199053d475..a24e851bbcbf12 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt @@ -147,13 +147,22 @@ # GFX12: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] 0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05 # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] @@ -182,6 +191,12 @@ 0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00 # GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xea,0xfe,0xf7,0x03,0xff,0x00,0x00,0x00] +0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 +# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] +# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x20,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] + 0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 # W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] >From dd1871968759a3d2d305e01901373028613915be Mon Sep 17 00:00:00 2001 From: guochen2 <guoch...@amd.com> Date: Fri, 13 Dec 2024 13:26:30 -0500 Subject: [PATCH 2/2] remove clang change --- clang/lib/CodeGen/CGBuiltin.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 2c359f67680e3f..c2e983eebebc10 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -19567,14 +19567,6 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, llvm::AtomicOrdering AO = llvm::AtomicOrdering::SequentiallyConsistent; llvm::SyncScope::ID SSID; switch (BuiltinID) { - case AMDGPU::BI__builtin_amdgcn_alignbyte: { - llvm::Value *Src0 = EmitScalarExpr(E->getArg(0)); - llvm::Value *Src1 = EmitScalarExpr(E->getArg(1)); - llvm::Value *Src2 = EmitScalarExpr(E->getArg(2)); - llvm::Function *F = - CGM.getIntrinsic(Intrinsic::amdgcn_alignbyte, Src2->getType()); - return Builder.CreateCall(F, {Src0, Src1, Src2}); - } case AMDGPU::BI__builtin_amdgcn_div_scale: case AMDGPU::BI__builtin_amdgcn_div_scalef: { // Translate from the intrinsics's struct return to the builtin's out _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits