llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Mirko Brkušanin (mbrkusanin) <details> <summary>Changes</summary> --- Patch is 171.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180191.diff 38 Files Affected: - (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl (+1) - (modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+7-1) - (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+15-1) - (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+17-16) - (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+4-4) - (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+2-1) - (modified) llvm/lib/TargetParser/TargetParser.cpp (+2) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll (+59-2) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir (+80-78) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll (+235) - (added) llvm/test/MC/AMDGPU/gfx1150_unsupported.s (+56) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1-fake16.s (+47) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1.s (+54) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1_dpp16-fake16.s (+14) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1_dpp16.s (+16) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1_dpp8-fake16.s (+14) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop1_dpp8.s (+16) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3-fake16.s (+62) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3.s (+62) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_dpp16-fake16.s (+134) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_dpp16.s (+134) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_dpp8-fake16.s (+74) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_dpp8.s (+74) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_err.s (+5) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1-fake16.s (+140) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1.s (+152) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1_dpp16-fake16.s (+44) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1_dpp16.s (+44) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1_dpp8-fake16.s (+44) - (added) llvm/test/MC/AMDGPU/gfx1170_asm_vop3_from_vop1_dpp8.s (+44) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop1_dpp16.txt (+15) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop1_dpp8.txt (+15) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3.txt (+63) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_dpp16.txt (+147) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_dpp8.txt (+77) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_from_vop1.txt (+57) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_from_vop1_dpp16.txt (+45) - (added) llvm/test/MC/Disassembler/AMDGPU/gfx1170_dasm_vop3_from_vop1_dpp8.txt (+45) ``````````diff diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl index cdfe9fcd89091..83a44be930ae0 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl @@ -1,5 +1,6 @@ // REQUIRES: amdgpu-registered-target // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1170 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index accaeda1cb239..9d723c86031f2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1884,7 +1884,8 @@ def FeatureISAVersion11_5_3 : FeatureSet< def FeatureISAVersion11_7_0 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureSALUFloatInsts, - FeatureDPPSrc1SGPR])>; + FeatureDPPSrc1SGPR, + FeatureFP8ConversionInsts])>; def FeatureISAVersion12 : FeatureSet< [FeatureGFX12, @@ -2388,6 +2389,11 @@ def isGFX11Plus : Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11">, AssemblerPredicate<(all_of FeatureGFX11Insts)>; +def isGFX11PlusNot12_50 : + Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11 &&" + "(Subtarget->getGeneration() >= AMDGPUSubtarget::GFX13 || !Subtarget->hasGFX1250Insts())">, + AssemblerPredicate<(all_of FeatureGFX11Insts, (any_of FeatureGFX13Insts, (not FeatureGFX1250Insts)))>; + def isGFX12Only : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::GFX12">, AssemblerPredicate<(all_of FeatureGFX12Insts, (not FeatureGFX13Insts))>; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index d2c707646699b..184485ebf17c6 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -9533,6 +9533,8 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi || Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || Opc == AMDGPU::V_CVT_SR_FP8_F32_vi || + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 || Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 || Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) { Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods @@ -9542,7 +9544,19 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, // Adding vdst_in operand is already covered for these DPP instructions in // cvtVOP3DPP. if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) && - !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 || + !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx11 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx11 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx11 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx11 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx11 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx11 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx11 || + Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx11 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx11 || + Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx11 || + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx11 || + Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx11 || + Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 || Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 || Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 || Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 || diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1f053201da226..56e7623496eea 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -760,9 +760,9 @@ def V_CVT_F16_F8_True16_Profile : VOP3_Profile_True16<V_CVT_F16_F8_Profile>; def V_CVT_F16_F8_Fake16_Profile : VOP3_Profile_Fake16<V_CVT_F16_F8_Profile>; } -let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts], +let SubtargetPredicate = isGFX11Plus, OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, SchedRW = [WriteFloatCvt] in { - let SubtargetPredicate = isGFX12PlusNot12_50 in + let SubtargetPredicate = isGFX11PlusNot12_50 in defm V_CVT_F32_FP8_OP_SEL : VOP1Inst<"v_cvt_f32_fp8_op_sel", VOPProfile_Base_CVT_F_F8_ByteSel<f32>>; let SubtargetPredicate = isGFX125xOnly in defm V_CVT_F32_FP8_gfx1250 : VOP1Inst<"v_cvt_f32_fp8_gfx1250", VOPProfile_Base_CVT_F_F8_ByteSel<f32, 1>>; @@ -786,7 +786,7 @@ class Cvt_F_F8_Pat_ByteSel<SDPatternOperator node, VOP3_Pseudo inst, bit HasOpSe >; let OtherPredicates = [HasFP8ConversionInsts] in { - let SubtargetPredicate = isGFX12PlusNot12_50 in + let SubtargetPredicate = isGFX11PlusNot12_50 in def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f32_fp8, V_CVT_F32_FP8_OP_SEL_e64>; let SubtargetPredicate = isGFX125xOnly in { def : GCNPat<(int_amdgcn_cvt_f32_fp8 i32:$src0, timm:$byte_sel), @@ -794,7 +794,7 @@ let OtherPredicates = [HasFP8ConversionInsts] in { def : GCNPat<(int_amdgcn_cvt_f32_fp8_e5m3 i32:$src0, timm:$byte_sel), (V_CVT_F32_FP8_gfx1250_e64 $src0, DSTCLAMP.ENABLE, (as_i32timm $byte_sel))>; } - let SubtargetPredicate = isGFX12Plus in + let SubtargetPredicate = isGFX11Plus in def : Cvt_F_F8_Pat_ByteSel<int_amdgcn_cvt_f32_bf8, V_CVT_F32_BF8_OP_SEL_e64>; } @@ -806,7 +806,7 @@ class Cvt_PK_F32_F8_Pat_OpSel<SDPatternOperator node, int index, (inst_e32 $src)) >; -let SubtargetPredicate = isGFX12Plus, OtherPredicates = [HasFP8ConversionInsts] in { +let SubtargetPredicate = isGFX11Plus, OtherPredicates = [HasFP8ConversionInsts] in { foreach Index = [0, -1] in { def : Cvt_PK_F32_F8_Pat_OpSel<int_amdgcn_cvt_pk_f32_fp8, Index, V_CVT_PK_F32_FP8_fake16_e32, V_CVT_PK_F32_FP8_fake16_e64>; @@ -1140,8 +1140,9 @@ multiclass VOP1Only_Real_gfx11_gfx12_gfx13<bits<9> op> : multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> : VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>; -multiclass VOP1_Real_e32_with_name_gfx12_gfx13<bits<9> op, string opName, - string asmName> : +multiclass VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<bits<9> op, string opName, + string asmName> : + VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>, VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>, VOP1_Real_e32_with_name<GFX13Gen, op, opName, asmName>; @@ -1179,16 +1180,16 @@ multiclass VOP1_Real_OpSelIsDPP_gfx1250_gfx13<bits<9> op> : defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13_not_gfx1250<0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">; defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">; -defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name_gfx12_gfx13<0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">; +defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">; -defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">; -defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">; -defm V_CVT_PK_F32_FP8_fake16 : VOP3_Real_with_name_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">; -defm V_CVT_PK_F32_FP8_t16 : VOP3_Real_with_name_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">; -defm V_CVT_PK_F32_BF8_fake16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">; -defm V_CVT_PK_F32_BF8_t16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">; -defm V_CVT_PK_F32_BF8_fake16 : VOP3_Real_with_name_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">; -defm V_CVT_PK_F32_BF8_t16 : VOP3_Real_with_name_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">; +defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">; +defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">; +defm V_CVT_PK_F32_FP8_fake16 : VOP3_Real_with_name_gfx11_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">; +defm V_CVT_PK_F32_FP8_t16 : VOP3_Real_with_name_gfx11_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">; +defm V_CVT_PK_F32_BF8_fake16 : VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">; +defm V_CVT_PK_F32_BF8_t16 : VOP1_Real_e32_with_name_gfx11_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">; +defm V_CVT_PK_F32_BF8_fake16 : VOP3_Real_with_name_gfx11_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">; +defm V_CVT_PK_F32_BF8_t16 : VOP3_Real_with_name_gfx11_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">; defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x00c, "V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 56127c7e2f48f..60b06edc211b1 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -821,13 +821,13 @@ let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0, VOP3_CVT_PK_F8_F32_Profile_t16<>, VOP3_CVT_PK_F8_F32_Profile_fake16<>>; - let SubtargetPredicate = isGFX12Plus in { + let SubtargetPredicate = isGFX11Plus in { let OtherPredicates = [HasFP8ConversionInsts, NotHasFP8E5M3Insts] in defm V_CVT_SR_FP8_F32_gfx12 : VOP3Inst<"v_cvt_sr_fp8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile<f32>>; let OtherPredicates = [HasFP8ConversionInsts, HasFP8E5M3Insts] in defm V_CVT_SR_FP8_F32_gfx1250 : VOP3Inst<"v_cvt_sr_fp8_f32_gfx1250", VOP3_CVT_SR_F8_ByteSel_Profile<f32, true>>; defm V_CVT_SR_BF8_F32_gfx12 : VOP3Inst<"v_cvt_sr_bf8_f32_gfx12", VOP3_CVT_SR_F8_ByteSel_Profile<f32>>; - } + } // End SubtargetPredicate = isGFX11Plus } // These instructions have non-standard use of op_sel. In particular they are @@ -931,7 +931,7 @@ let SubtargetPredicate = isGFX940Plus in { } } -let SubtargetPredicate = isGFX12Plus in { +let SubtargetPredicate = isGFX11Plus in { let OtherPredicates = [HasFP8ConversionInsts, NotHasFP8E5M3Insts] in def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_fp8_f32, V_CVT_SR_FP8_F32_gfx12_e64, f32>; let OtherPredicates = [HasFP8ConversionInsts, HasFP8E5M3Insts] in { @@ -939,7 +939,7 @@ let SubtargetPredicate = isGFX12Plus in { def : Cvt_SR_F8_ByteSel_E5M3_Pat<int_amdgcn_cvt_sr_fp8_f32_e5m3, V_CVT_SR_FP8_F32_gfx1250_e64, f32, DSTCLAMP.ENABLE>; } def : Cvt_SR_F8_ByteSel_Pat<int_amdgcn_cvt_sr_bf8_f32, V_CVT_SR_BF8_F32_gfx12_e64, f32>; -} +} // End SubtargetPredicate = isGFX11Plus } class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat < diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 4694658952c6a..09fdb004c1363 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1873,8 +1873,9 @@ multiclass VOP3_Real_with_name<GFXGen Gen, bits<10> op, string opName, } } -multiclass VOP3_Real_with_name_gfx12_gfx13< +multiclass VOP3_Real_with_name_gfx11_gfx12_gfx13< bits<10> op, string opName, string asmName, string pseudo_mnemonic = "", bit isSingle = 0> : + VOP3_Real_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>, VOP3_Real_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>, VOP3_Real_with_name<GFX13Gen, op, opName, asmName, pseudo_mnemonic, isSingle>; diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp index 671fc79149d18..d317ca4e1194a 100644 --- a/llvm/lib/TargetParser/TargetParser.cpp +++ b/llvm/lib/TargetParser/TargetParser.cpp @@ -519,6 +519,8 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, break; case GK_GFX1170: // TODO-GFX1170: Update features map for gfx1170 + Features["fp8-conversion-insts"] = true; + [[fallthrough]]; case GK_GFX1153: case GK_GFX1152: case GK_GFX1151: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll index 3372868455d65..4280d10fc2b33 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll @@ -1,8 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1250 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1170 < %s | FileCheck -check-prefixes=GFX1170PLUS,GFX1170 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX1170PLUS,GFX12 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GFX1170PLUS,GFX1250 %s define amdgpu_cs float @test_cvt_f32_bf8_byte0(i32 %a) { +; GFX1170-LABEL: test_cvt_f32_bf8_byte0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_cvt_f32_bf8_byte0: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_f32_bf8_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -19,6 +25,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte0(i32 %a) { } define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) { +; GFX1170-LABEL: test_cvt_f32_bf8_byte1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_cvt_f32_bf8_byte1: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -35,6 +46,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte1(i32 %a) { } define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) { +; GFX1170-LABEL: test_cvt_f32_bf8_byte2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_cvt_f32_bf8_byte2: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_f32_bf8_e64_dpp v0, v0 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -51,6 +67,11 @@ define amdgpu_cs float @test_cvt_f32_bf8_byte2(i32 %a) { } define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) { +; GFX1170-LABEL: test_cvt_f32_fp8_byte3: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: ; return to shader part epilog +; ; GFX12-LABEL: test_cvt_f32_fp8_byte3: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_f32_fp8_e64_dpp v0, v0 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -67,6 +88,14 @@ define amdgpu_cs float @test_cvt_f32_fp8_byte3(i32 %a) { } define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr addrspace(1) %out) { +; GFX1170-LABEL: test_cvt_pk_bf8_f32_word0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_cvt_pk_bf8_f32 v2.l, v0, v1 +; GFX1170-NEXT: global_store_b32 v[3:4], v2, off +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_cvt_pk_bf8_f32_word0: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_pk_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -88,6 +117,14 @@ define amdgpu_cs void @test_cvt_pk_bf8_f32_word0(i32 %a, float %y, i32 %old, ptr } define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr addrspace(1) %out) { +; GFX1170-LABEL: test_cvt_pk_fp8_f32_word1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1170-NEXT: v_cvt_pk_fp8_f32 v2.h, v0, v1 op_sel:[0,0,1] +; GFX1170-NEXT: global_store_b32 v[3:4], v2, off +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_cvt_pk_fp8_f32_word1: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_mov_b32_dpp v0, v0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -113,6 +150,12 @@ define amdgpu_cs void @test_cvt_pk_fp8_f32_word1(i32 %a, float %y, i32 %old, ptr } define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) { +; GFX1170-LABEL: test_cvt_sr_bf8_f32_byte0: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: global_store_b32 v[3:4], v2, off +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_cvt_sr_bf8_f32_byte0: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_sr_bf8_f32_e64_dpp v2, v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -134,6 +177,12 @@ define amdgpu_cs void @test_cvt_sr_bf8_f32_byte0(i32 %a, i32 %r, i32 %old, ptr a } define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) { +; GFX1170-LABEL: test_cvt_sr_fp8_f32_byte1: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: global_store_b32 v[3:4], v2, off +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte1: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -155,6 +204,12 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte1(i32 %a, i32 %r, i32 %old, ptr a } define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr addrspace(1) %out) { +; GFX1170-LABEL: test_cvt_sr_fp8_f32_byte2: +; GFX1170: ; %bb.0: +; GFX1170-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 +; GFX1170-NEXT: global_store_b32 v[3:4], v2, off +; GFX1170-NEXT: s_endpgm +; ; GFX12-LABEL: test_cvt_sr_fp8_f32_byte2: ; GFX12: ; %bb.0: ; GFX12-NEXT: v_cvt_sr_fp8_f32_e64_dpp v2, v0, v1 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 @@ -187,3 +242,5 @@ declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #1 attributes #0 = { nounwind convergent } attributes #1 = { nounwind readnone convergent } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX1170PLUS: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir index b1e23808e91a9..ce36dd0aa26f9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.mir @@ -1,6 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=gcn-dpp-combine %s -o - | FileCheck -check-p... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/180191 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
