llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu

Author: None (llvmbot)

Backport c253b9f

Requested by: @shiltian

---

Full diff: https://github.com/llvm/llvm-project/pull/177365.diff

17 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+8)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+18-1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+2)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+20)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp (+8)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/SIRegisterInfo.td (+2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+37)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+8)
- (modified) llvm/lib/Target/AMDGPU/VOP2Instructions.td (+6-1)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s (+3)
- (modified) llvm/test/MC/AMDGPU/gfx11_asm_vop2.s (+3)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s (+4-1)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop2.s (+4-1)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt (+3)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt (+3)

``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 7a91a40e18cde..fa4b790b88a79 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -666,6 +666,8 @@ class AMDGPUOperand : public MCParsedAsmOperand {
   bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }
 
+  bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }
+
   bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
 
   bool isVISrcB32() const {
@@ -2044,6 +2046,7 @@ static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
   case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
   case AMDGPU::OPERAND_KIMM16:
     return &APFloat::IEEEhalf();
@@ -2438,6 +2441,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
   case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
   case AMDGPU::OPERAND_REG_IMM_V2FP32:
   case AMDGPU::OPERAND_REG_IMM_V2INT32:
@@ -2480,6 +2484,7 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
   case AMDGPU::OPERAND_REG_IMM_V2FP32:
   case AMDGPU::OPERAND_REG_IMM_V2INT32:
   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
@@ -3725,6 +3730,9 @@ bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
       OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
     return AMDGPU::isInlinableLiteralV2F16(Val);
 
+  if (OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT)
+    return AMDGPU::isPKFMACF16InlineConstant(Val, isGFX11Plus());
+
   if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2BF16 ||
       OperandType == AMDGPU::OPERAND_REG_IMM_V2BF16)
     return AMDGPU::isInlinableLiteralV2BF16(Val);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index dd3120f05ce26..cc03fb988ddbb 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -528,12 +528,26 @@ void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
     break;
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_IMM_INT16:
-  case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+    Imm = getInlineImmValF16(Imm);
+    break;
+  case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
     Imm = getInlineImmValF16(Imm);
     break;
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT: {
+    // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
+    // halves, so we need to produce the duplicated value for correct
+    // round-trip.
+    if (isGFX11Plus()) {
+      int64_t F16Val = getInlineImmValF16(Imm);
+      Imm = (F16Val << 16) | (F16Val & 0xFFFF);
+    } else {
+      Imm = getInlineImmValF16(Imm);
+    }
+    break;
+  }
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_IMM_INT64:
   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
@@ -1597,6 +1611,9 @@ AMDGPUDisassembler::decodeLiteralConstant(const MCInstrDesc &Desc,
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
     UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
     break;
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
+    UseLit = AMDGPU::isPKFMACF16InlineConstant(Val, isGFX11Plus());
+    break;
   case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
     break;
   case AMDGPU::OPERAND_REG_IMM_INT16:
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index dad1ba7af9cf6..6d0c1bcbf1abe 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -374,6 +374,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return (Generation)Gen;
   }
 
+  bool isGFX11Plus() const { return getGeneration() >= GFX11; }
+
   unsigned getMaxWaveScratchSize() const {
     // See COMPUTE_TMPRING_SIZE.WAVESIZE.
     if (getGeneration() >= GFX12) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index b63d71dc2fde9..5a00cb8a4b6cb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -623,6 +623,25 @@ void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
         printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
       return;
     break;
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT: {
+    if (AMDGPU::isGFX11Plus(STI)) {
+      // For GFX11+, the inline constant is duplicated to both channels, so we
+      // need to check if the low and high 16 bits are the same, and then if
+      // they can be printed as inline constant values.
+      uint16_t Lo16 = static_cast<uint16_t>(Imm & 0xFFFF);
+      uint16_t Hi16 = static_cast<uint16_t>((Imm >> 16) & 0xFFFF);
+      if (Lo16 == Hi16 &&
+          printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
+        return;
+    } else {
+      // For pre-GFX11, the inline constant is in the low 16 bits, so we need
+      // to check if it can be printed as inline constant value.
+      if (isUInt<16>(Imm) &&
+          printImmediateFP16(static_cast<uint16_t>(Imm), STI, O))
+        return;
+    }
+    break;
+  }
   case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
     if (isUInt<16>(Imm) &&
@@ -867,6 +886,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
   case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 49e94183202bd..5b731cdf6d05f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -346,6 +346,14 @@ std::optional<uint64_t> AMDGPUMCCodeEmitter::getLitEncoding(
     return AMDGPU::getInlineEncodingV2F16(static_cast<uint32_t>(Imm))
         .value_or(255);
 
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
+    // V_PK_FMAC_F16 has different inline constant behavior on pre-GFX11 vs
+    // GFX11+: pre-GFX11 produces (f16, 0), GFX11+ duplicates f16 to both
+    // halves.
+    return AMDGPU::getPKFMACF16InlineEncoding(static_cast<uint32_t>(Imm),
+                                              AMDGPU::isGFX11Plus(STI))
+        .value_or(255);
+
   case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
     return AMDGPU::getInlineEncodingV2BF16(static_cast<uint32_t>(Imm))
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index b9ee9c7015061..a7721cdad08bf 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -207,6 +207,7 @@ enum OperandType : unsigned {
   OPERAND_REG_IMM_FP16,
   OPERAND_REG_IMM_V2BF16,
   OPERAND_REG_IMM_V2FP16,
+  OPERAND_REG_IMM_V2FP16_SPLAT,
   OPERAND_REG_IMM_V2INT16,
   OPERAND_REG_IMM_NOINLINE_V2FP16,
   OPERAND_REG_IMM_V2INT32,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index bd6c58d0f8945..513145f83994a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4666,6 +4666,8 @@ bool SIInstrInfo::isInlineConstant(int64_t Imm, uint8_t OperandType) const {
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
     return AMDGPU::isInlinableLiteralV2F16(Imm);
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
+    return AMDGPU::isPKFMACF16InlineConstant(Imm, ST.isGFX11Plus());
   case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
     return AMDGPU::isInlinableLiteralV2BF16(Imm);
@@ -5133,6 +5135,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_IMM_V2INT32:
   case AMDGPU::OPERAND_REG_IMM_V2BF16:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 4763b5f57b8c8..3009440c75161 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1429,6 +1429,8 @@ def VSrc_v2f32 : SrcRegOrImm9 <VS_64_AlignTarget, "OPERAND_REG_IMM_V2FP32">;
 
 def VSrc_NoInline_v2f16 : SrcRegOrImm9 <VS_32, "OPERAND_REG_IMM_NOINLINE_V2FP16">;
 
+def VSrc_v2f16_splat : SrcRegOrImm9 <VS_32, "OPERAND_REG_IMM_V2FP16_SPLAT">;
+
 //===----------------------------------------------------------------------===//
 // VRegSrc_* Operands with a VGPR
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4ad3a5cd1d727..10cdae63d602f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2807,6 +2807,7 @@ bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
   case AMDGPU::OPERAND_REG_IMM_FP64:
   case AMDGPU::OPERAND_REG_IMM_FP16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
   case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
@@ -3168,6 +3169,34 @@ std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal) {
   return getInlineEncodingV216(true, Literal);
 }
 
+// Encoding of the literal as an inline constant for V_PK_FMAC_F16 instruction
+// or nullopt. This accounts for different inline constant behavior:
+// - Pre-GFX11: fp16 inline constants have the value in low 16 bits, 0 in high
+// - GFX11+: fp16 inline constants are duplicated into both halves
+std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
+                                                   bool IsGFX11Plus) {
+  // Pre-GFX11 behavior: f16 in low bits, 0 in high bits
+  if (!IsGFX11Plus)
+    return getInlineEncodingV216(/*IsFloat=*/true, Literal);
+
+  // GFX11+ behavior: f16 duplicated in both halves
+  // First, check for sign-extended integer inline constants (-16 to 64)
+  // These work the same across all generations
+  int32_t Signed = static_cast<int32_t>(Literal);
+  if (Signed >= 0 && Signed <= 64)
+    return 128 + Signed;
+
+  if (Signed >= -16 && Signed <= -1)
+    return 192 + std::abs(Signed);
+
+  // For float inline constants on GFX11+, both halves must be equal
+  uint16_t Lo = static_cast<uint16_t>(Literal);
+  uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
+  if (Lo != Hi)
+    return std::nullopt;
+  return getInlineEncodingV216(/*IsFloat=*/true, Lo);
+}
+
 // Whether the given literal can be inlined for a V_PK_* instruction.
 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
   switch (OpType) {
@@ -3177,6 +3206,8 @@ bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType) {
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
     return getInlineEncodingV216(true, Literal).has_value();
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
+    llvm_unreachable("OPERAND_REG_IMM_V2FP16_SPLAT is not supported");
   case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
     return isInlinableLiteralV2BF16(Literal);
@@ -3202,6 +3233,11 @@ bool isInlinableLiteralV2F16(uint32_t Literal) {
   return getInlineEncodingV2F16(Literal).has_value();
 }
 
+// Whether the given literal can be inlined for V_PK_FMAC_F16 instruction.
+bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus) {
+  return getPKFMACF16InlineEncoding(Literal, IsGFX11Plus).has_value();
+}
+
 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
   if (IsFP64)
     return !Lo_32(Val);
@@ -3223,6 +3259,7 @@ int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit) {
   case OPERAND_REG_IMM_INT32:
   case OPERAND_REG_IMM_V2BF16:
   case OPERAND_REG_IMM_V2FP16:
+  case OPERAND_REG_IMM_V2FP16_SPLAT:
   case OPERAND_REG_IMM_V2FP32:
   case OPERAND_REG_IMM_V2INT16:
   case OPERAND_REG_IMM_V2INT32:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 770f9a86dc883..835ebfad9330d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1702,6 +1702,7 @@ inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
   case AMDGPU::OPERAND_REG_IMM_V2INT16:
   case AMDGPU::OPERAND_REG_IMM_V2BF16:
   case AMDGPU::OPERAND_REG_IMM_V2FP16:
+  case AMDGPU::OPERAND_REG_IMM_V2FP16_SPLAT:
   case AMDGPU::OPERAND_REG_IMM_NOINLINE_V2FP16:
     return 2;
 
@@ -1747,6 +1748,10 @@ std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
 LLVM_READNONE
 std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
 
+LLVM_READNONE
+std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
+                                                   bool IsGFX11Plus);
+
 LLVM_READNONE
 bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType);
 
@@ -1759,6 +1764,9 @@ bool isInlinableLiteralV2BF16(uint32_t Literal);
 LLVM_READNONE
 bool isInlinableLiteralV2F16(uint32_t Literal);
 
+LLVM_READNONE
+bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);
+
 LLVM_READNONE
 bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 42e4fe7fe26af..799bdb8b18c9c 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1348,10 +1348,15 @@ let isCommutable = 1 in
 def V_FMAAK_F64 : VOP2_Pseudo<"v_fmaak_f64", VOP_MADAK_F64, [], "">;
 } // End SubtargetPredicate = HasFmaakFmamkF64Insts, isReMaterializable = 1, FixedSize = 1, Size = 12, SchedRW = [Write64Bit]
 
+// A dedicated profile for V_PK_FMAC_F16.
+def VOP_V2F16_V2F16_V2F16_SPLAT : VOPProfile <[v2f16, v2f16, v2f16, untyped]> {
+  let Src0RC32 = VSrc_v2f16_splat;
+}
+
 let SubtargetPredicate = HasPkFmacF16Inst in {
 // FIXME: V_PK_FMAC_F16 is currently not used in instruction selection.
 // If this changes, ensure the DPP variant is not used for GFX11+.
-defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16>;
+defm V_PK_FMAC_F16 : VOP2Inst<"v_pk_fmac_f16", VOP_V2F16_V2F16_V2F16_SPLAT>;
 } // End SubtargetPredicate = HasPkFmacF16Inst
 
 // Note: 16-bit instructions produce a 0 result in the high 16-bits
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
index f05178dae37c9..bd670eb88c903 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2-fake16.s
@@ -1916,6 +1916,9 @@ v_pk_fmac_f16 v5, -1, v2
 // GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, 0.5, v2
+// GFX11: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
 // GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, exec_hi, v2
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
index fbc6713245398..c60240362cc0f 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop2.s
@@ -2039,6 +2039,9 @@ v_pk_fmac_f16 v5, -1, v2
 // GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, 0.5, v2
+// GFX11: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
 // GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, src_scc, v2
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
index 6c9c4c60e9817..c535adea8b821 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2-fake16.s
@@ -1922,7 +1922,10 @@ v_pk_fmac_f16 v5, -1, v2
 // GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, 0.5, v2
-// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX11: v_pk_fmac_f16 v5, 0x38003800, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, exec_hi, v2
 // GFX12: v_pk_fmac_f16 v5, exec_hi, v2 ; encoding: [0x7f,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
index e57d2c3e74d70..828430d2b2b95 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop2.s
@@ -2048,7 +2048,10 @@ v_pk_fmac_f16 v5, -1, v2
 // GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, 0.5, v2
-// GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
+// GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
+v_pk_fmac_f16 v5, 0x38003800, v2
+// GFX11: v_pk_fmac_f16 v5, 0x38003800, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
 
 v_pk_fmac_f16 v5, src_scc, v2
 // GFX12: v_pk_fmac_f16 v5, src_scc, v2 ; encoding: [0xfd,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
index 9fc3f619529a2..c9ef581fbfb20 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop2.txt
@@ -1843,6 +1843,9 @@
 0xc1,0x04,0x0a,0x78
 # GFX11: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
 
+0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00
+# GFX11: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
 0xf0,0x04,0x0a,0x78
 # GFX11: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
index 71ac49b8a469a..05e3291dea0a2 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop2.txt
@@ -1921,6 +1921,9 @@
 0xc1,0x04,0x0a,0x78
 # GFX12: v_pk_fmac_f16 v5, -1, v2 ; encoding: [0xc1,0x04,0x0a,0x78]
 
+0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00
+# GFX12: v_pk_fmac_f16 v5, 0x3800, v2 ; encoding: [0xff,0x04,0x0a,0x78,0x00,0x38,0x00,0x00]
+
 0xf0,0x04,0x0a,0x78
 # GFX12: v_pk_fmac_f16 v5, 0.5, v2 ; encoding: [0xf0,0x04,0x0a,0x78]
``````````

https://github.com/llvm/llvm-project/pull/177365
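For readers skimming the diff, the sketch below is a minimal standalone illustration of the GFX11+ splat rule that `getPKFMACF16InlineEncoding` encodes; it is not code from the patch. The helper names (`isInlineF16Half`, `isPKFMACF16InlineGFX11Plus`) and the explicit f16 inline-constant table are assumptions made for the example, whereas the patch defers the per-half check to `getInlineEncodingV216`.

```cpp
// Illustrative sketch (not the LLVM helper): on GFX11+, a v2f16 literal for
// V_PK_FMAC_F16 can be an inline constant only if it is a small sign-extended
// integer, or if both 16-bit halves are equal and that half is an f16 inline
// constant (because the hardware splats the constant into both halves).
#include <cstdint>
#include <cstdio>

static bool isInlineF16Half(uint16_t H) {
  // Assumed f16 inline-constant bit patterns; the patch itself queries
  // getInlineEncodingV216() instead of an explicit table.
  switch (H) {
  case 0x0000: // 0.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/(2*pi)
    return true;
  default:
    return false;
  }
}

static bool isPKFMACF16InlineGFX11Plus(uint32_t Literal) {
  // Sign-extended integer inline constants (-16..64) behave the same on all
  // generations, mirroring the integer checks in getPKFMACF16InlineEncoding.
  int32_t Signed = static_cast<int32_t>(Literal);
  if (Signed >= -16 && Signed <= 64)
    return true;
  // Float inline constants: GFX11+ duplicates the f16 value into both halves,
  // so the literal is only inlinable when the two halves already agree.
  uint16_t Lo = static_cast<uint16_t>(Literal);
  uint16_t Hi = static_cast<uint16_t>(Literal >> 16);
  return Lo == Hi && isInlineF16Half(Lo);
}

int main() {
  // 0x38003800 is (0.5, 0.5): inlinable on GFX11+, disassembled back as 0.5.
  // 0x00003800 is (0.5, 0.0): not inlinable on GFX11+, so it must stay a
  // 32-bit literal, which is what the updated GFX11 assembler test expects.
  std::printf("%d %d\n", isPKFMACF16InlineGFX11Plus(0x38003800u),
              isPKFMACF16InlineGFX11Plus(0x00003800u));
  return 0;
}
```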
