Author: Kewen Meng Date: 2026-01-29T21:17:25-08:00 New Revision: 8b9e72f78bafc9d8c4d3f30dbd4c77128b8e4a2a
URL: https://github.com/llvm/llvm-project/commit/8b9e72f78bafc9d8c4d3f30dbd4c77128b8e4a2a DIFF: https://github.com/llvm/llvm-project/commit/8b9e72f78bafc9d8c4d3f30dbd4c77128b8e4a2a.diff LOG: Revert "[AMDGPU] Replace AMDGPUISD::FFBH_I32 with ISD::CTLS (#178420)" This reverts commit 65925b0405009177a4ede0b79bcfebfb48329b1e. Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp llvm/lib/Target/AMDGPU/SIISelLowering.cpp llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll Removed: ################################################################################ diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 5035cc6926546..0c9ae45f648e9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3346,7 +3346,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32), OppositeSign); // Count the leading sign bits. - ShAmt = DAG.getNode(ISD::CTLS, SL, MVT::i32, Hi); + ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi); // Different from unsigned conversion, the shift should be one bit less to // preserve the sign bit. ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index eef084214e237..8a43c2da38346 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -320,6 +320,7 @@ def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; // ctlz with -1 if input is zero. def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>; +def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>; // cttz with -1 if input is zero. def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>; @@ -486,7 +487,7 @@ def AMDGPUdiv_fixup : PatFrags<(ops node:$src0, node:$src1, node:$src2), def AMDGPUffbh_i32 : PatFrags<(ops node:$src), [(int_amdgcn_sffbh node:$src), - (ctls node:$src)]>; + (AMDGPUffbh_i32_impl node:$src)]>; def AMDGPUffbh_u32 : PatFrags<(ops node:$src), [(ctlz_zero_undef node:$src), diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 93313a8912a50..559fb041c424d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1359,12 +1359,6 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .widenScalarToNextPow2(0, 32) .widenScalarToNextPow2(1, 32); - getActionDefinitionsBuilder(G_CTLS) - .legalFor({{S32, S32}}) - .clampScalar(0, S32, S32) - .clampScalar(1, S32, S32) - .scalarize(0); - // S64 is only legal on SALU, and needs to be broken into 32-bit elements in // RegBankSelect. getActionDefinitionsBuilder(G_BITREVERSE) @@ -2768,7 +2762,8 @@ bool AMDGPULegalizerInfo::legalizeITOFP( auto X = B.buildXor(S32, Unmerge.getReg(0), Unmerge.getReg(1)); auto OppositeSign = B.buildAShr(S32, X, ThirtyOne); auto MaxShAmt = B.buildAdd(S32, ThirtyTwo, OppositeSign); - auto LS = B.buildCTLS(S32, Unmerge.getReg(1)); + auto LS = B.buildIntrinsic(Intrinsic::amdgcn_sffbh, {S32}) + .addUse(Unmerge.getReg(1)); auto LS2 = B.buildSub(S32, LS, One); ShAmt = B.buildUMin(S32, LS2, MaxShAmt); } else diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index fd946c36d4dbf..949cc69e2402c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -4289,13 +4289,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(BankID, Size); break; } - case AMDGPU::G_CTLS: { - unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); - unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI); - OpdsMapping[0] = AMDGPU::getValueMapping(BankID, 32); - OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size); - break; - } case AMDGPU::G_CTPOP: { unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); unsigned BankID = getRegBankID(MI.getOperand(1).getReg(), MRI); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b82000e3c1968..e8891b6a603b4 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -516,7 +516,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom); setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom); - setOperationAction(ISD::CTLS, MVT::i32, Legal); // We only really have 32-bit BFE instructions (and 16-bit on VI). // @@ -10191,7 +10190,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT, Op.getOperand(1), Op.getOperand(2)); case Intrinsic::amdgcn_sffbh: - return DAG.getNode(ISD::CTLS, DL, VT, Op.getOperand(1)); + return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1)); case Intrinsic::amdgcn_sbfe: return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir index f6107900984b9..4cbdea64f1c00 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir @@ -123,7 +123,7 @@ body: | ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) @@ -145,7 +145,7 @@ body: | ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) @@ -467,7 +467,7 @@ body: | ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32) @@ -490,7 +490,7 @@ body: | ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT_INREG]], [[UMIN]](s32) @@ -524,7 +524,7 @@ body: | ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) @@ -548,7 +548,7 @@ body: | ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV]], [[UV1]] ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV1]](s32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV1]](s32) ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[UMIN]](s32) @@ -585,7 +585,7 @@ body: | ; GFX6-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]] ; GFX6-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) ; GFX6-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV3]](s32) + ; GFX6-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32) ; GFX6-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] ; GFX6-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32) @@ -600,7 +600,7 @@ body: | ; GFX6-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]] ; GFX6-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32) ; GFX6-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]] - ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_CTLS [[UV7]](s32) + ; GFX6-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32) ; GFX6-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]] ; GFX6-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]] ; GFX6-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32) @@ -630,7 +630,7 @@ body: | ; GFX8-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[UV2]], [[UV3]] ; GFX8-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[XOR]], [[C2]](s32) ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR]] - ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_CTLS [[UV3]](s32) + ; GFX8-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV3]](s32) ; GFX8-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[INT]], [[C1]] ; GFX8-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[SUB]], [[ADD]] ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[UMIN]](s32) @@ -645,7 +645,7 @@ body: | ; GFX8-NEXT: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[UV6]], [[UV7]] ; GFX8-NEXT: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[XOR1]], [[C2]](s32) ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[C]], [[ASHR1]] - ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_CTLS [[UV7]](s32) + ; GFX8-NEXT: [[INT1:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.sffbh), [[UV7]](s32) ; GFX8-NEXT: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[INT1]], [[C1]] ; GFX8-NEXT: [[UMIN2:%[0-9]+]]:_(s32) = G_UMIN [[SUB2]], [[ADD1]] ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[UMIN2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll index 328ce6ce037b6..9896e5f4c8cae 100644 --- a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll +++ b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll @@ -18,7 +18,7 @@ define amdgpu_kernel void @select_constant_cttz(ptr addrspace(1) noalias %out, p ; GCN-NEXT: s_and_b64 s[6:7], s[4:5], exec ; GCN-NEXT: s_cselect_b32 s2, -1, s2 ; GCN-NEXT: s_flbit_i32 s6, s2 -; GCN-NEXT: s_xor_b32 s8, s6, 31 +; GCN-NEXT: s_sub_i32 s8, 31, s6 ; GCN-NEXT: s_cmp_eq_u32 s2, 0 ; GCN-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
