llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes:

Handle this here instead of in DAGCombine, mostly because the f32 case is already handled here due to its dependency on !fpmath. We can also take advantage of computeKnownFPClass.

---

Patch is 604.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/172053.diff

3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp (+129-12)
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll (+309-56)
- (modified) llvm/test/CodeGen/AMDGPU/rsq.f64.ll (+5716-4375)

``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 71ea9ef6fc050..e45d0652a65ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -236,6 +236,9 @@ class AMDGPUCodeGenPrepareImpl
                        FastMathFlags FMF) const;
   Value *emitSqrtIEEE2ULP(IRBuilder<> &Builder, Value *Src,
                           FastMathFlags FMF) const;
+  Value *emitRsqF64(IRBuilder<> &Builder, Value *X, FastMathFlags SqrtFMF,
+                    FastMathFlags DivFMF, const Instruction *CtxI,
+                    bool IsNegative) const;
 
   bool tryNarrowMathIfNoOverflow(Instruction *I);
 
@@ -605,6 +608,94 @@ static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
   return Builder.CreateFMul(Rsq, OutputScaleFactor);
 }
 
+/// Emit inverse sqrt expansion for f64 with a correction sequence on top of
+/// v_rsq_f64. This should give a 1ulp result.
+Value *AMDGPUCodeGenPrepareImpl::emitRsqF64(IRBuilder<> &Builder, Value *X,
+                                            FastMathFlags SqrtFMF,
+                                            FastMathFlags DivFMF,
+                                            const Instruction *CtxI,
+                                            bool IsNegative) const {
+  // rsq(x):
+  //   double y0 = BUILTIN_AMDGPU_RSQRT_F64(x);
+  //   double e = MATH_MAD(-y0 * (x == PINF_F64 || x == 0.0 ? y0 : x), y0, 1.0);
+  //   return MATH_MAD(y0*e, MATH_MAD(e, 0.375, 0.5), y0);
+  //
+  // The rsq instruction handles the special cases correctly. We need to check
+  // for the edge case conditions to ensure the special case propagates through
+  // the later instructions.
+
+  Value *Y0 = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, X);
+
+  // Try to elide the edge case check.
+  //
+  // Fast math flags imply:
+  //   sqrt ninf => !isinf(x)
+  //   sqrt nnan => not helpful
+  //   fdiv ninf => x != 0, !isinf(x)
+  //   fdiv nnan => x != 0
+  bool MaybePosInf = !SqrtFMF.noInfs() && !DivFMF.noInfs();
+  bool MaybeZero = !DivFMF.noInfs() && !DivFMF.noNaNs();
+
+  DenormalMode DenormMode;
+  FPClassTest Interested = fcNone;
+  if (MaybeZero)
+    Interested = fcZero;
+  if (MaybePosInf)
+    Interested = fcPosInf;
+
+  if (Interested != fcNone) {
+    KnownFPClass KnownSrc = computeKnownFPClass(X, Interested, CtxI);
+    if (KnownSrc.isKnownNeverPosInfinity())
+      MaybePosInf = false;
+
+    DenormMode = F.getDenormalMode(X->getType()->getFltSemantics());
+    if (KnownSrc.isKnownNeverLogicalZero(DenormMode))
+      MaybeZero = false;
+  }
+
+  Value *SpecialOrRsq = Y0;
+  if (MaybeZero || MaybePosInf) {
+    Value *Cond;
+    if (MaybePosInf && MaybeZero) {
+      if (DenormMode.Input != DenormalMode::DenormalModeKind::Dynamic) {
+        FPClassTest TestMask = fcPosInf | fcZero;
+        if (DenormMode.inputsAreZero())
+          TestMask |= fcSubnormal;
+
+        Cond = Builder.createIsFPClass(X, TestMask);
+      } else {
+        // Avoid using llvm.is.fpclass for dynamic denormal mode, since it
+        // doesn't respect the floating-point environment.
+        Value *IsZero =
+            Builder.CreateFCmpOEQ(X, ConstantFP::getZero(X->getType()));
+        Value *IsInf =
+            Builder.CreateFCmpOEQ(X, ConstantFP::getInfinity(X->getType()));
+        Cond = Builder.CreateOr(IsZero, IsInf);
+      }
+    } else if (MaybeZero) {
+      Cond = Builder.CreateFCmpOEQ(X, ConstantFP::getZero(X->getType()));
+    } else {
+      Cond = Builder.CreateFCmpOEQ(X, ConstantFP::getInfinity(X->getType()));
+    }
+
+    SpecialOrRsq = Builder.CreateSelect(Cond, Y0, X);
+  }
+
+  Value *NegY0 = Builder.CreateFNeg(Y0);
+  Value *NegXY0 = Builder.CreateFMul(NegY0, SpecialOrRsq);
+
+  // Could be fmuladd, but isFMAFasterThanFMulAndFAdd is always true for f64.
+  Value *E = Builder.CreateFMA(NegXY0, Y0, ConstantFP::get(X->getType(), 1.0));
+  Value *Y0E = Builder.CreateFMul(Y0, E);
+
+  Value *EFMA = Builder.CreateFMA(E, ConstantFP::get(X->getType(), 0.375),
+                                  ConstantFP::get(X->getType(), 0.5));
+  if (IsNegative)
+    EFMA = Builder.CreateFNeg(EFMA);
+
+  return Builder.CreateFMA(Y0E, EFMA, Y0);
+}
+
 bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
                                                   FastMathFlags DivFMF,
                                                   FastMathFlags SqrtFMF) const {
@@ -612,8 +703,22 @@ bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
   if (!DivFMF.allowContract() || !SqrtFMF.allowContract())
     return false;
 
-  // v_rsq_f32 gives 1ulp
-  return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
+  Type *EltTy = SqrtOp->getType()->getScalarType();
+  switch (EltTy->getTypeID()) {
+  case Type::FloatTyID:
+    // v_rsq_f32 gives 1ulp
+    // Separate correctly rounded fdiv + sqrt give ~1.81 ulp.
+
+    // FIXME: rsq formation should not depend on approx func or the fpmath
+    // accuracy. This strictly improves precision.
+    return SqrtFMF.approxFunc() || SqrtOp->getFPAccuracy() >= 1.0f;
+  case Type::DoubleTyID:
+    return true;
+  default:
+    return false;
+  }
+
+  llvm_unreachable("covered switch");
 }
 
 Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
@@ -629,8 +734,6 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
   if (!CLHS)
     return nullptr;
 
-  assert(Den->getType()->isFloatTy());
-
   bool IsNegative = false;
 
   // TODO: Handle other numerator values with arcp.
@@ -639,14 +742,20 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
     IRBuilder<>::FastMathFlagGuard Guard(Builder);
     Builder.setFastMathFlags(DivFMF | SqrtFMF);
 
-    if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
-        canIgnoreDenormalInput(Den, CtxI)) {
-      Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
-      // -1.0 / sqrt(x) -> fneg(rsq(x))
-      return IsNegative ? Builder.CreateFNeg(Result) : Result;
+    if (Den->getType()->isFloatTy()) {
+      if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
+          canIgnoreDenormalInput(Den, CtxI)) {
+        Value *Result =
+            Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
+        // -1.0 / sqrt(x) -> fneg(rsq(x))
+        return IsNegative ? Builder.CreateFNeg(Result) : Result;
+      }
+
+      return emitRsqIEEE1ULP(Builder, Den, IsNegative);
     }
 
-    return emitRsqIEEE1ULP(Builder, Den, IsNegative);
+    if (Den->getType()->isDoubleTy())
+      return emitRsqF64(Builder, Den, SqrtFMF, DivFMF, CtxI, IsNegative);
   }
 
   return nullptr;
@@ -758,6 +867,9 @@ Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
     return Rsq;
   }
 
+  if (!Num->getType()->isFloatTy())
+    return nullptr;
+
   Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);
   if (Rcp)
     return Rcp;
@@ -793,7 +905,8 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
     return false;
 
   Type *Ty = FDiv.getType()->getScalarType();
-  if (!Ty->isFloatTy())
+  const bool IsFloat = Ty->isFloatTy();
+  if (!IsFloat && !Ty->isDoubleTy())
     return false;
 
   // The f64 rcp/rsq approximations are pretty inaccurate. We can do an
@@ -818,6 +931,10 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
     RsqOp = SqrtOp->getOperand(0);
   }
 
+  // rcp path not yet implemented for f64.
+  if (!IsFloat && !RsqOp)
+    return false;
+
   // Inaccurate rcp is allowed with afn.
   //
   // Defer to codegen to handle this.
@@ -832,7 +949,7 @@ bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
     return false;
 
   // Defer the correct implementations to codegen.
-  if (ReqdAccuracy < 1.0f)
+  if (IsFloat && ReqdAccuracy < 1.0f)
     return false;
 
   IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
index b97cd91f2ab32..764b10a7d1987 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.f64.ll
@@ -4,8 +4,15 @@
 define double @rsq_f64(double %x) {
 ; CHECK-LABEL: define double @rsq_f64(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -16,8 +23,16 @@ define double @rsq_f64(double %x) {
 define double @neg_rsq_f64(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call i1 @llvm.is.fpclass.f64(double [[X]], i32 608)
+; CHECK-NEXT:    [[TMP3:%.*]] = select contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP9:%.*]] = fneg contract double [[TMP8]]
+; CHECK-NEXT:    [[FDIV:%.*]] = call contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract double @llvm.sqrt.f64(double %x)
@@ -28,8 +43,15 @@ define double @neg_rsq_f64(double %x) {
 define double @rsq_f64_nnan(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nnan(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -40,8 +62,16 @@ define double @rsq_f64_nnan(double %x) {
 define double @neg_rsq_f64_nnan(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64_nnan(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan contract oeq double [[X]], 0x7FF0000000000000
+; CHECK-NEXT:    [[TMP3:%.*]] = select nnan contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP9:%.*]] = fneg nnan contract double [[TMP8]]
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan contract double @llvm.fma.f64(double [[TMP7]], double [[TMP9]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan double @llvm.sqrt.f64(double %x)
@@ -52,8 +82,13 @@ define double @neg_rsq_f64_nnan(double %x) {
 define double @rsq_f64_ninf(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -64,8 +99,14 @@ define double @rsq_f64_ninf(double %x) {
 define double @neg_rsq_f64_ninf(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv ninf contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP7:%.*]] = fneg ninf contract double [[TMP6]]
+; CHECK-NEXT:    [[FDIV:%.*]] = call ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract ninf double @llvm.sqrt.f64(double %x)
@@ -76,8 +117,13 @@ define double @neg_rsq_f64_ninf(double %x) {
 define double @rsq_f64_nnan_ninf(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_nnan_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -88,8 +134,14 @@ define double @rsq_f64_nnan_ninf(double %x) {
 define double @neg_rsq_f64_nnan_ninf(double %x) {
 ; CHECK-LABEL: define double @neg_rsq_f64_nnan_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan ninf contract double -1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul nnan ninf contract double [[TMP2]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP3]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP4]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[TMP7:%.*]] = fneg nnan ninf contract double [[TMP6]]
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP7]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -100,8 +152,15 @@ define double @neg_rsq_f64_nnan_ninf(double %x) {
 define double @rsq_f64_sqrt_nnan_ninf(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_sqrt_nnan_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call nnan ninf contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fcmp nnan ninf contract oeq double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[TMP3:%.*]] = select nnan ninf contract i1 [[TMP2]], double [[TMP1]], double [[X]]
+; CHECK-NEXT:    [[TMP4:%.*]] = fneg nnan ninf contract double [[TMP1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul nnan ninf contract double [[TMP4]], [[TMP3]]
+; CHECK-NEXT:    [[TMP6:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP5]], double [[TMP1]], double 1.000000e+00)
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul nnan ninf contract double [[TMP1]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP6]], double 3.750000e-01, double 5.000000e-01)
+; CHECK-NEXT:    [[FDIV:%.*]] = call nnan ninf contract double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP1]])
 ; CHECK-NEXT:    ret double [[FDIV]]
 ;
   %sqrt.x = call contract nnan ninf double @llvm.sqrt.f64(double %x)
@@ -112,8 +171,13 @@ define double @rsq_f64_sqrt_nnan_ninf(double %x) {
 define double @rsq_f64_fdiv_nnan_ninf(double %x) {
 ; CHECK-LABEL: define double @rsq_f64_fdiv_nnan_ninf(
 ; CHECK-SAME: double [[X:%.*]]) {
-; CHECK-NEXT:    [[SQRT_X:%.*]] = call contract double @llvm.sqrt.f64(double [[X]])
-; CHECK-NEXT:    [[FDIV:%.*]] = fdiv nnan ninf contract double 1.000000e+00, [[SQRT_X]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf contract double @llvm.amdgcn.rsq.f64(double [[X]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fneg nnan ...
[truncated]
``````````

https://github.com/llvm/llvm-project/pull/172053
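For reviewers following the arithmetic: the correction sequence in emitRsqF64 is a second-order (two-term Householder) refinement of the hardware estimate. A sketch of why one such step should reach ~1 ulp, writing y_0 for the v_rsq_f64 result and e for the residual the first FMA computes (the O(e^3) bound below is the series truncation error, not a measured ulp claim about the patch):

```latex
y_0 \approx \frac{1}{\sqrt{x}}, \qquad e = 1 - x\,y_0^2, \qquad
\frac{1}{\sqrt{x}} \;=\; y_0\,(1-e)^{-1/2}
  \;=\; y_0\Bigl(1 + \tfrac{1}{2}e + \tfrac{3}{8}e^2 + \tfrac{5}{16}e^3 + \cdots\Bigr)
```

The final three instructions evaluate y_0 + (y_0 e)(1/2 + 3e/8) = y_0 (1 + e/2 + 3e^2/8), i.e. the series truncated after the quadratic term, so the remaining error is O(e^3) on top of rounding; with a small initial relative error that cube is negligible, which is where the "should give a 1ulp result" comment comes from.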

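A minimal scalar model of the emitted sequence, mirroring the OCML-style pseudocode quoted in emitRsqF64's comment; `rsq_approx` here is a hypothetical stand-in for the v_rsq_f64 estimate, not a real API:

```cpp
#include <cmath>
#include <limits>

// Hypothetical stand-in for the v_rsq_f64 hardware estimate; any rough
// 1/sqrt(x) approximation with small relative error plays the same role.
static double rsq_approx(double x) { return 1.0 / std::sqrt(x); }

// Scalar model of the expansion emitRsqF64 emits.
double rsq_f64_model(double x) {
  const double inf = std::numeric_limits<double>::infinity();
  double y0 = rsq_approx(x);
  // Edge-case guard: for x == +/-0.0 (y0 == +/-inf) or x == +inf (y0 == +0.0),
  // feed y0 rather than x into the residual so the special value propagates
  // instead of producing NaN from a 0 * inf product.
  double s = (x == 0.0 || x == inf) ? y0 : x;
  double e = std::fma(-y0 * s, y0, 1.0); // e = 1 - x*y0^2 in the normal case
  return std::fma(y0 * e, std::fma(e, 0.375, 0.5), y0);
}
```

In the patch the guard is the select fed by llvm.is.fpclass (or by an fcmp when the fast-math flags rule out one of the two special cases); the refinement arithmetic is the same.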