llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-analysis Author: None (llvmbot) <details> <summary>Changes</summary> Backport 004c67ea257039e4e98abc26dd4ac6e8f3d7a171 Requested by: @<!-- -->fhahn --- Patch is 66.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149736.diff 16 Files Affected: - (modified) llvm/include/llvm/Analysis/IVDescriptors.h (+3) - (modified) llvm/lib/Analysis/IVDescriptors.cpp (+23-3) - (modified) llvm/lib/Transforms/Utils/LoopUtils.cpp (+8-2) - (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h (+11-1) - (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+14-4) - (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+6) - (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+1) - (modified) llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp (+160) - (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+6-2) - (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+6) - (modified) llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll (+48-7) - (modified) llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll (+48-7) - (modified) llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll (+48-7) - (modified) llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll (+263-9) - (modified) llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll (+80-14) - (modified) llvm/test/Transforms/LoopVectorize/minmax_reduction.ll (+6-2) ``````````diff diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h index b985292ccee40..1dc73205a0ebb 100644 --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -47,6 +47,8 @@ enum class RecurKind { FMul, ///< Product of floats. FMin, ///< FP min implemented in terms of select(cmp()). FMax, ///< FP max implemented in terms of select(cmp()). 
+ FMinNum, ///< FP min with llvm.minnum semantics including NaNs. + FMaxNum, ///< FP max with llvm.maxnum semantics including NaNs. FMinimum, ///< FP min with llvm.minimum semantics FMaximum, ///< FP max with llvm.maximum semantics FMinimumNum, ///< FP min with llvm.minimumnum semantics @@ -250,6 +252,7 @@ class RecurrenceDescriptor { /// Returns true if the recurrence kind is a floating-point min/max kind. static bool isFPMinMaxRecurrenceKind(RecurKind Kind) { return Kind == RecurKind::FMin || Kind == RecurKind::FMax || + Kind == RecurKind::FMinNum || Kind == RecurKind::FMaxNum || Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum || Kind == RecurKind::FMinimumNum || Kind == RecurKind::FMaximumNum; } diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp index 39f74beca082f..8be5de3bf356f 100644 --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -941,10 +941,30 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( m_Intrinsic<Intrinsic::minimumnum>(m_Value(), m_Value())) || match(I, m_Intrinsic<Intrinsic::maximumnum>(m_Value(), m_Value())); }; - if (isIntMinMaxRecurrenceKind(Kind) || - (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind))) + if (isIntMinMaxRecurrenceKind(Kind)) return isMinMaxPattern(I, Kind, Prev); - else if (isFMulAddIntrinsic(I)) + if (isFPMinMaxRecurrenceKind(Kind)) { + InstDesc Res = isMinMaxPattern(I, Kind, Prev); + if (!Res.isRecurrence()) + return InstDesc(false, I); + if (HasRequiredFMF()) + return Res; + // We may be able to vectorize FMax/FMin reductions using maxnum/minnum + // intrinsics with extra checks ensuring the vector loop handles only + // non-NaN inputs. 
+ if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value()))) { + assert(Kind == RecurKind::FMax && + "unexpected recurrence kind for maxnum"); + return InstDesc(I, RecurKind::FMaxNum); + } + if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value()))) { + assert(Kind == RecurKind::FMin && + "unexpected recurrence kind for minnum"); + return InstDesc(I, RecurKind::FMinNum); + } + return InstDesc(false, I); + } + if (isFMulAddIntrinsic(I)) return InstDesc(Kind == RecurKind::FMulAdd, I, I->hasAllowReassoc() ? nullptr : I); return InstDesc(false, I); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 200d1fb854155..e7623aaff105d 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -938,8 +938,10 @@ constexpr Intrinsic::ID llvm::getReductionIntrinsicID(RecurKind RK) { case RecurKind::UMin: return Intrinsic::vector_reduce_umin; case RecurKind::FMax: + case RecurKind::FMaxNum: return Intrinsic::vector_reduce_fmax; case RecurKind::FMin: + case RecurKind::FMinNum: return Intrinsic::vector_reduce_fmin; case RecurKind::FMaximum: return Intrinsic::vector_reduce_fmaximum; @@ -1037,8 +1039,10 @@ Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) { case RecurKind::SMax: return Intrinsic::smax; case RecurKind::FMin: + case RecurKind::FMinNum: return Intrinsic::minnum; case RecurKind::FMax: + case RecurKind::FMaxNum: return Intrinsic::maxnum; case RecurKind::FMinimum: return Intrinsic::minimum; @@ -1096,9 +1100,9 @@ Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right) { Type *Ty = Left->getType(); if (Ty->isIntOrIntVectorTy() || - (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum || + (RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum || + RK == RecurKind::FMinimum || RK == RecurKind::FMaximum || RK == RecurKind::FMinimumNum || RK == RecurKind::FMaximumNum)) { - // TODO: Add float minnum/maxnum support when 
FMF nnan is set. Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK); return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr, "rdx.minmax"); @@ -1308,6 +1312,8 @@ Value *llvm::createSimpleReduction(IRBuilderBase &Builder, Value *Src, case RecurKind::UMin: case RecurKind::FMax: case RecurKind::FMin: + case RecurKind::FMinNum: + case RecurKind::FMaxNum: case RecurKind::FMinimum: case RecurKind::FMaximum: case RecurKind::FMinimumNum: diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 11853859484e3..f57ce0c3ccb4d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -230,7 +230,6 @@ class VPBuilder { /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A /// and \p B. - /// TODO: add createFCmp when needed. VPInstruction *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { @@ -240,6 +239,17 @@ class VPBuilder { new VPInstruction(Instruction::ICmp, {A, B}, Pred, DL, Name)); } + /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A + /// and \p B. 
+ VPInstruction *createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, + DebugLoc DL = DebugLoc::getUnknown(), + const Twine &Name = "") { + assert(Pred >= CmpInst::FIRST_FCMP_PREDICATE && + Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate"); + return tryInsertInstruction( + new VPInstruction(Instruction::FCmp, {A, B}, Pred, DL, Name)); + } + VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 06db89a89bc38..74f59a2f7f136 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4345,10 +4345,14 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization( ElementCount VF) const { - // Cross iteration phis such as reductions need special handling and are - // currently unsupported. - if (any_of(OrigLoop->getHeader()->phis(), - [&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); })) + // Cross iteration phis such as fixed-order recurrences and FMaxNum/FMinNum + // reductions need special handling and are currently unsupported. + if (any_of(OrigLoop->getHeader()->phis(), [&](PHINode &Phi) { + if (!Legal->isReductionVariable(&Phi)) + return Legal->isFixedOrderRecurrence(&Phi); + RecurKind RK = Legal->getRecurrenceDescriptor(&Phi).getRecurrenceKind(); + return RK == RecurKind::FMinNum || RK == RecurKind::FMaxNum; + })) return false; // Phis with uses outside of the loop require special handling and are @@ -8817,6 +8821,12 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // Adjust the recipes for any inloop reductions. 
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start); + // Apply mandatory transformation to handle FP maxnum/minnum reduction with + // NaNs if possible, bail out otherwise. + if (!VPlanTransforms::runPass( + VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath, *Plan)) + return nullptr; + // Transform recipes to abstract recipes if it is legal and beneficial and // clamp the range for better cost estimation. // TODO: Enable following transform when the EVL-version of extended-reduction diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 31aec77db63c1..f6610ea5b333f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -23196,6 +23196,8 @@ class HorizontalReduction { case RecurKind::FindFirstIVUMin: case RecurKind::FindLastIVSMax: case RecurKind::FindLastIVUMax: + case RecurKind::FMaxNum: + case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: case RecurKind::None: @@ -23333,6 +23335,8 @@ class HorizontalReduction { case RecurKind::FindFirstIVUMin: case RecurKind::FindLastIVSMax: case RecurKind::FindLastIVUMax: + case RecurKind::FMaxNum: + case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: case RecurKind::None: @@ -23435,6 +23439,8 @@ class HorizontalReduction { case RecurKind::FindFirstIVUMin: case RecurKind::FindLastIVSMax: case RecurKind::FindLastIVUMax: + case RecurKind::FMaxNum: + case RecurKind::FMinNum: case RecurKind::FMaximumNum: case RecurKind::FMinimumNum: case RecurKind::None: diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index b27a7ffeed208..66657b98b094b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -84,6 +84,7 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) { return ResTy; } case 
Instruction::ICmp: + case Instruction::FCmp: case VPInstruction::ActiveLaneMask: assert(inferScalarType(R->getOperand(0)) == inferScalarType(R->getOperand(1)) && diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index 52eecb000d0c2..c71d70935b449 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -628,3 +628,163 @@ void VPlanTransforms::attachCheckBlock(VPlan &Plan, Value *Cond, Term->addMetadata(LLVMContext::MD_prof, BranchWeights); } } + +bool VPlanTransforms::handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan) { + auto GetMinMaxCompareValue = [](VPReductionPHIRecipe *RedPhiR) -> VPValue * { + auto *MinMaxR = dyn_cast<VPRecipeWithIRFlags>( + RedPhiR->getBackedgeValue()->getDefiningRecipe()); + if (!MinMaxR) + return nullptr; + + auto *RepR = dyn_cast<VPReplicateRecipe>(MinMaxR); + if (!isa<VPWidenIntrinsicRecipe>(MinMaxR) && + !(RepR && isa<IntrinsicInst>(RepR->getUnderlyingInstr()))) + return nullptr; + +#ifndef NDEBUG + Intrinsic::ID RdxIntrinsicId = + RedPhiR->getRecurrenceKind() == RecurKind::FMaxNum ? 
Intrinsic::maxnum + : Intrinsic::minnum; + assert((isa<VPWidenIntrinsicRecipe>(MinMaxR) && + cast<VPWidenIntrinsicRecipe>(MinMaxR)->getVectorIntrinsicID() == + RdxIntrinsicId) || + (RepR && + cast<IntrinsicInst>(RepR->getUnderlyingInstr())->getIntrinsicID() == + RdxIntrinsicId) && + "Intrinsic did not match recurrence kind"); +#endif + + if (MinMaxR->getOperand(0) == RedPhiR) + return MinMaxR->getOperand(1); + + assert(MinMaxR->getOperand(1) == RedPhiR && + "Reduction phi operand expected"); + return MinMaxR->getOperand(0); + }; + + VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); + VPReductionPHIRecipe *RedPhiR = nullptr; + bool HasUnsupportedPhi = false; + for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) { + if (isa<VPCanonicalIVPHIRecipe, VPWidenIntOrFpInductionRecipe>(&R)) + continue; + auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R); + if (!Cur) { + // TODO: Also support fixed-order recurrence phis. + HasUnsupportedPhi = true; + continue; + } + // For now, only a single reduction is supported. + // TODO: Support multiple MaxNum/MinNum reductions and other reductions. + if (RedPhiR) + return false; + if (Cur->getRecurrenceKind() != RecurKind::FMaxNum && + Cur->getRecurrenceKind() != RecurKind::FMinNum) { + HasUnsupportedPhi = true; + continue; + } + RedPhiR = Cur; + } + + if (!RedPhiR) + return true; + + // We won't be able to resume execution in the scalar tail, if there are + // unsupported header phis or there is no scalar tail at all, due to + // tail-folding. + if (HasUnsupportedPhi || !Plan.hasScalarTail()) + return false; + + VPValue *MinMaxOp = GetMinMaxCompareValue(RedPhiR); + if (!MinMaxOp) + return false; + + RecurKind RedPhiRK = RedPhiR->getRecurrenceKind(); + assert((RedPhiRK == RecurKind::FMaxNum || RedPhiRK == RecurKind::FMinNum) && + "unsupported reduction"); + + /// Check if the vector loop of \p Plan can early exit and restart + /// execution of last vector iteration in the scalar loop. 
This requires all + /// recipes up to early exit point be side-effect free as they are + /// re-executed. Currently we check that the loop is free of any recipe that + /// may write to memory. Expected to operate on an early VPlan w/o nested + /// regions. + for (VPBlockBase *VPB : vp_depth_first_shallow( + Plan.getVectorLoopRegion()->getEntryBasicBlock())) { + auto *VPBB = cast<VPBasicBlock>(VPB); + for (auto &R : *VPBB) { + if (R.mayWriteToMemory() && + !match(&R, m_BranchOnCount(m_VPValue(), m_VPValue()))) + return false; + } + } + + VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock(); + VPBuilder Builder(LatchVPBB->getTerminator()); + auto *LatchExitingBranch = cast<VPInstruction>(LatchVPBB->getTerminator()); + assert(LatchExitingBranch->getOpcode() == VPInstruction::BranchOnCount && + "Unexpected terminator"); + auto *IsLatchExitTaken = + Builder.createICmp(CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0), + LatchExitingBranch->getOperand(1)); + + VPValue *IsNaN = Builder.createFCmp(CmpInst::FCMP_UNO, MinMaxOp, MinMaxOp); + VPValue *AnyNaN = Builder.createNaryOp(VPInstruction::AnyOf, {IsNaN}); + auto *AnyExitTaken = + Builder.createNaryOp(Instruction::Or, {AnyNaN, IsLatchExitTaken}); + Builder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken); + LatchExitingBranch->eraseFromParent(); + + // If we exit early due to NaNs, compute the final reduction result based on + // the reduction phi at the beginning of the last vector iteration. 
+ auto *RdxResult = find_singleton<VPSingleDefRecipe>( + RedPhiR->users(), [](VPUser *U, bool) -> VPSingleDefRecipe * { + auto *VPI = dyn_cast<VPInstruction>(U); + if (VPI && VPI->getOpcode() == VPInstruction::ComputeReductionResult) + return VPI; + return nullptr; + }); + + auto *MiddleVPBB = Plan.getMiddleBlock(); + Builder.setInsertPoint(MiddleVPBB, MiddleVPBB->begin()); + auto *NewSel = + Builder.createSelect(AnyNaN, RedPhiR, RdxResult->getOperand(1)); + RdxResult->setOperand(1, NewSel); + + auto *ScalarPH = Plan.getScalarPreheader(); + // Update resume phis for inductions in the scalar preheader. If AnyNaN is + // true, then resume from the start of the last vector iteration via the + // canonical IV, otherwise from the original value. + for (auto &R : ScalarPH->phis()) { + auto *ResumeR = cast<VPPhi>(&R); + VPValue *VecV = ResumeR->getOperand(0); + if (VecV == RdxResult) + continue; + if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) { + if (DerivedIV->getNumUsers() == 1 && + DerivedIV->getOperand(1) == &Plan.getVectorTripCount()) { + auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), + &Plan.getVectorTripCount()); + DerivedIV->moveAfter(&*Builder.getInsertPoint()); + DerivedIV->setOperand(1, NewSel); + continue; + } + } + // Bail out and abandon the current, partially modified, VPlan if we + // encounter a resume phi that cannot be updated yet. 
+ if (VecV != &Plan.getVectorTripCount()) { + LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with " + "FMaxNum/FMinNum reduction.\n"); + return false; + } + auto *NewSel = Builder.createSelect(AnyNaN, Plan.getCanonicalIV(), VecV); + ResumeR->setOperand(0, NewSel); + } + + auto *MiddleTerm = MiddleVPBB->getTerminator(); + Builder.setInsertPoint(MiddleTerm); + VPValue *MiddleCond = MiddleTerm->getOperand(0); + VPValue *NewCond = Builder.createAnd(MiddleCond, Builder.createNot(AnyNaN)); + MiddleTerm->setOperand(0, NewCond); + return true; +} diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1664bcc3881aa..57b713d3dfcb9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -587,6 +587,7 @@ Value *VPInstruction::generate(VPTransformState &State) { Value *Op = State.get(getOperand(0), vputils::onlyFirstLaneUsed(this)); return Builder.CreateFreeze(Op, Name); } + case Instruction::FCmp: case Instruction::ICmp: { bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); Value *A = State.get(getOperand(0), OnlyFirstLaneUsed); @@ -860,7 +861,7 @@ Value *VPInstruction::generate(VPTransformState &State) { Value *Res = State.get(getOperand(0)); for (VPValue *Op : drop_begin(operands())) Res = Builder.CreateOr(Res, State.get(Op)); - return Builder.CreateOrReduce(Res); + return State.VF.isScalar() ? 
Res : Builder.CreateOrReduce(Res); } case VPInstruction::FirstActiveLane: { if (getNumOperands() == 1) { @@ -1033,6 +1034,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { switch (getOpcode()) { case Instruction::ExtractElement: case Instruction::Freeze: + case Instruction::FCmp: case Instruction::ICmp: case Instruction::Select: case VPInstruction::AnyOf: @@ -1068,6 +1070,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { return Op == getOperand(1); case Instruction::PHI: return true; + case Instruction::FCmp: case Instruction::ICmp: case Instruction::Select: case Instruction::Or: @@ -1100,6 +1103,7 @@ bool VPInstruction::onlyFirstPartUsed(const VPValue *Op) const { switch (getOpcode()) { default: return false; + case Instruction::FCmp: case Instruction::ICmp: case Instruction::Select: return vputils::onlyFirstPartUsed(this); @@ -1786,7 +1790,7 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const { return Opcode == Instruction::ZExt; break; case OperationType::Cmp: - return Opcode == Instruction::ICmp; + return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp; case OperationType::Other: return true; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index 870b1bb68b79a..4d1752fe57565 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -99,6 +99,12 @@ struct VPlanTransforms { /// not valid. static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder); + /// Check if \p Plan contains any FMaxNum or FMinNum reductions. If they do, + /// try to update the vector loop to exit early if any input is NaN and resume + /// executing in the scalar loop to handle the NaNs there. Return false if + /// this attempt was unsuccessful. + static bool handleMaxMinNumReductionsWithoutFastMath(VPlan &Plan); + /// Clear NSW/NUW flags from reduction instructions if necessary. static void cl... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/149736 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits