================
@@ -8974,11 +8982,104 @@ void LoopVectorizationPlanner::attachRuntimeChecks(
assert((!CM.OptForSize ||
CM.Hints->getForce() == LoopVectorizeHints::FK_Enabled) &&
"Cannot SCEV check stride or overflow when optimizing for size");
- VPlanTransforms::attachCheckBlock(Plan, SCEVCheckCond, SCEVCheckBlock,
+    VPlanTransforms::attachCheckBlock(Plan, Plan.getOrAddLiveIn(SCEVCheckCond),
+                                      Plan.createVPIRBasicBlock(SCEVCheckBlock),
HasBranchWeights);
}
const auto &[MemCheckCond, MemCheckBlock] = RTChecks.getMemRuntimeChecks();
if (MemCheckBlock && MemCheckBlock->hasNPredecessors(0)) {
+ VPValue *MemCheckCondVPV = Plan.getOrAddLiveIn(MemCheckCond);
+ VPBasicBlock *MemCheckBlockVP = Plan.createVPIRBasicBlock(MemCheckBlock);
+ std::optional<ArrayRef<PointerDiffInfo>> ChecksOpt =
+ CM.Legal->getRuntimePointerChecking()->getDiffChecks();
+
+ // Create a mask enabling safe elements for each iteration.
+ if (CM.getRTCheckStyle(TTI) == RTCheckStyle::UseSafeEltsMask &&
+ ChecksOpt.has_value() && ChecksOpt->size() > 0) {
+ ArrayRef<PointerDiffInfo> Checks = *ChecksOpt;
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
+ VPBasicBlock *LoopBody = LoopRegion->getEntryBasicBlock();
+ VPBuilder Builder(MemCheckBlockVP);
+
+    // Create a mask for each possibly-aliasing pointer pair, ANDing them if
+    // there's more than one pair.
+ VPValue *AliasMask = nullptr;
+ for (PointerDiffInfo Check : Checks) {
+ VPValue *Sink =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, Check.SinkStart);
+ VPValue *Src =
+ vputils::getOrCreateVPValueForSCEVExpr(Plan, Check.SrcStart);
+
+ Type *PtrType = PointerType::getUnqual(Plan.getContext());
+ Sink = Builder.createScalarCast(Instruction::CastOps::IntToPtr, Sink,
+ PtrType, DebugLoc());
+ Src = Builder.createScalarCast(Instruction::CastOps::IntToPtr, Src,
+ PtrType, DebugLoc());
+
+ SmallVector<VPValue *, 3> Ops{
+ Src, Sink,
+ Plan.getConstantInt(IntegerType::getInt64Ty(Plan.getContext()),
+ Check.AccessSize)};
+ VPWidenIntrinsicRecipe *M = new VPWidenIntrinsicRecipe(
+ Check.WriteAfterRead ? Intrinsic::loop_dependence_war_mask
+ : Intrinsic::loop_dependence_raw_mask,
+ Ops, IntegerType::getInt1Ty(Plan.getContext()));
+ MemCheckBlockVP->appendRecipe(M);
+ if (AliasMask)
+ AliasMask = Builder.createAnd(AliasMask, M);
+ else
+ AliasMask = M;
+ }
+ assert(AliasMask && "Expected an alias mask to have been created");
+
+    // Replace uses of the loop body's active lane mask phi with an AND of the
+    // phi and the alias mask.
+ for (VPRecipeBase &R : *LoopBody) {
+ auto *MaskPhi = dyn_cast<VPActiveLaneMaskPHIRecipe>(&R);
----------------
fhahn wrote:
I don't think we necessarily need an active-lane-mask, as long as either all
recipes that need predication (memory ops, ops that are immediate UB on poison,
reductions/recurrences) are already predicated (which could be due to
tail-folding without an active-lane-mask), or we can convert them to predicated
variants using the alias mask.
Also, having an active-lane-mask does not necessarily mean all required recipes
are predicated and use it (e.g. a transform may convert a masked memory access
to an unmasked one if it is guaranteed dereferenceable for the whole loop).
So it would probably be good to check, after the transform, that all required
recipes are masked and that their masks include AliasMask.
https://github.com/llvm/llvm-project/pull/100579
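
[Editor's note] To make the suggested check concrete, below is a rough sketch
(an illustration, not part of the patch) of what a post-transform verification
might look like for widened memory recipes; recipes that are immediate UB on
poison and reductions/recurrences would need analogous handling. It assumes the
VPlan recipe and traversal names shown (VPWidenMemoryRecipe,
vp_depth_first_deep), and maskIncludes() is a hypothetical helper that would
have to walk the mask's defining recipes looking for AliasMask.

  // Sketch only: verify every widened memory recipe in the vector loop region
  // is masked and that its mask incorporates AliasMask.
  static bool allMemoryRecipesGuardedByAliasMask(VPlan &Plan,
                                                 VPValue *AliasMask) {
    for (VPBlockBase *Block :
         vp_depth_first_deep(Plan.getVectorLoopRegion()->getEntry())) {
      auto *VPBB = dyn_cast<VPBasicBlock>(Block);
      if (!VPBB)
        continue;
      for (VPRecipeBase &R : *VPBB) {
        auto *MemR = dyn_cast<VPWidenMemoryRecipe>(&R);
        if (!MemR)
          continue;
        // An unmasked access (e.g. one proven dereferenceable for the whole
        // loop) is not guarded by the alias mask at all.
        if (!MemR->isMasked())
          return false;
        // Hypothetical helper: trace the mask back to check it ANDs in
        // AliasMask.
        if (!maskIncludes(MemR->getMask(), AliasMask))
          return false;
      }
    }
    return true;
  }

Such a check could presumably run right after the phi users are rewritten,
bailing out of the safe-elts mask scheme if it fails.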
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits