https://github.com/nhaehnle created https://github.com/llvm/llvm-project/pull/170510
The second pass of promotion to vector can be quite simple. Reflect that
simplicity in the code for better maintainability.

---

**Stack**:
- [2/2] #170509
- [1/2] #170508

⚠️ *Part of a stack created by [spr](https://github.com/nhaehnle/spr). Merging this PR using the GitHub UI may have unexpected results.*

From c0f787395663d05afb340e191ab497f569bfda14 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolai=20H=C3=A4hnle?= <[email protected]>
Date: Tue, 4 Nov 2025 21:17:14 -0800
Subject: [PATCH] AMDGPU/PromoteAlloca: Simplify how deferred loads work

The second pass of promotion to vector can be quite simple. Reflect that
simplicity in the code for better maintainability.

commit-id:cbc1e9ae
---
 .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 80 ++++++++-----------
 1 file changed, 34 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 77db14513254f..73ec607014d31 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -502,27 +502,14 @@ static Value *promoteAllocaUserToVector(
     Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
     unsigned VecStoreSize, unsigned ElementSize,
     DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
-    std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
-    SmallVectorImpl<LoadInst *> &DeferredLoads) {
+    std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
+    function_ref<Value *()> GetCurVal) {
   // Note: we use InstSimplifyFolder because it can leverage the DataLayout
   // to do more folding, especially in the case of vector splats.
   IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
                                         InstSimplifyFolder(DL));
   Builder.SetInsertPoint(Inst);
 
-  const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
-    if (CurVal)
-      return CurVal;
-
-    // If the current value is not known, insert a dummy load and lower it on
-    // the second pass.
-    LoadInst *Dummy =
-        Builder.CreateLoad(VectorTy, PoisonValue::get(Builder.getPtrTy()),
-                           "promotealloca.dummyload");
-    DeferredLoads.push_back(Dummy);
-    return Dummy;
-  };
-
   const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
                                                    Type *PtrTy) -> Value * {
     assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
@@ -542,12 +529,7 @@ static Value *promoteAllocaUserToVector(
 
   switch (Inst->getOpcode()) {
   case Instruction::Load: {
-    // Loads can only be lowered if the value is known.
-    if (!CurVal) {
-      DeferredLoads.push_back(cast<LoadInst>(Inst));
-      return nullptr;
-    }
-
+    Value *CurVal = GetCurVal();
     Value *Index = calculateVectorIndex(
         cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
 
@@ -637,7 +619,7 @@ static Value *promoteAllocaUserToVector(
         Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
 
-      Value *CurVec = GetOrLoadCurrentVectorValue();
+      Value *CurVec = GetCurVal();
       for (unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
            K < NumElts; ++K) {
         Value *CurIdx =
@@ -650,8 +632,7 @@ static Value *promoteAllocaUserToVector(
 
     if (Val->getType() != VecEltTy)
       Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
-    return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
-                                       Index);
+    return Builder.CreateInsertElement(GetCurVal(), Val, Index);
   }
   case Instruction::Call: {
     if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
@@ -673,7 +654,7 @@ static Value *promoteAllocaUserToVector(
         }
       }
 
-      return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
+      return Builder.CreateShuffleVector(GetCurVal(), Mask);
     }
 
     if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
@@ -1038,37 +1019,44 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
 
   Updater.AddAvailableValue(EntryBB, AllocaInitValue);
 
-  // First handle the initial worklist.
-  SmallVector<LoadInst *, 4> DeferredLoads;
+  // First handle the initial worklist, in basic block order.
+  //
+  // Insert a placeholder whenever we need the vector value at the top of a
+  // basic block.
+  SmallVector<Instruction *> Placeholders;
   forEachWorkListItem(WorkList, [&](Instruction *I) {
     BasicBlock *BB = I->getParent();
-    // On the first pass, we only take values that are trivially known, i.e.
-    // where AddAvailableValue was already called in this block.
-    Value *Result = promoteAllocaUserToVector(
-        I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
-        Updater.FindValueForBlock(BB), DeferredLoads);
+    auto GetCurVal = [&]() -> Value * {
+      if (Value *CurVal = Updater.FindValueForBlock(BB))
+        return CurVal;
+
+      // If the current value in the basic block is not yet known, insert a
+      // placeholder that we will replace later.
+      IRBuilder<> Builder(I);
+      auto *Placeholder = cast<Instruction>(Builder.CreateFreeze(
+          PoisonValue::get(VectorTy), "promotealloca.placeholder"));
+      Placeholders.push_back(Placeholder);
+      Updater.AddAvailableValue(BB, Placeholder);
+      return Placeholder;
+    };
+
+    Value *Result =
+        promoteAllocaUserToVector(I, *DL, VectorTy, VecStoreSize, ElementSize,
+                                  TransferInfo, GEPVectorIdx, GetCurVal);
     if (Result)
       Updater.AddAvailableValue(BB, Result);
   });
 
-  // Then handle deferred loads.
-  forEachWorkListItem(DeferredLoads, [&](Instruction *I) {
-    SmallVector<LoadInst *, 0> NewDLs;
-    BasicBlock *BB = I->getParent();
-    // On the second pass, we use GetValueInMiddleOfBlock to guarantee we always
-    // get a value, inserting PHIs as needed.
-    Value *Result = promoteAllocaUserToVector(
-        I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
-        Updater.GetValueInMiddleOfBlock(I->getParent()), NewDLs);
-    if (Result)
-      Updater.AddAvailableValue(BB, Result);
-    assert(NewDLs.empty() && "No more deferred loads should be queued!");
-  });
+  // Now fixup the placeholders.
+  for (Instruction *Placeholder : Placeholders) {
+    Placeholder->replaceAllUsesWith(
+        Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
+    Placeholder->eraseFromParent();
+  }
 
   // Delete all instructions. On the first pass, new dummy loads may have been
   // added so we need to collect them too.
   DenseSet<Instruction *> InstsToDelete(WorkList.begin(), WorkList.end());
-  InstsToDelete.insert_range(DeferredLoads);
   for (Instruction *I : InstsToDelete) {
     assert(I->use_empty());
     I->eraseFromParent();

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
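
For readers skimming the hunks above, the new scheme boils down to the following minimal sketch. The helper names (`getOrCreatePlaceholder`, `resolvePlaceholders`) are illustrative only, not part of the patch, which keeps the logic in a lambda inside `tryPromoteAllocaToVector`:

```cpp
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

// If the promoted vector value is already known in BB, return it. Otherwise
// materialize freeze(poison) as a stand-in before InsertBefore, remember it,
// and register it with the SSAUpdater so later users in the block reuse it.
static Value *
getOrCreatePlaceholder(SSAUpdater &Updater, BasicBlock *BB,
                       Instruction *InsertBefore, FixedVectorType *VectorTy,
                       SmallVectorImpl<Instruction *> &Placeholders) {
  if (Value *CurVal = Updater.FindValueForBlock(BB))
    return CurVal;

  IRBuilder<> Builder(InsertBefore);
  auto *Placeholder = cast<Instruction>(
      Builder.CreateFreeze(PoisonValue::get(VectorTy), "placeholder"));
  Placeholders.push_back(Placeholder);
  Updater.AddAvailableValue(BB, Placeholder);
  return Placeholder;
}

// After the single rewriting pass, ask the SSAUpdater for the real value in
// the middle of each placeholder's block (inserting PHIs as needed) and drop
// the placeholders.
static void resolvePlaceholders(SSAUpdater &Updater,
                                ArrayRef<Instruction *> Placeholders) {
  for (Instruction *Placeholder : Placeholders) {
    Placeholder->replaceAllUsesWith(
        Updater.GetValueInMiddleOfBlock(Placeholder->getParent()));
    Placeholder->eraseFromParent();
  }
}
```

As I read the patch, the appeal is that a freeze of poison is a well-formed standalone value of the vector type, so no second lowering pass over deferred users is needed; once every block's value is known, the placeholders are simply replaced and erased.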
