llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Nicolai Hähnle (nhaehnle)

<details>
<summary>Changes</summary>

This change is motivated by the overall goal of finding alternative ways
to promote allocas to VGPRs. The current solution is effectively limited
to allocas whose size matches a register class, and we can't keep adding
more register classes. We have some downstream work in this direction,
and I'm currently looking at cleaning that up to bring it upstream.

This refactor paves the way for adding a third way of promoting allocas,
on top of the existing alloca-to-vector and alloca-to-LDS promotions.
Much of the analysis can be shared between the different promotion
techniques.

Additionally, splitting the pass into an analysis phase and a commit
phase should make it easier to make better "big picture" decisions in
the future about which allocas to promote, and how.
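
Roughly, the new structure is: analyze each alloca once, record which
promotion strategies are viable, score and sort, and only then commit to
rewriting the IR. The self-contained sketch below models just that control
flow; the types, thresholds, and scoring in it are illustrative placeholders,
not the actual pass code (the real `AllocaAnalysis`, `collectAllocaUses`,
etc. are in the diff below).

```cpp
#include <algorithm>
#include <cstdio>
#include <optional>
#include <vector>

// Toy stand-ins for the LLVM types; only the fields needed to show the
// analyze -> score -> sort -> commit flow are modeled here.
struct Alloca {
  int Id;
  unsigned SizeInBits;
};

struct AllocaAnalysis {
  Alloca *A = nullptr;
  unsigned Score = 0;
  bool CanVectorize = false; // stands in for AA.Vector.Ty != nullptr
  bool CanUseLDS = false;    // stands in for AA.LDS.Enable
};

// Analysis phase: inspect each alloca once and record what is possible.
// The thresholds here are made-up placeholders, not the pass's heuristics.
static std::optional<AllocaAnalysis> analyze(Alloca &A, bool PromoteToLDS) {
  AllocaAnalysis AA;
  AA.A = &A;
  AA.CanVectorize = A.SizeInBits <= 1024;
  AA.CanUseLDS = PromoteToLDS && A.SizeInBits <= 8192;
  if (!AA.CanVectorize && !AA.CanUseLDS)
    return std::nullopt;
  AA.Score = A.SizeInBits / 32; // placeholder scoring
  return AA;
}

int main() {
  std::vector<Alloca> EntryBlockAllocas = {{0, 256}, {1, 1024}, {2, 16384}};
  unsigned VectorizationBudget = 1024;

  // 1) Analysis: collect an AllocaAnalysis for every promotable alloca.
  std::vector<AllocaAnalysis> Work;
  for (Alloca &A : EntryBlockAllocas)
    if (std::optional<AllocaAnalysis> AA = analyze(A, /*PromoteToLDS=*/true))
      Work.push_back(*AA);

  // 2) Big-picture decisions: sort by score so the vectorization budget is
  //    spent on the most profitable allocas first.
  std::stable_sort(Work.begin(), Work.end(),
                   [](const AllocaAnalysis &X, const AllocaAnalysis &Y) {
                     return X.Score > Y.Score;
                   });

  // 3) Commit: rewrite each alloca with the strategy chosen above.
  for (AllocaAnalysis &AA : Work) {
    if (AA.CanVectorize && AA.A->SizeInBits <= VectorizationBudget) {
      VectorizationBudget -= AA.A->SizeInBits;
      std::printf("alloca %d -> vector registers\n", AA.A->Id);
      continue;
    }
    if (AA.CanUseLDS)
      std::printf("alloca %d -> LDS\n", AA.A->Id);
  }
  return 0;
}
```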

---

**Stack**:
- [5/5] #170512 ⬅
- [4/5] #170511
- [3/5] #170510
- [2/5] #170509
- [1/5] #170508


⚠️ *Part of a stack created by [spr](https://github.com/nhaehnle/spr). Merging 
this PR using the GitHub UI may have unexpected results.*

---

Patch is 47.30 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/170512.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+346-298) 
- (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-negative-index.ll (+2-4) 
- (modified) llvm/test/CodeGen/AMDGPU/promote-alloca-scoring.ll (+34-30) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index efd3664266dee..f431535c722ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -85,6 +85,42 @@ static cl::opt<unsigned>
                             "when sorting profitable allocas"),
                    cl::init(4));
 
+// We support vector indices of the form (A * stride) + B
+// All parts are optional.
+struct GEPToVectorIndex {
+  Value *VarIndex = nullptr;         // defaults to 0
+  ConstantInt *VarMul = nullptr;     // defaults to 1
+  ConstantInt *ConstIndex = nullptr; // defaults to 0
+  Value *Full = nullptr;
+};
+
+struct MemTransferInfo {
+  ConstantInt *SrcIndex = nullptr;
+  ConstantInt *DestIndex = nullptr;
+};
+
+// Analysis for planning the different strategies of alloca promotion.
+struct AllocaAnalysis {
+  AllocaInst *Alloca = nullptr;
+  SmallVector<Value *> Pointers;
+  SmallVector<Use *> Uses;
+  unsigned Score = 0;
+  bool HaveSelectOrPHI = false;
+  struct {
+    FixedVectorType *Ty = nullptr;
+    SmallVector<Instruction *> Worklist;
+    SmallVector<Instruction *> UsersToRemove;
+    MapVector<GetElementPtrInst *, GEPToVectorIndex> GEPVectorIdx;
+    MapVector<MemTransferInst *, MemTransferInfo> TransferInfo;
+  } Vector;
+  struct {
+    bool Enable = false;
+    SmallVector<User *> Worklist;
+  } LDS;
+
+  explicit AllocaAnalysis(AllocaInst *Alloca) : Alloca(Alloca) {}
+};
+
 // Shared implementation which can do both promotion to vector and to LDS.
 class AMDGPUPromoteAllocaImpl {
 private:
@@ -106,10 +142,7 @@ class AMDGPUPromoteAllocaImpl {
   std::pair<Value *, Value *> getLocalSizeYZ(IRBuilder<> &Builder);
   Value *getWorkitemID(IRBuilder<> &Builder, unsigned N);
 
-  /// BaseAlloca is the alloca root the search started from.
-  /// Val may be that alloca or a recursive user of it.
-  bool collectUsesWithPtrTypes(Value *BaseAlloca, Value *Val,
-                               std::vector<Value *> &WorkList) const;
+  bool collectAllocaUses(AllocaAnalysis &AA) const;
 
   /// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
   /// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
@@ -123,10 +156,12 @@ class AMDGPUPromoteAllocaImpl {
   bool hasSufficientLocalMem(const Function &F);
 
   FixedVectorType *getVectorTypeForAlloca(Type *AllocaTy) const;
-  bool tryPromoteAllocaToVector(AllocaInst &I);
-  bool tryPromoteAllocaToLDS(AllocaInst &I, bool SufficientLDS);
+  void analyzePromoteToVector(AllocaAnalysis &AA) const;
+  void promoteAllocaToVector(AllocaAnalysis &AA);
+  void analyzePromoteToLDS(AllocaAnalysis &AA) const;
+  bool tryPromoteAllocaToLDS(AllocaAnalysis &AA, bool SufficientLDS);
 
-  void sortAllocasToPromote(SmallVectorImpl<AllocaInst *> &Allocas);
+  void scoreAlloca(AllocaAnalysis &AA) const;
 
   void setFunctionLimits(const Function &F);
 
@@ -237,53 +272,77 @@ FunctionPass *llvm::createAMDGPUPromoteAlloca() {
   return new AMDGPUPromoteAlloca();
 }
 
-static void collectAllocaUses(AllocaInst &Alloca,
-                              SmallVectorImpl<Use *> &Uses) {
-  SmallVector<Instruction *, 4> WorkList({&Alloca});
+bool AMDGPUPromoteAllocaImpl::collectAllocaUses(AllocaAnalysis &AA) const {
+  const auto RejectUser = [&](Instruction *Inst, Twine Msg) {
+    LLVM_DEBUG(dbgs() << "  Cannot promote alloca: " << Msg << "\n"
+                      << "    " << *Inst << "\n");
+    return false;
+  };
+
+  SmallVector<Instruction *, 4> WorkList({AA.Alloca});
   while (!WorkList.empty()) {
     auto *Cur = WorkList.pop_back_val();
+    if (find(AA.Pointers, Cur) != AA.Pointers.end())
+      continue;
+    AA.Pointers.push_back(Cur);
     for (auto &U : Cur->uses()) {
-      Uses.push_back(&U);
+      auto *Inst = cast<Instruction>(U.getUser());
+      if (isa<StoreInst>(Inst)) {
+        if (U.getOperandNo() != StoreInst::getPointerOperandIndex()) {
+          return RejectUser(Inst, "pointer escapes via store");
+        }
+      }
+      AA.Uses.push_back(&U);
 
-      if (isa<GetElementPtrInst>(U.getUser()))
-        WorkList.push_back(cast<Instruction>(U.getUser()));
+      if (isa<GetElementPtrInst>(U.getUser())) {
+        WorkList.push_back(Inst);
+      } else if (auto *SI = dyn_cast<SelectInst>(Inst)) {
+        // Only promote a select if we know that the other select operand is
+        // from another pointer that will also be promoted.
+        if (!binaryOpIsDerivedFromSameAlloca(AA.Alloca, Cur, SI, 1, 2))
+          return RejectUser(Inst, "select from mixed objects");
+        WorkList.push_back(Inst);
+        AA.HaveSelectOrPHI = true;
+      } else if (auto *Phi = dyn_cast<PHINode>(Inst)) {
+        // Repeat for phis.
+
+        // TODO: Handle more complex cases. We should be able to replace loops
+        // over arrays.
+        switch (Phi->getNumIncomingValues()) {
+        case 1:
+          break;
+        case 2:
+          if (!binaryOpIsDerivedFromSameAlloca(AA.Alloca, Cur, Phi, 0, 1))
+            return RejectUser(Inst, "phi from mixed objects");
+          break;
+        default:
+          return RejectUser(Inst, "phi with too many operands");
+        }
+
+        WorkList.push_back(Inst);
+        AA.HaveSelectOrPHI = true;
+      }
     }
   }
+  return true;
 }
 
-void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
-    SmallVectorImpl<AllocaInst *> &Allocas) {
-  DenseMap<AllocaInst *, unsigned> Scores;
-
-  for (auto *Alloca : Allocas) {
-    LLVM_DEBUG(dbgs() << "Scoring: " << *Alloca << "\n");
-    unsigned &Score = Scores[Alloca];
-    // Increment score by one for each user + a bonus for users within loops.
-    SmallVector<Use *, 8> Uses;
-    collectAllocaUses(*Alloca, Uses);
-    for (auto *U : Uses) {
-      Instruction *Inst = cast<Instruction>(U->getUser());
-      if (isa<GetElementPtrInst>(Inst))
-        continue;
-      unsigned UserScore =
-          1 + (LoopUserWeight * LI.getLoopDepth(Inst->getParent()));
-      LLVM_DEBUG(dbgs() << "  [+" << UserScore << "]:\t" << *Inst << "\n");
-      Score += UserScore;
-    }
-    LLVM_DEBUG(dbgs() << "  => Final Score:" << Score << "\n");
+void AMDGPUPromoteAllocaImpl::scoreAlloca(AllocaAnalysis &AA) const {
+  LLVM_DEBUG(dbgs() << "Scoring: " << *AA.Alloca << "\n");
+  unsigned Score = 0;
+  // Increment score by one for each user + a bonus for users within loops.
+  for (auto *U : AA.Uses) {
+    Instruction *Inst = cast<Instruction>(U->getUser());
+    if (isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
+        isa<PHINode>(Inst))
+      continue;
+    unsigned UserScore =
+        1 + (LoopUserWeight * LI.getLoopDepth(Inst->getParent()));
+    LLVM_DEBUG(dbgs() << "  [+" << UserScore << "]:\t" << *Inst << "\n");
+    Score += UserScore;
   }
-
-  stable_sort(Allocas, [&](AllocaInst *A, AllocaInst *B) {
-    return Scores.at(A) > Scores.at(B);
-  });
-
-  // clang-format off
-  LLVM_DEBUG(
-    dbgs() << "Sorted Worklist:\n";
-    for (auto *A: Allocas)
-      dbgs() << "  " << *A << "\n";
-  );
-  // clang-format on
+  LLVM_DEBUG(dbgs() << "  => Final Score:" << Score << "\n");
+  AA.Score = Score;
 }
 
 void AMDGPUPromoteAllocaImpl::setFunctionLimits(const Function &F) {
@@ -320,27 +379,48 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
                                   : (MaxVGPRs * 32)) /
       VGPRBudgetRatio;
 
-  SmallVector<AllocaInst *, 16> Allocas;
+  std::vector<AllocaAnalysis> Allocas;
   for (Instruction &I : F.getEntryBlock()) {
     if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
       // Array allocations are probably not worth handling, since an allocation
       // of the array type is the canonical form.
       if (!AI->isStaticAlloca() || AI->isArrayAllocation())
         continue;
-      Allocas.push_back(AI);
+
+      LLVM_DEBUG(dbgs() << "Analyzing: " << *AI << '\n');
+
+      AllocaAnalysis AA{AI};
+      if (collectAllocaUses(AA)) {
+        analyzePromoteToVector(AA);
+        if (PromoteToLDS)
+          analyzePromoteToLDS(AA);
+        if (AA.Vector.Ty || AA.LDS.Enable) {
+          scoreAlloca(AA);
+          Allocas.push_back(std::move(AA));
+        }
+      }
     }
   }
 
-  sortAllocasToPromote(Allocas);
+  stable_sort(Allocas,
+              [](const auto &A, const auto &B) { return A.Score > B.Score; });
+
+  // clang-format off
+  LLVM_DEBUG(
+    dbgs() << "Sorted Worklist:\n";
+    for (const auto &AA : Allocas)
+      dbgs() << "  " << *AA.Alloca << "\n";
+  );
+  // clang-format on
 
   bool Changed = false;
-  for (AllocaInst *AI : Allocas) {
-    const unsigned AllocaCost = DL->getTypeSizeInBits(AI->getAllocatedType());
-    // First, check if we have enough budget to vectorize this alloca.
-    if (AllocaCost <= VectorizationBudget) {
-      // If we do, attempt vectorization, otherwise, fall through and try
-      // promoting to LDS instead.
-      if (tryPromoteAllocaToVector(*AI)) {
+  for (AllocaAnalysis &AA : Allocas) {
+    if (AA.Vector.Ty) {
+      const unsigned AllocaCost =
+          DL->getTypeSizeInBits(AA.Alloca->getAllocatedType());
+      // First, check if we have enough budget to vectorize this alloca.
+      if (AllocaCost <= VectorizationBudget) {
+        promoteAllocaToVector(AA);
         Changed = true;
         assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
                "Underflow!");
@@ -348,14 +428,14 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
         LLVM_DEBUG(dbgs() << "  Remaining vectorization budget:"
                           << VectorizationBudget << "\n");
         continue;
+      } else {
+        LLVM_DEBUG(dbgs() << "Alloca too big for vectorization (size:"
+                          << AllocaCost << ", budget:" << VectorizationBudget
+                          << "): " << *AA.Alloca << "\n");
       }
-    } else {
-      LLVM_DEBUG(dbgs() << "Alloca too big for vectorization (size:"
-                        << AllocaCost << ", budget:" << VectorizationBudget
-                        << "): " << *AI << "\n");
     }
 
-    if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
+    if (AA.LDS.Enable && tryPromoteAllocaToLDS(AA, SufficientLDS))
       Changed = true;
   }
 
@@ -366,11 +446,6 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
   return Changed;
 }
 
-struct MemTransferInfo {
-  ConstantInt *SrcIndex = nullptr;
-  ConstantInt *DestIndex = nullptr;
-};
-
 // Checks if the instruction I is a memset user of the alloca AI that we can
 // deal with. Currently, only non-volatile memsets that affect the whole alloca
 // are handled.
@@ -388,23 +463,49 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
          match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
 }
 
-static Value *calculateVectorIndex(
-    Value *Ptr, const std::map<GetElementPtrInst *, WeakTrackingVH> &GEPIdx) {
-  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
-  if (!GEP)
-    return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+static Value *calculateVectorIndex(Value *Ptr, AllocaAnalysis &AA) {
+  IRBuilder<> B(Ptr->getContext());
+
+  Ptr = Ptr->stripPointerCasts();
+  if (Ptr == AA.Alloca)
+    return B.getInt32(0);
+
+  auto *GEP = cast<GetElementPtrInst>(Ptr);
+  auto I = AA.Vector.GEPVectorIdx.find(GEP);
+  assert(I != AA.Vector.GEPVectorIdx.end() && "Must have entry for GEP!");
+
+  if (!I->second.Full) {
+    Value *Result = nullptr;
+    B.SetInsertPoint(GEP);
 
-  auto I = GEPIdx.find(GEP);
-  assert(I != GEPIdx.end() && "Must have entry for GEP!");
+    if (I->second.VarIndex) {
+      Result = I->second.VarIndex;
+      Result = B.CreateSExtOrTrunc(Result, B.getInt32Ty());
 
-  Value *IndexValue = I->second;
-  assert(IndexValue && "index value missing from GEP index map");
-  return IndexValue;
+      if (I->second.VarMul)
+        Result = B.CreateMul(Result, I->second.VarMul);
+    }
+
+    if (I->second.ConstIndex) {
+      if (Result) {
+        Result = B.CreateAdd(Result, I->second.ConstIndex);
+      } else {
+        Result = I->second.ConstIndex;
+      }
+    }
+
+    if (!Result)
+      Result = B.getInt32(0);
+
+    I->second.Full = Result;
+  }
+
+  return I->second.Full;
 }
 
-static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
-                               Type *VecElemTy, const DataLayout &DL,
-                               SmallVector<Instruction *> &NewInsts) {
+static std::optional<GEPToVectorIndex>
+computeGEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
+                        Type *VecElemTy, const DataLayout &DL) {
   // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
   // helper.
   LLVMContext &Ctx = GEP->getContext();
@@ -432,7 +533,7 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
   Value *CurPtr = GEP;
   while (auto *CurGEP = dyn_cast<GetElementPtrInst>(CurPtr)) {
     if (!CurGEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
-      return nullptr;
+      return {};
 
     // Move to the next outer pointer.
     CurPtr = CurGEP->getPointerOperand();
@@ -442,69 +543,58 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
 
   int64_t VecElemSize = DL.getTypeAllocSize(VecElemTy);
   if (VarOffsets.size() > 1)
-    return nullptr;
+    return {};
 
   APInt IndexQuot;
   int64_t Rem;
   APInt::sdivrem(ConstOffset, VecElemSize, IndexQuot, Rem);
   if (Rem != 0)
-    return nullptr;
-  if (VarOffsets.size() == 0)
-    return ConstantInt::get(Ctx, IndexQuot);
+    return {};
+
+  GEPToVectorIndex Result;
+
+  if (!ConstOffset.isZero())
+    Result.ConstIndex = ConstantInt::get(Ctx, IndexQuot.sextOrTrunc(BW));
 
-  IRBuilder<> Builder(GEP);
+  if (VarOffsets.empty())
+    return Result;
 
   const auto &VarOffset = VarOffsets.front();
   APInt OffsetQuot;
   APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
   if (Rem != 0 || OffsetQuot.isZero())
-    return nullptr;
-
-  Value *Offset = VarOffset.first;
-  if (!isa<IntegerType>(Offset->getType()))
-    return nullptr;
+    return {};
 
-  Offset = Builder.CreateSExtOrTrunc(Offset, Builder.getIntNTy(BW));
-  if (Offset != VarOffset.first)
-    NewInsts.push_back(cast<Instruction>(Offset));
+  Result.VarIndex = VarOffset.first;
+  auto *OffsetType = dyn_cast<IntegerType>(Result.VarIndex->getType());
+  if (!OffsetType)
+    return {};
 
   if (!OffsetQuot.isOne()) {
-    ConstantInt *ConstMul = ConstantInt::get(Ctx, OffsetQuot.sextOrTrunc(BW));
-    Offset = Builder.CreateMul(Offset, ConstMul);
-    if (Instruction *NewInst = dyn_cast<Instruction>(Offset))
-      NewInsts.push_back(NewInst);
+    Result.VarMul = ConstantInt::get(Ctx, OffsetQuot.sextOrTrunc(BW));
   }
-  if (ConstOffset.isZero())
-    return Offset;
-
-  ConstantInt *ConstIndex = ConstantInt::get(Ctx, IndexQuot.sextOrTrunc(BW));
-  Value *IndexAdd = Builder.CreateAdd(Offset, ConstIndex);
-  if (Instruction *NewInst = dyn_cast<Instruction>(IndexAdd))
-    NewInsts.push_back(NewInst);
-  return IndexAdd;
+
+  return Result;
 }
 
 /// Promotes a single user of the alloca to a vector form.
 ///
 /// \param Inst           Instruction to be promoted.
 /// \param DL             Module Data Layout.
-/// \param VectorTy       Vectorized Type.
+/// \param AA             Alloca Analysis.
 /// \param VecStoreSize   Size of \p VectorTy in bytes.
 /// \param ElementSize    Size of \p VectorTy element type in bytes.
-/// \param TransferInfo   MemTransferInst info map.
-/// \param GEPVectorIdx   GEP -> VectorIdx cache.
 /// \param CurVal         Current value of the vector (e.g. last stored value)
 /// \param[out]  DeferredLoads \p Inst is added to this vector if it can't
 ///              be promoted now. This happens when promoting requires \p
 ///              CurVal, but \p CurVal is nullptr.
 /// \return the stored value if \p Inst would have written to the alloca, or
 ///         nullptr otherwise.
-static Value *promoteAllocaUserToVector(
-    Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
-    unsigned VecStoreSize, unsigned ElementSize,
-    DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
-    std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
-    function_ref<Value *()> GetCurVal) {
+static Value *promoteAllocaUserToVector(Instruction *Inst, const DataLayout &DL,
+                                        AllocaAnalysis &AA,
+                                        unsigned VecStoreSize,
+                                        unsigned ElementSize,
+                                        function_ref<Value *()> GetCurVal) {
   // Note: we use InstSimplifyFolder because it can leverage the DataLayout
   // to do more folding, especially in the case of vector splats.
   IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
@@ -526,13 +616,13 @@ static Value *promoteAllocaUserToVector(
         Val, FixedVectorType::get(EltTy, NumPtrElts));
   };
 
-  Type *VecEltTy = VectorTy->getElementType();
+  Type *VecEltTy = AA.Vector.Ty->getElementType();
 
   switch (Inst->getOpcode()) {
   case Instruction::Load: {
     Value *CurVal = GetCurVal();
-    Value *Index = calculateVectorIndex(
-        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
+    Value *Index =
+        calculateVectorIndex(cast<LoadInst>(Inst)->getPointerOperand(), AA);
 
     // We're loading the full vector.
     Type *AccessTy = Inst->getType();
@@ -588,7 +678,7 @@ static Value *promoteAllocaUserToVector(
     // to know the current value. If this is a store of a single element, we
     // need to know the value.
     StoreInst *SI = cast<StoreInst>(Inst);
-    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
+    Value *Index = calculateVectorIndex(SI->getPointerOperand(), AA);
     Value *Val = SI->getValueOperand();
 
     // We're storing the full vector, we can handle this without knowing CurVal.
@@ -598,9 +688,9 @@ static Value *promoteAllocaUserToVector(
       if (CI->isZeroValue() && AccessSize == VecStoreSize) {
         if (AccessTy->isPtrOrPtrVectorTy())
           Val = CreateTempPtrIntCast(Val, AccessTy);
-        else if (VectorTy->isPtrOrPtrVectorTy())
-          Val = CreateTempPtrIntCast(Val, VectorTy);
-        return Builder.CreateBitOrPointerCast(Val, VectorTy);
+        else if (AA.Vector.Ty->isPtrOrPtrVectorTy())
+          Val = CreateTempPtrIntCast(Val, AA.Vector.Ty);
+        return Builder.CreateBitOrPointerCast(Val, AA.Vector.Ty);
       }
     }
 
@@ -609,7 +699,7 @@ static Value *promoteAllocaUserToVector(
       assert(AccessSize.isKnownMultipleOf(DL.getTypeStoreSize(VecEltTy)));
       const unsigned NumWrittenElts =
           AccessSize / DL.getTypeStoreSize(VecEltTy);
-      const unsigned NumVecElts = VectorTy->getNumElements();
+      const unsigned NumVecElts = AA.Vector.Ty->getNumElements();
       auto *SubVecTy = FixedVectorType::get(VecEltTy, NumWrittenElts);
       assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
 
@@ -640,14 +730,14 @@ static Value *promoteAllocaUserToVector(
       // For memcpy, we need to know curval.
       ConstantInt *Length = cast<ConstantInt>(MTI->getLength());
       unsigned NumCopied = Length->getZExtValue() / ElementSize;
-      MemTransferInfo *TI = &TransferInfo[MTI];
+      MemTransferInfo *TI = &AA.Vector.TransferInfo[MTI];
       unsigned SrcBegin = TI->SrcIndex->getZExtValue();
       unsigned DestBegin = TI->DestIndex->getZExtValue();
 
       SmallVector<int> Mask;
-      for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
+      for (unsigned Idx = 0; Idx < AA.Vector.Ty->getNumElements(); ++Idx) {
         if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
-          Mask.push_back(SrcBegin < VectorTy->getNumElements()
+          Mask.push_back(SrcBegin < AA.Vector.Ty->getNumElements()
                              ? SrcBegin++
                              : PoisonMaskElem);
         } else {
@@ -676,14 +766,14 @@ static Value *promoteAllocaUserToVector(
           Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
       }
 
-      return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
+      return Builder.CreateVectorSplat(AA.Vector.Ty->getElementCount(), Elt);
     }
 
     if (auto *Intr = dyn_cast<IntrinsicInst>(In...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/170512