LGTM, pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Ruiling Song > Sent: Thursday, July 13, 2017 10:45 > To: beignet@lists.freedesktop.org > Cc: Song, Ruiling <ruiling.s...@intel.com>; Wang, Rander > <rander.w...@intel.com> > Subject: [Beignet] [PATCH V5] backend: refine load store optimization > > this fix basic test in conformance tests failed for vec8 of char because of > overflow. And it fix many test items failed in opencv because of offset error > > (1)modify the size of searchInsnArray to 32, it is the max size for char > And add check for overflow if too many insn (2)Make sure the start insn is > the first insn of searched array > because if it is not the first, the offset maybe invalid. And > it is complex to modify offset without error > > V2: refine search index, using J not I > V3: remove (2), now add offset to the pointer of start > pass OpenCV, conformance basic and compiler tests, utests > V4: check pointer type, if 64bit, modify it by 64, or 32 > V5: refine findSafeInstruction() and variable naming in > findConsecutiveAccess(). > > Signed-off-by: rander.wang <rander.w...@intel.com> > Signed-off-by: Ruiling Song <ruiling.s...@intel.com> > --- > backend/src/llvm/llvm_loadstore_optimization.cpp | 125 > ++++++++++++++++------- > 1 file changed, 88 insertions(+), 37 deletions(-) > > diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp > b/backend/src/llvm/llvm_loadstore_optimization.cpp > index c91c1a0..bb8dc5f 100644 > --- a/backend/src/llvm/llvm_loadstore_optimization.cpp > +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp > @@ -68,13 +68,14 @@ namespace gbe { > bool optimizeLoadStore(BasicBlock &BB); > > bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* > elementSize, int maxVecSize); > - void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> > &merged); > - void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> > &merged); > + void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> > &merged, Instruction *start,int offset); > + void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> > &merged, Instruction *start,int offset); > bool findConsecutiveAccess(BasicBlock &BB, > SmallVector<Instruction*, 16> &merged, > const BasicBlock::iterator &start, > unsigned maxVecSize, > - bool isLoad); > + bool isLoad, > + int *addrOffset); > #if LLVM_VERSION_MAJOR * 10 + LLVM_VERSION_MINOR >= 40 > virtual StringRef getPassName() const #else @@ -143,7 +144,10 @@ > namespace gbe { > return (abs(-offset) < sz*maxVecSize); > } > > - void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, > SmallVector<Instruction*, 16> &merged) { > + void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, > + SmallVector<Instruction*, 16> > &merged, > + Instruction *start, > + int offset) { > IRBuilder<> Builder(&BB); > > unsigned size = merged.size(); > @@ -151,14 +155,27 @@ namespace gbe { > for(unsigned i = 0; i < size; i++) { > values.push_back(merged[i]); > } > - LoadInst *ld = cast<LoadInst>(merged[0]); > + LoadInst *ld = cast<LoadInst>(start); > unsigned align = ld->getAlignment(); > unsigned addrSpace = ld->getPointerAddressSpace(); > // insert before first load > Builder.SetInsertPoint(ld); > + > + //modify offset > + Value *newPtr = ld->getPointerOperand(); > + if(offset != 0) > + { > + Type *ptype = ld->getPointerOperand()->getType(); > + unsigned typeSize = TD->getPointerTypeSize(ptype); > + ptype = (typeSize == 4) ? Builder.getInt32Ty():Builder.getInt64Ty(); > + Value *StartAddr = Builder.CreatePtrToInt(ld->getPointerOperand(), > ptype); > + Value *offsetVal = ConstantInt::get(ptype, offset); > + Value *newAddr = Builder.CreateAdd(StartAddr, offsetVal); > + newPtr = Builder.CreateIntToPtr(newAddr, ld->getPointerOperand()- > >getType()); > + } > + > VectorType *vecTy = VectorType::get(ld->getType(), size); > - Value *vecPtr = Builder.CreateBitCast(ld->getPointerOperand(), > - PointerType::get(vecTy, addrSpace)); > + Value *vecPtr = Builder.CreateBitCast(newPtr, > + PointerType::get(vecTy, addrSpace)); > LoadInst *vecValue = Builder.CreateLoad(vecPtr); > vecValue->setAlignment(align); > > @@ -196,8 +213,8 @@ namespace gbe { > SmallVector<Instruction*, 16> &merged, > const BasicBlock::iterator &start, > unsigned maxVecSize, > - bool isLoad) { > - > + bool isLoad, > + int *addrOffset) { > if(!isSimpleLoadStore(&*start)) return false; > > unsigned targetAddrSpace = getAddressSpace(&*start); @@ -207,35 > +224,40 @@ namespace gbe { > ++J; > > unsigned maxLimit = maxVecSize * 8; > - bool reordered = false; > - > + bool crossAddressSpace = false; > + // When we are merging loads and there are some other AddressSpace > stores > + // lies among them, we are saying that loadStoreReorder happens. The > same > + // for merging stores and there are some other AddressSpace load among > them. > + bool loadStoreReorder = false; > bool ready = false; > int elementSize; > > - SmallVector<mergedInfo *, 16> searchInsnArray; > - mergedInfo meInfoArray[16]; > + SmallVector<mergedInfo *, 32> searchInsnArray; > + mergedInfo meInfoArray[32]; > int indx = 0; > meInfoArray[indx++].init(&*start, 0); > - > searchInsnArray.push_back(&meInfoArray[0]); > - BasicBlock::iterator I = start; > - ++I; > > - for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) { > - if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) { > + for(unsigned ss = 0; J!= E && ss <= maxLimit; ++ss, ++J) { > + if((isLoad && isa<LoadInst>(*J)) || (!isLoad && > + isa<StoreInst>(*J))) { > int distance; > - if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, &distance, > &elementSize, maxVecSize)) > + if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*J, > + &distance, &elementSize, maxVecSize)) > { > - meInfoArray[indx].init(&*I, distance); > + meInfoArray[indx].init(&*J, distance); > searchInsnArray.push_back(&meInfoArray[indx]); > indx++; > + if (crossAddressSpace) > + loadStoreReorder = true; > + > + if(indx >= 32) > + break; > } > } else if((isLoad && isa<StoreInst>(*J))) { > // simple stop to keep read/write order > StoreInst *st = cast<StoreInst>(&*J); > unsigned addrSpace = st->getPointerAddressSpace(); > if (addrSpace != targetAddrSpace) { > - reordered = true; > + crossAddressSpace = true; > } else { > break; > } > @@ -243,15 +265,14 @@ namespace gbe { > LoadInst *ld = cast<LoadInst>(&*J); > unsigned addrSpace = ld->getPointerAddressSpace(); > if (addrSpace != targetAddrSpace) { > - reordered = true; > + crossAddressSpace = true; > } else { > break; > } > } > - > - if(merged.size() >= maxVecSize) break; > } > > + > if(indx > 1) > { > //try to sort the load/store by the offset from the start @@ -275,18 > +296,29 @@ namespace gbe { > > if(j > 0 && ready) > { > - for(unsigned k = 0; k < j+1; k++) > + unsigned endIndx = j + 1; > + *addrOffset = searchInsnArray[i]->mOffset; > + endIndx = (endIndx >= 16) ? 16 : (endIndx >= 8 ? 8 : (endIndx > + >= 4 ? 4 : endIndx)); > + > + for(unsigned k = 0; k < endIndx; k++) > + { > merged.push_back(searchInsnArray[i+k]->mInsn); > + if (k >= maxVecSize) > + break; > + } > > break; > } > } > } > > - return reordered; > + return loadStoreReorder; > } > > - void GenLoadStoreOptimization::mergeStore(BasicBlock &BB, > SmallVector<Instruction*, 16> &merged) { > + void GenLoadStoreOptimization::mergeStore(BasicBlock &BB, > + SmallVector<Instruction*, 16> > &merged, > + Instruction *start, > + int offset) { > IRBuilder<> Builder(&BB); > > unsigned size = merged.size(); > @@ -294,7 +326,7 @@ namespace gbe { > for(unsigned i = 0; i < size; i++) { > values.push_back(cast<StoreInst>(merged[i])->getValueOperand()); > } > - StoreInst *st = cast<StoreInst>(merged[0]); > + StoreInst *st = cast<StoreInst>(start); > if(!st) > return; > > @@ -314,12 +346,26 @@ namespace gbe { > Value * stPointer = st->getPointerOperand(); > if(!stPointer) > return; > - Value *newPtr = Builder.CreateBitCast(stPointer, PointerType::get(vecTy, > addrSpace)); > + > + //modify offset > + Value *newSPtr = stPointer; > + if(offset != 0) > + { > + unsigned typeSize = TD->getPointerTypeSize(stPointer->getType()); > + Type *ptype = (typeSize == 4) ? Builder.getInt32Ty() : > Builder.getInt64Ty(); > + Value *StartAddr = Builder.CreatePtrToInt(stPointer, ptype); > + Value *offsetVal = ConstantInt::get(ptype, offset); > + Value *newAddr = Builder.CreateAdd(StartAddr, offsetVal); > + newSPtr = Builder.CreateIntToPtr(newAddr, stPointer->getType()); > + } > + > + Value *newPtr = Builder.CreateBitCast(newSPtr, > + PointerType::get(vecTy, addrSpace)); > StoreInst *newST = Builder.CreateStore(parent, newPtr); > newST->setAlignment(align); > } > > - // Find the safe iterator we can point to. If reorder happens, we need to > + // Find the safe iterator (will not be deleted after the merge) we > + can // point to. If reorder happens, we need to > // point to the instruction after the first of toBeDeleted. If no reorder, > // we are safe to point to the instruction after the last of toBeDeleted > static BasicBlock::iterator > @@ -329,12 +375,15 @@ namespace gbe { > BasicBlock::iterator safe = current; > unsigned size = toBeDeleted.size(); > if (reorder) { > - unsigned i = 0; > - while (i < size && toBeDeleted[i] == &*safe) { > - ++i; > - ++safe; > + BasicBlock *BB = &*current->getParent(); > + for (; safe != BB->end(); ++safe) { > + if (std::find(toBeDeleted.begin(), toBeDeleted.end(), &*safe) == > + toBeDeleted.end()) > + break; > } > } else { > + // TODO we should use the furthest instruction, so that the outer loop > + // ends quicker. > safe = BasicBlock::iterator(toBeDeleted[size - 1]); > ++safe; > } > @@ -355,12 +404,14 @@ namespace gbe { > ((ty->isIntegerTy(8) || ty->isIntegerTy(16)) && isLoad))) > continue; > > + int addrOffset = 0; > unsigned maxVecSize = (ty->isFloatTy() || ty->isIntegerTy(32)) ? 4 : > (ty->isIntegerTy(16) ? 8 : 16); > - bool reorder = findConsecutiveAccess(BB, merged, BBI, maxVecSize, > isLoad); > + bool reorder = findConsecutiveAccess(BB, merged, BBI, > + maxVecSize, isLoad, &addrOffset); > uint32_t size = merged.size(); > uint32_t pos = 0; > bool doDeleting = size > 1; > + BasicBlock::iterator startLS = BBI; > if (doDeleting) { > // choose next undeleted instruction > BBI = findSafeInstruction(merged, BBI, reorder); @@ -372,9 +423,9 > @@ > namespace gbe { > (size >= 4 ? 4 : size)); > SmallVector<Instruction*, 16> mergedVec(merged.begin() + pos, > merged.begin() + pos + vecSize); > if(isLoad) > - mergeLoad(BB, mergedVec); > + mergeLoad(BB, mergedVec, &*startLS, addrOffset); > else > - mergeStore(BB, mergedVec); > + mergeStore(BB, mergedVec, &*startLS, addrOffset); > // remove merged insn > for(uint32_t i = 0; i < mergedVec.size(); i++) > mergedVec[i]->eraseFromParent(); > -- > 2.4.1 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet