Pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Song, Ruiling > Sent: Thursday, June 22, 2017 14:29 > To: Wang, Rander <rander.w...@intel.com>; beig...@freedesktop.org > Cc: Wang, Rander <rander.w...@intel.com> > Subject: Re: [Beignet] [PATCH V2] backend: refine load/store merging > algorithm > > LGTM > > Ruiling > > > -----Original Message----- > > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf > > Of rander.wang > > Sent: Friday, June 16, 2017 9:50 AM > > To: beig...@freedesktop.org > > Cc: Wang, Rander <rander.w...@intel.com> > > Subject: [Beignet] [PATCH V2] backend: refine load/store merging > > algorithm > > > > Now it works for sequence: load(0), load(1), load(2) > > but it cant work for load(2), load(0), load(1). because > > it compared the last merged load and the new one not all > > the loads > > > > for sequence: load(0), load(1), load(2). the load(0) is the > > start, can find that load(1) is successor without space, so > > put it to a merge fifo. then the start is moving to the top > > of fifo load(1), and compared with load(2). Also load(2) can > > be merged > > > > for load(2), load(0), load(1). load(2) cant be merged with > > load(0) for a space between them. So skip load(0) and mov to next > > load(1).And this load(1) can be merged. But it never go back merge > > load(0) > > > > Now change the algorithm. > > (1) find all loads maybe merged arround the start by the distance to > > the start. the distance is depended on data type, for 32bit data, > > the > > distance is 4. Put them in a list > > > > (2) sort the list by the distance from the start. > > > > (3) search the continuous sequence including the start to merge > > > > V2: (1)refine the sort and compare algoritm. First find all the IO > > in small offset compared to start. Then call std:sort > > (2)check the number of candidate IO to be favorable to > > performance > > for most cases there is no chance to merge IO > > > > Signed-off-by: rander.wang <rander.w...@intel.com> > > --- > > backend/src/llvm/llvm_loadstore_optimization.cpp | 87 > > +++++++++++++++++++++--- > > 1 file changed, 78 insertions(+), 9 deletions(-) > > > > diff --git a/backend/src/llvm/llvm_loadstore_optimization.cpp > > b/backend/src/llvm/llvm_loadstore_optimization.cpp > > index 5aa38be..c91c1a0 100644 > > --- a/backend/src/llvm/llvm_loadstore_optimization.cpp > > +++ b/backend/src/llvm/llvm_loadstore_optimization.cpp > > @@ -67,7 +67,7 @@ namespace gbe { > > bool isSimpleLoadStore(Value *I); > > bool optimizeLoadStore(BasicBlock &BB); > > > > - bool isLoadStoreCompatible(Value *A, Value *B); > > + bool isLoadStoreCompatible(Value *A, Value *B, int *dist, int* > elementSize, > > int maxVecSize); > > void mergeLoad(BasicBlock &BB, SmallVector<Instruction*, 16> > &merged); > > void mergeStore(BasicBlock &BB, SmallVector<Instruction*, 16> > &merged); > > bool findConsecutiveAccess(BasicBlock &BB, > > @@ -109,7 +109,7 @@ namespace gbe { > > return NULL; > > } > > > > - bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, > > Value *B) { > > + bool GenLoadStoreOptimization::isLoadStoreCompatible(Value *A, > > + Value *B, > > int *dist, int* elementSize, int maxVecSize) { > > Value *ptrA = getPointerOperand(A); > > Value *ptrB = getPointerOperand(B); > > unsigned ASA = getAddressSpace(A); @@ -136,7 +136,11 @@ > namespace > > gbe { > > // The Instructions are connsecutive if the size of the first > > load/store is > > // the same as the offset. > > int64_t sz = TD->getTypeStoreSize(Ty); > > - return ((-offset) == sz); > > + *dist = -offset; > > + *elementSize = sz; > > + > > + //a insn with small distance from the search load/store is a candidate > one > > + return (abs(-offset) < sz*maxVecSize); > > } > > > > void GenLoadStoreOptimization::mergeLoad(BasicBlock &BB, > > SmallVector<Instruction*, 16> &merged) { @@ -163,6 +167,25 @@ > > namespace gbe { > > values[i]->replaceAllUsesWith(S); > > } > > } > > + > > + class mergedInfo{ > > + public: > > + Instruction* mInsn; > > + int mOffset; > > + > > + void init(Instruction* insn, int offset) > > + { > > + mInsn = insn; > > + mOffset = offset; > > + } > > + }; > > + > > + struct offsetSorter { > > + bool operator()(mergedInfo* m0, mergedInfo* m1) const { > > + return m0->mOffset < m1->mOffset; > > + } > > + }; > > + > > // When searching for consecutive memory access, we do it in a small > window, > > // if the window is too large, it would take up too much compiling time. > > // An Important rule we have followed is don't try to change load/store > order. > > @@ -177,7 +200,6 @@ namespace gbe { > > > > if(!isSimpleLoadStore(&*start)) return false; > > > > - merged.push_back(&*start); > > unsigned targetAddrSpace = getAddressSpace(&*start); > > > > BasicBlock::iterator E = BB.end(); @@ -187,11 +209,27 @@ > > namespace gbe { > > unsigned maxLimit = maxVecSize * 8; > > bool reordered = false; > > > > - for(unsigned ss = 0; J != E && ss <= maxLimit; ++ss, ++J) { > > - if((isLoad && isa<LoadInst>(*J)) || (!isLoad && isa<StoreInst>(*J))) > > { > > - if(isLoadStoreCompatible(merged[merged.size()-1], &*J)) { > > - merged.push_back(&*J); > > - } > > + bool ready = false; > > + int elementSize; > > + > > + SmallVector<mergedInfo *, 16> searchInsnArray; > > + mergedInfo meInfoArray[16]; > > + int indx = 0; > > + meInfoArray[indx++].init(&*start, 0); > > + > > + searchInsnArray.push_back(&meInfoArray[0]); > > + BasicBlock::iterator I = start; > > + ++I; > > + > > + for(unsigned ss = 0; I!= E && ss <= maxLimit; ++ss, ++I) { > > + if((isLoad && isa<LoadInst>(*I)) || (!isLoad && isa<StoreInst>(*J))) > > { > > + int distance; > > + if(isLoadStoreCompatible(searchInsnArray[0]->mInsn, &*I, > > + &distance, > > &elementSize, maxVecSize)) > > + { > > + meInfoArray[indx].init(&*I, distance); > > + searchInsnArray.push_back(&meInfoArray[indx]); > > + indx++; > > + } > > } else if((isLoad && isa<StoreInst>(*J))) { > > // simple stop to keep read/write order > > StoreInst *st = cast<StoreInst>(&*J); @@ -214,6 +252,37 @@ > > namespace gbe { > > if(merged.size() >= maxVecSize) break; > > } > > > > + if(indx > 1) > > + { > > + //try to sort the load/store by the offset from the start > > + //the farthest is at the beginning. this is easy to find the > > + //continuous load/store > > + std::sort(searchInsnArray.begin(), searchInsnArray.end(), > > + offsetSorter()); > > + > > + // try to find continuous loadstore insn in the candidate array > > + for(unsigned i = 0; i < searchInsnArray.size(); i++) > > + { > > + unsigned j; > > + for(j = 0; j < maxVecSize-1 && (j+i+1) < searchInsnArray.size(); > > j++) > > + { > > + if(searchInsnArray[i+j+1]->mOffset - > > + searchInsnArray[i+j]->mOffset != > > elementSize) > > + break; > > + > > + //this means the search load/store which offset is 0, is in the > sequence > > + if(searchInsnArray[i+j]->mOffset == 0 || > > + searchInsnArray[i+j+1]->mOffset > > == 0) > > + ready = true; > > + } > > + > > + if(j > 0 && ready) > > + { > > + for(unsigned k = 0; k < j+1; k++) > > + merged.push_back(searchInsnArray[i+k]->mInsn); > > + > > + break; > > + } > > + } > > + } > > + > > return reordered; > > } > > > > -- > > 2.7.4 > > > > _______________________________________________ > > Beignet mailing list > > Beignet@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet