Pushed, thanks.
> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Song, Ruiling > Sent: Thursday, June 22, 2017 14:30 > To: Wang, Rander <rander.w...@intel.com>; beig...@freedesktop.org > Cc: Wang, Rander <rander.w...@intel.com> > Subject: Re: [Beignet] [PATCH V4] backend: add global immediate > optimization > > LGTM > > Ruiling > > > -----Original Message----- > > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf > > Of rander.wang > > Sent: Wednesday, June 14, 2017 1:56 PM > > To: beig...@freedesktop.org > > Cc: Wang, Rander <rander.w...@intel.com> > > Subject: [Beignet] [PATCH V4] backend: add global immediate > > optimization > > > > there are some global immediates in global var list of LLVM. > > these imm can be integrated in instructions. for > > compiler_global_immediate_optimized test > > in utest, there are two global immediates: > > L0: > > MOV(1) %42<0>:UD : 0x0:UD > > MOV(1) %43<0>:UD : 0x30:UD > > > > used by: > > ADD(16) %49<1>:D : %42<0,1,0>:D > > %48<8,8,1>:D > > ADD(16) %54<1>:D : %43<0,1,0>:D > > %53<8,8,1>:D > > > > it can be > > ADD(16) %49<1>:D : %48<8,8,1>:D 0x0:UD > > ADD(16) %54<1>:D : %53<8,8,1>:D 0x30:UD > > > > Then the MOV can be removed. And after this optimization, ADD 0 > can > > be change > > to MOV, then local copy propagation can be done. > > > > V2: (1) add environment variable to enable/disable the optimization > > (2) refine the architecture of imm optimization, inherit from global > > optimizer not local block optimizer > > > > V3: merge with latest master driver > > > > V4: (1)refine some type errors > > (2)remove UD/D check for no need > > (3)refine imm calculate for UD/D > > > > Signed-off-by: rander.wang <rander.w...@intel.com> > > --- > > .../src/backend/gen_insn_selection_optimize.cpp | 367 > > +++++++++++++++++++-- > > 1 file changed, 342 insertions(+), 25 deletions(-) > > > > diff --git a/backend/src/backend/gen_insn_selection_optimize.cpp > > b/backend/src/backend/gen_insn_selection_optimize.cpp > > index 07547ec..eb93a20 100644 > > --- a/backend/src/backend/gen_insn_selection_optimize.cpp > > +++ b/backend/src/backend/gen_insn_selection_optimize.cpp > > @@ -40,6 +40,33 @@ namespace gbe > > return elements; > > } > > > > + class ReplaceInfo > > + { > > + public: > > + ReplaceInfo(SelectionInstruction &insn, > > + const GenRegister &intermedia, > > + const GenRegister &replacement) : insn(insn), > > + intermedia(intermedia), > > replacement(replacement) > > + { > > + assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD); > > + assert(&(insn.dst(0)) == &intermedia); > > + this->elements = CalculateElements(intermedia, insn.state.execWidth); > > + replacementOverwritten = false; > > + } > > + ~ReplaceInfo() > > + { > > + this->toBeReplaceds.clear(); > > + } > > + > > + SelectionInstruction &insn; > > + const GenRegister &intermedia; > > + uint32_t elements; > > + const GenRegister &replacement; > > + set<GenRegister *> toBeReplaceds; > > + set<SelectionInstruction*> toBeReplacedInsns; > > + bool replacementOverwritten; > > + GBE_CLASS(ReplaceInfo); > > + }; > > + > > class SelOptimizer > > { > > public: > > @@ -66,32 +93,7 @@ namespace gbe > > > > private: > > // local copy propagation > > - class ReplaceInfo > > - { > > - public: > > - ReplaceInfo(SelectionInstruction& insn, > > - const GenRegister& intermedia, > > - const GenRegister& replacement) : > > - insn(insn), intermedia(intermedia), > > replacement(replacement) > > - { > > - assert(insn.opcode == SEL_OP_MOV || insn.opcode == SEL_OP_ADD); > > - assert(&(insn.dst(0)) == &intermedia); > > - this->elements = CalculateElements(intermedia, > insn.state.execWidth); > > - replacementOverwritten = false; > > - } > > - ~ReplaceInfo() > > - { > > - this->toBeReplaceds.clear(); > > - } > > > > - SelectionInstruction& insn; > > - const GenRegister& intermedia; > > - uint32_t elements; > > - const GenRegister& replacement; > > - set<GenRegister*> toBeReplaceds; > > - bool replacementOverwritten; > > - GBE_CLASS(ReplaceInfo); > > - }; > > typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap; > > ReplaceInfoMap replaceInfoMap; > > void doLocalCopyPropagation(); > > @@ -298,13 +300,328 @@ namespace gbe > > virtual void run(); > > }; > > > > + class SelGlobalImmMovOpt : public SelGlobalOptimizer { > > + public: > > + SelGlobalImmMovOpt(const GenContext& ctx, uint32_t features, > > intrusive_list<SelectionBlock> *blockList) : > > + SelGlobalOptimizer(ctx, features) > > + { > > + mblockList = blockList; > > + } > > + > > + virtual void run(); > > + > > + void addToReplaceInfoMap(SelectionInstruction& insn); > > + void doGlobalCopyPropagation(); > > + bool CanBeReplaced(const ReplaceInfo* info, SelectionInstruction& > > + insn, > > const GenRegister& var); > > + void cleanReplaceInfoMap(); > > + void doReplacement(ReplaceInfo* info); > > + > > + private: > > + intrusive_list<SelectionBlock> *mblockList; > > + > > + typedef map<ir::Register, ReplaceInfo*> ReplaceInfoMap; > > + ReplaceInfoMap replaceInfoMap; > > + > > + }; > > + > > + extern void outputSelectionInst(SelectionInstruction &insn); > > + > > + void SelGlobalImmMovOpt::cleanReplaceInfoMap() > > + { > > + for (auto& pair : replaceInfoMap) { > > + ReplaceInfo* info = pair.second; > > + doReplacement(info); > > + delete info; > > + } > > + replaceInfoMap.clear(); > > + } > > + > > +#define RESULT() switch(insn->opcode) \ > > + { \ > > + case SEL_OP_ADD: \ > > + result = s0 + s1; \ > > + break; \ > > + case SEL_OP_MUL: \ > > + result = s0 * s1; \ > > + break; \ > > + case SEL_OP_AND: \ > > + result = s0 & s1; \ > > + break; \ > > + case SEL_OP_OR: \ > > + result = s0 | s1; \ > > + break; \ > > + case SEL_OP_XOR: \ > > + result = s0 ^ s1; \ > > + break; \ > > + default: \ > > + assert(0); \ > > + break; \ > > + } > > + > > + void SelGlobalImmMovOpt::doReplacement(ReplaceInfo* info) { > > + for (GenRegister* reg : info->toBeReplaceds) { > > + GenRegister::propagateRegister(*reg, info->replacement); > > + } > > + > > + //after imm opt, maybe both src are imm, convert it to mov > > + for (SelectionInstruction* insn : info->toBeReplacedInsns) { > > + GenRegister& src0 = insn->src(0); > > + GenRegister& src1 = insn->src(1); > > + if (src0.file == GEN_IMMEDIATE_VALUE) > > + { > > + if (src1.file == GEN_IMMEDIATE_VALUE) > > + { > > + if (src0.type == GEN_TYPE_F) > > + { > > + float s0 = src0.value.f; > > + if (src0.absolute) > > + s0 = fabs(s0); > > + if (src0.negation) > > + s0 = -s0; > > + > > + float s1 = src1.value.f; > > + if (src1.absolute) > > + s1 = fabs(s1); > > + if (src1.negation) > > + s1 = -s1; > > + > > + float result; > > + switch(insn->opcode) > > + { > > + case SEL_OP_ADD: > > + result = s0 + s1; > > + break; > > + case SEL_OP_MUL: > > + result = s0 * s1; > > + break; > > + default: > > + assert(0); > > + break; > > + } > > + > > + insn->src(0) = GenRegister::immf(result); > > + } > > + else if (src0.type == GEN_TYPE_D && src1.type == GEN_TYPE_D) > > + { > > + int s0 = src0.value.d; > > + if (src0.absolute) > > + s0 = abs(s0); > > + if (src0.negation) > > + s0 = -s0; > > + > > + int s1 = src1.value.d; > > + if (src1.absolute) > > + s1 = abs(s1); > > + if (src1.negation) > > + s1 = -s1; > > + > > + int result; > > + RESULT(); > > + insn->src(0) = GenRegister::immd(result); > > + } > > + else if(src0.type == GEN_TYPE_UD || src1.type == GEN_TYPE_UD) > > + { > > + unsigned int s0 = src0.value.ud; > > + if (src0.absolute) > > + s0 = abs(s0); > > + if (src0.negation) > > + s0 = -s0; > > + > > + unsigned int s1 = src1.value.ud; > > + if (src1.absolute) > > + s1 = abs(s1); > > + if (src1.negation) > > + s1 = -s1; > > + > > + unsigned int result; > > + RESULT(); > > + insn->src(0) = GenRegister::immud(result); > > + } > > + else > > + { > > + assert(0); > > + } > > + > > + insn->opcode = SEL_OP_MOV; > > + insn->srcNum = 1; > > + } > > + else > > + { > > + //src0 cant be immediate, so exchange with src1 > > + GenRegister tmp; > > + tmp = src0; > > + src0 = src1; > > + src1 = tmp; > > + } > > + } > > + } > > + > > + info->insn.parent->insnList.erase(&(info->insn)); > > + } > > + > > + void > SelGlobalImmMovOpt::addToReplaceInfoMap(SelectionInstruction& > > + insn) { > > + assert(insn.opcode == SEL_OP_MOV); > > + const GenRegister& src = insn.src(0); > > + const GenRegister& dst = insn.dst(0); > > + if (src.type != dst.type) > > + return; > > + > > + ReplaceInfo* info = new ReplaceInfo(insn, dst, src); > > + replaceInfoMap[dst.reg()] = info; } > > + > > + bool SelGlobalImmMovOpt::CanBeReplaced(const ReplaceInfo* info, > > SelectionInstruction& insn, const GenRegister& var) > > + { > > + if(var.file == GEN_IMMEDIATE_VALUE) > > + return false; > > + > > + switch(insn.opcode) > > + { > > + // imm source is supported by these instructions. And > > + // the src operators can be exchange in these instructions > > + case SEL_OP_ADD: > > + case SEL_OP_MUL: > > + case SEL_OP_AND: > > + case SEL_OP_OR: > > + case SEL_OP_XOR: > > + break; > > + default: > > + return false; > > + } > > + > > + if(info->intermedia.type != var.type) > > + { > > + assert(info->replacement.file == GEN_IMMEDIATE_VALUE); > > + if(!((var.type == GEN_TYPE_D && info->intermedia.type == > GEN_TYPE_UD) > > + || (var.type == GEN_TYPE_UD && info->intermedia.type == > > GEN_TYPE_D))) > > + { > > + return false; > > + } > > + } > > + > > + if (info->intermedia.quarter == var.quarter && > > + info->intermedia.subnr == var.subnr && > > + info->intermedia.nr == var.nr) > > + { > > + uint32_t elements = CalculateElements(var, > > + insn.state.execWidth); > > //considering width, hstrid, vstrid and execWidth > > + if (info->elements != elements) > > + return false; > > + } > > + > > +#ifdef DEBUG_GLOBAL_IMM_OPT > > + outputSelectionInst(insn); > > +#endif > > + > > + return true; > > + } > > + > > + void SelGlobalImmMovOpt::doGlobalCopyPropagation() > > + { > > + for(SelectionBlock &block : *mblockList) > > + { > > + for (SelectionInstruction &insn :block.insnList) > > + { > > + for (uint8_t i = 0; i < insn.srcNum; ++i) > > + { > > + ReplaceInfoMap::iterator it = > > replaceInfoMap.find(insn.src(i).reg()); > > + if (it != replaceInfoMap.end()) > > + { > > + ReplaceInfo *info = it->second; > > + if (CanBeReplaced(info, insn, insn.src(i))) > > + { > > + info->toBeReplaceds.insert(&insn.src(i)); > > + info->toBeReplacedInsns.insert(&insn); > > + } > > + else > > + { > > + replaceInfoMap.erase(it); > > + delete info; > > + } > > + } > > + } > > + > > + for (uint8_t i = 0; i < insn.dstNum; ++i) > > + { > > + ReplaceInfoMap::iterator it = > > replaceInfoMap.find(insn.dst(i).reg()); > > + if (it != replaceInfoMap.end()) > > + { > > + ReplaceInfo *info = it->second; > > + if(&(info->insn) != &insn) > > + { > > + replaceInfoMap.erase(it); > > + delete info; > > + } > > + } > > + } > > + } > > + > > + if(replaceInfoMap.empty()) > > + break; > > + } > > + > > + cleanReplaceInfoMap(); > > + } > > + > > + void SelGlobalImmMovOpt::run() > > + { > > + bool canbeOpt = false; > > + > > + /*global immediates are set in entry block, the following is > > + example of GEN > > IR > > + > > + decl_input.global %41 dst > > + ## 0 output register ## > > + ## 0 pushed register > > + ## 3 blocks ## > > + LABEL $0 > > + LOADI.uint32 %42 0 > > + LOADI.uint32 %43 48 > > + > > + LABEL $1 > > + MUL.int32 %44 %3 %12 > > + ADD.int32 %49 %42 %48 > > + ... > > + */ > > + SelectionBlock &block = *mblockList->begin(); > > + for(SelectionInstruction &insn : block.insnList) > > + { > > + GenRegister src0 = insn.src(0); > > + if(insn.opcode == SEL_OP_MOV && > > + src0.file == GEN_IMMEDIATE_VALUE && > > + (src0.type == GEN_TYPE_UD || src0.type == GEN_TYPE_D || > > + src0.type > > == GEN_TYPE_F)) > > + { > > + addToReplaceInfoMap(insn); > > + canbeOpt = true; > > + > > +#ifdef DEBUG_GLOBAL_IMM_OPT > > + outputSelectionInst(insn); > > +#endif > > + } > > + } > > + > > + if(!canbeOpt) return; > > + > > + doGlobalCopyPropagation(); > > + } > > + > > void SelGlobalOptimizer::run() > > { > > > > } > > > > + BVAR(OCL_GLOBAL_IMM_OPTIMIZATION, true); > > + > > void Selection::optimize() > > { > > + //do global imm opt first to make more local opt > > + if(OCL_GLOBAL_IMM_OPTIMIZATION) > > + { > > + SelGlobalImmMovOpt gopt(getCtx(), opt_features, blockList); > > + gopt.run(); > > + } > > + > > //do basic block level optimization > > for (SelectionBlock &block : *blockList) { > > SelBasicBlockOptimizer bbopt(getCtx(), > > getCtx().getLiveOut(block.bb), opt_features, block); > > -- > > 2.7.4 > > > > _______________________________________________ > > Beignet mailing list > > Beignet@lists.freedesktop.org > > https://lists.freedesktop.org/mailman/listinfo/beignet > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet