Take below example: bb2: mov x, x-copy add x2, x, 1 mul x3, x2, 2 mov x-copy, x3
this is a self-loop block. In previous implementation, we have try our best to coaleasce phi/phiCopy/phiCopySrc. But in this example, we can do more, we can even coaleasce x2 to x, x-copy and x3. in real world, there may be many x2's lies in the use/def chain. So we do some check to see whether we can totally coaleasce the use/def chain lies between phi/phiCopySrc. We place various careful check in this path. Some may be too strict, so we need further refine to this logic. Signed-off-by: Ruiling Song <ruiling.s...@intel.com> --- backend/src/ir/instruction.cpp | 18 ++++++ backend/src/ir/instruction.hpp | 11 ++++ backend/src/llvm/llvm_gen_backend.cpp | 101 ++++++++++++++++++++++++++++++++-- 3 files changed, 124 insertions(+), 6 deletions(-) diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp index ed64580..a5bf89f 100644 --- a/backend/src/ir/instruction.cpp +++ b/backend/src/ir/instruction.cpp @@ -2292,6 +2292,24 @@ END_FUNCTION(Instruction, Register) if (new_ins) *new_ins = insn; } + bool Instruction::isNativeInstruction() const { + if (BinaryInstruction::isClassOf(*this)) { + //const BinaryInstruction * const insn = cast<BinaryInstruction *>(this); + const BinaryInstruction &insn = cast<BinaryInstruction>(*this); + if (insn.getType() == ir::TYPE_U32 || + insn.getType() == ir::TYPE_S32 || + insn.getType() == ir::TYPE_FLOAT) { + if (opcode == OP_MUL || opcode == OP_ADD || opcode == OP_MOV) + return true; + } + } + + if (TernaryInstruction::isClassOf(*this)) { + if (opcode == OP_MAD) + return true; + } + return false; + } bool Instruction::hasSideEffect(void) const { return opcode == OP_STORE || diff --git a/backend/src/ir/instruction.hpp b/backend/src/ir/instruction.hpp index b2b0b49..f710b5e 100644 --- a/backend/src/ir/instruction.hpp +++ b/backend/src/ir/instruction.hpp @@ -184,10 +184,21 @@ namespace ir { RegisterData getDstData(uint32_t ID = 0u) const; /*! Get the register of the given destination */ RegisterData getSrcData(uint32_t ID = 0u) const; + /*! whether the instruction directly maps to native instr. + * this is used to determine whether we can do aggressive + * register coaleascing for source and destination */ + bool isNativeInstruction() const; /*! Set a register in src srcID */ void setSrc(uint32_t srcID, Register reg); /*! Set a register in dst dstID */ void setDst(uint32_t dstID, Register reg); + bool use(Register reg) const { + for (uint32_t i = 0; i < getSrcNum(); i++) { + if (getSrc(i) == reg) + return true; + } + return false; + } /*! Is there any side effect in the memory sub-system? */ bool hasSideEffect(void) const; /*! Get / set the parent basic block */ diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 41cb783..7d78163 100644 --- a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2272,6 +2272,64 @@ namespace gbe }); } + bool useRegisters(const ir::Instruction *inst, std::vector<ir::Register> ®s) { + if (regs.empty()) return false; + for (auto & r : regs) { + if (inst->use(r)) + return true; + } + return false; + } + void findPossibleReplaceChain(ir::Liveness &liveness, ir::Function &fn, + std::vector<ir::Register> &replaceChain, + ir::Register phi, ir::Register phiCopy, + const ir::BasicBlock *bb, ir::BasicBlock::const_iterator searchEnd) { + ir::BasicBlock::const_iterator iter = bb->begin(); + ir::Register targetReg = phi; + const ir::Liveness::LiveOut &out = liveness.getLiveOut(bb); + + for ( ; iter != searchEnd; ++iter) { + const ir::Instruction *inst = iter.node(); + if (useRegisters(inst, replaceChain)) { + replaceChain.clear(); + // should stop + break; + } + if (inst->use(targetReg)) { + if (!inst->isNativeInstruction()) { + replaceChain.clear(); + // we need to stop further coaleascing + // in fact we need careful consideration here. + // I just break and clear the replaceChain now. + break; + } + + const ir::RegisterData oldData = fn.getRegisterData(targetReg); + const ir::RegisterData newData = fn.getRegisterData(inst->getDst(0)); + if(oldData.family != newData.family) { + replaceChain.clear(); + break; + } + // the value cannot be an liveout + if (out.contains(targetReg)) { + replaceChain.clear(); + break; + } + + replaceChain.push_back(targetReg); + targetReg = inst->getDst(0); + } + } + // check no use of the registers in replaceChain till the bb->end() + if (!replaceChain.empty()) { + for (; iter != bb->end(); ++iter) { + if (useRegisters(iter.node(), replaceChain)) { + replaceChain.clear(); + } + } + } + } + void GenWriter::optimizePhiCopy(ir::Liveness &liveness, ir::Function &fn, map<ir::Register, ir::Register> &replaceMap, map<ir::Register, ir::Register> &redundantPhiCopyMap) @@ -2292,7 +2350,7 @@ namespace gbe const ir::DefSet *phiCopyDef = dag->getRegDef(phiCopy); const ir::UseSet *phiUse = dag->getRegUse(phi); const DefSet *phiDef = dag->getRegDef(phi); - bool isOpt = true; + bool coalescePhiPhiCopy = true; // FIXME, I find under some situation, the phiDef maybe null, seems a bug when building FunctionDAg. // need fix it there. @@ -2307,7 +2365,7 @@ namespace gbe // phi & phiCopy are both alive at the endpoint of bb, // thus can not be optimized. if (out.contains(phi)) { - isOpt = false; + coalescePhiPhiCopy = false; break; } @@ -2333,7 +2391,17 @@ namespace gbe // add x2, x, 1 // mov x-copy, x2 // obviously x2, x-copy and x2 can be mapped to same virtual register - + // the situation may be more complex that this example, like + // bb2: + // mov x, x-copy + // add x2, x, 1 + // mul x3, x2, 2 + // mov x-copy, x3 + // that's why we add findPossibleReplaceChain() to solve such case. + // that is to replace all registers in the chain (x2, x3) by x-copy. + // note I did not merge the new 'ReplaceChain' with previous phiCopy/phiCopySrc + // coaleascing, the reason is 'ReplaceChain' is very hard to match. But + // phiCopy/phiCopySrc is eady to meet. The code here need further refine. ir::BasicBlock::const_iterator iter = ir::BasicBlock::const_iterator(phiCopySrcDefInsn); ir::BasicBlock::const_iterator iterE = bb->end(); @@ -2351,6 +2419,12 @@ namespace gbe } ++iter; } + + // Go through the basic block, to find chain of [phi, ... phiCopy] + std::vector<ir::Register> replaceChain; + findPossibleReplaceChain(liveness, fn, replaceChain, phi, phiCopy, bb, + ir::BasicBlock::const_iterator(phiCopyDefInsn)); + if (!phiPhiCopySrcInterfere) { replaceSrc(const_cast<Instruction *>(phiCopyDefInsn), phiCopySrc, phiCopy); @@ -2364,6 +2438,21 @@ namespace gbe replaceSrc(const_cast<Instruction *>(phiSrcUseInsn), phiCopySrc, phiCopy); } replaceMap.insert(std::make_pair(phiCopySrc, phiCopy)); + + if (!replaceChain.empty()) { + for (auto &r : replaceChain) { + const ir::DefSet *d = dag->getRegDef(r); + const ir::UseSet *u = dag->getRegUse(r); + replaceMap.insert(std::make_pair(r, phiCopy)); + + for (auto &dd : *d) { + replaceDst(const_cast<Instruction *>(dd->getInstruction()), r, phiCopy); + } + for (auto &uu : *u) { + replaceSrc(const_cast<Instruction *>(uu->getInstruction()), r, phiCopy); + } + } + } } } } else { @@ -2396,7 +2485,7 @@ namespace gbe if (&p == phiCopyDefInsn) break; // we only care MOV here if (p.getSrcNum() == 1 && p.getSrc(0) == phi) { - isOpt = false; + coalescePhiPhiCopy = false; break; } } @@ -2404,14 +2493,14 @@ namespace gbe } // coalease phi and phiCopy - if (isOpt) { + if (coalescePhiPhiCopy) { + replaceMap.insert(std::make_pair(phi, phiCopy)); for (auto &x : *phiDef) { replaceDst(const_cast<Instruction *>(x->getInstruction()), phi, phiCopy); } for (auto &x : *phiUse) { const Instruction *phiUseInsn = x->getInstruction(); replaceSrc(const_cast<Instruction *>(phiUseInsn), phi, phiCopy); - replaceMap.insert(std::make_pair(phi, phiCopy)); } } } -- 2.4.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet