There are some patterns like:
    sqrt r1, r2;
    load r4, 1.0;     ===> rsqrt r3, r2
    div r3, r4, r1;
Signed-off-by: rander.wang <rander.w...@intel.com> --- backend/src/backend/gen_insn_selection.cpp | 71 ++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 822357e..832fbfe 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -3741,6 +3741,76 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp } }; +/*! there some patterns like: + sqrt r1, r2; + load r4, 1.0; ===> rqrt r3, r2 + div r3, r4, r1; */ + class SqrtDivInstructionPattern : public SelectionPattern + { + public: + /*! Register the pattern for all opcodes of the family */ + SqrtDivInstructionPattern(void) : SelectionPattern(1, 1) { + this->opcodes.push_back(ir::OP_DIV); + } + + /*! Implements base class */ + virtual bool emit(Selection::Opaque &sel, SelectionDAG &dag) const + { + using namespace ir; + + // We are good to try. We need a MUL for one of the two sources + const ir::BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn); + if (insn.getType() != TYPE_FLOAT) + return false; + SelectionDAG *child0 = dag.child[0]; + SelectionDAG *child1 = dag.child[1]; + const GenRegister dst = sel.selReg(insn.getDst(0), TYPE_FLOAT); + + if (child1 && child1->insn.getOpcode() == OP_SQR) { + GBE_ASSERT(cast<ir::UnaryInstruction>(child1->insn).getType() == TYPE_FLOAT); + GenRegister srcSQR = sel.selReg(child1->insn.getSrc(0), TYPE_FLOAT); + const GenRegister dstSQR = sel.selReg(child1->insn.getDst(0), TYPE_FLOAT); + const GenRegister src0 = sel.selReg(insn.getSrc(0), TYPE_FLOAT); + const GenRegister src1 = sel.selReg(insn.getSrc(1), TYPE_FLOAT); + float val = 0.0f; + + if(child0 && child0->insn.getOpcode() == OP_LOADI) + { + const auto &loadimm = cast<LoadImmInstruction>(child0->insn); + const Immediate imm = loadimm.getImmediate(); + const Type type = imm.getType(); + if(type == TYPE_FLOAT) + val = 
imm.getFloatValue(); + else if(type == TYPE_S32) + val = imm.getIntegerValue(); + } + + sel.push(); + if (sel.isScalarReg(insn.getDst(0))) + sel.curr.execWidth = 1; + + if(val == 1.0f) + { + sel.MATH(dst, GEN_MATH_FUNCTION_RSQ, srcSQR); + child0->isRoot = 1; + child1->isRoot = 1; + } + else + { + sel.MATH(dstSQR, GEN_MATH_FUNCTION_RSQ, srcSQR); + sel.MUL(dst, src0, src1); + sel.pop(); + if (child1->child[0]) child1->child[0]->isRoot = 1; + if (child1->child[1]) child1->child[1]->isRoot = 1; + if (child0) child0->isRoot = 1; + } + + return true; + } + return false; + } + }; + /*! sel.{le,l,ge...} like patterns */ class SelectModifierInstructionPattern : public SelectionPattern { @@ -8078,6 +8148,7 @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in program.cpp SelectionLibrary::SelectionLibrary(void) { this->insert<UnaryInstructionPattern>(); + this->insert<SqrtDivInstructionPattern>(); this->insert<BinaryInstructionPattern>(); this->insert<TypedWriteInstructionPattern>(); this->insert<SyncInstructionPattern>(); -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet