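BDW supports int32 * int32 multiplication directly, so add a flag to the instruction selection to indicate it. When the accumulator is used, BDW uses int32 * int16. Because int32 * int16 also works on IVB, switch to int32 * int16 in all cases where the accumulator is used. The int32 * int32 to long multiplication still needs to be refined later.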
Signed-off-by: Yang Rong <rong.r.y...@intel.com> --- backend/src/backend/gen8_context.cpp | 2 +- backend/src/backend/gen_context.cpp | 5 ++-- backend/src/backend/gen_insn_selection.cpp | 46 +++++++++++++++++++----------- backend/src/backend/gen_insn_selection.hpp | 7 +++++ 4 files changed, 40 insertions(+), 20 deletions(-) diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index a9914f6..7247682 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -107,7 +107,7 @@ namespace gbe } void Gen8Context::newSelection(void) { - this->sel = GBE_NEW(Selection75, *this); + this->sel = GBE_NEW(Selection8, *this); } } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 81758eb..c2412d8 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -346,7 +346,8 @@ namespace gbe p->push(); p->curr.predicate = GEN_PREDICATE_NONE; p->curr.noMask = 1; - p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, src1); + p->MUL(GenRegister::retype(GenRegister::acc(), GEN_TYPE_UD), src0, + GenRegister::h2(GenRegister::retype(src1, GEN_TYPE_UW))); p->curr.accWrEnable = 1; p->MACH(tmp, src0, src1); p->pop(); @@ -1262,7 +1263,7 @@ namespace gbe p->push(); p->curr.execWidth = 8; for(int i = 0; i < execWidth; i += 8) { - p->MUL(acc, src0, src1); + p->MUL(acc, src0, GenRegister::h2(GenRegister::retype(src1, GEN_TYPE_UW))); p->curr.accWrEnable = 1; p->MACH(high, src0, src1); p->curr.accWrEnable = 0; diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index e3ee35d..dc10fa4 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -343,6 +343,8 @@ namespace gbe /*! 
should add per thread offset to the local memory address when load/store/atomic */ bool needPatchSLMAddr() const { return patchSLMAddr; } void setPatchSLMAddr(bool b) { patchSLMAddr = b; } + bool has32X32Mul() const { return bHas32X32Mul; } + void setHas32X32Mul(bool b) { bHas32X32Mul = b; } /*! indicate whether a register is a scalar/uniform register. */ INLINE bool isScalarReg(const ir::Register &reg) const { const ir::RegisterData &regData = getRegisterData(reg); @@ -625,6 +627,7 @@ namespace gbe /*! Auxiliary label for if/endif. */ uint16_t currAuxLabel; bool patchSLMAddr; + bool bHas32X32Mul; INLINE ir::LabelIndex newAuxLabel() { currAuxLabel++; @@ -663,7 +666,8 @@ namespace gbe ctx(ctx), block(NULL), curr(ctx.getSimdWidth()), file(ctx.getFunction().getRegisterFile()), maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum), - stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()), patchSLMAddr(false) + stateNum(0), vectorNum(0), bwdCodeGeneration(false), currAuxLabel(ctx.getFunction().labelNum()), + patchSLMAddr(false), bHas32X32Mul(false) { const ir::Function &fn = ctx.getFunction(); this->regNum = fn.regNum(); @@ -1667,6 +1671,11 @@ namespace gbe this->opaque->setPatchSLMAddr(true); } + Selection8::Selection8(GenContext &ctx) : Selection(ctx) { + this->opaque->setPatchSLMAddr(true); + this->opaque->setHas32X32Mul(true); + } + void Selection::Opaque::TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool is3D) { uint32_t elemID = 0; @@ -2444,18 +2453,23 @@ namespace gbe using namespace ir; const ir::BinaryInstruction &insn = cast<ir::BinaryInstruction>(dag.insn); const Type type = insn.getType(); - if (type == TYPE_U32 || type == TYPE_S32) { + if (type != TYPE_U32 && type != TYPE_S32) + return false; + + GenRegister dst = sel.selReg(insn.getDst(0), type); + GenRegister src0 = sel.selReg(insn.getSrc(0), type); + GenRegister src1 = sel.selReg(insn.getSrc(1), type); + if (sel.has32X32Mul()) { + 
sel.MUL(dst, src0, src1); + } else { sel.push(); - if (sel.isScalarReg(insn.getDst(0)) == true) { - sel.curr.execWidth = 1; - sel.curr.predicate = GEN_PREDICATE_NONE; - sel.curr.noMask = 1; - } - const uint32_t simdWidth = sel.curr.execWidth; + if (sel.isScalarReg(insn.getDst(0)) == true) { + sel.curr.execWidth = 1; + sel.curr.predicate = GEN_PREDICATE_NONE; + sel.curr.noMask = 1; + } - GenRegister dst = sel.selReg(insn.getDst(0), type); - GenRegister src0 = sel.selReg(insn.getSrc(0), type); - GenRegister src1 = sel.selReg(insn.getSrc(1), type); + const int simdWidth = sel.curr.execWidth; // Either left part of the 16-wide register or just a simd 8 register dst = GenRegister::retype(dst, GEN_TYPE_D); @@ -2498,13 +2512,11 @@ namespace gbe } else sel.MOV(GenRegister::retype(GenRegister::next(dst), GEN_TYPE_F), GenRegister::acc()); } - sel.pop(); - // All children are marked as root - markAllChildren(dag); - return true; - } else - return false; + } + // All children are marked as root + markAllChildren(dag); + return true; } }; diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp index 9bcce6f..e39aa6e 100644 --- a/backend/src/backend/gen_insn_selection.hpp +++ b/backend/src/backend/gen_insn_selection.hpp @@ -284,6 +284,13 @@ namespace gbe Selection75(GenContext &ctx); }; + class Selection8: public Selection + { + public: + /*! Initialize internal structures used for the selection */ + Selection8(GenContext &ctx); + }; + } /* namespace gbe */ #endif /* __GEN_INSN_SELECTION_HPP__ */ -- 1.8.3.2 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/beignet