From: Grigore Lupescu <grigore.lupescu at intel.com> Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com> --- backend/src/backend/gen_context.cpp | 71 ++++++++++++++++++++---------- backend/src/backend/gen_insn_selection.cpp | 6 +-- 2 files changed, 51 insertions(+), 26 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index dd8ff77..7a1c3cc 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2668,22 +2668,27 @@ namespace gbe p->MOV(threadLoop, ra->genReg(GenRegister::ud1grf(ir::ocl::threadid))); } - /* TODO implement communication for DW types */ - if(dst.type == GEN_TYPE_UL || - dst.type == GEN_TYPE_L || - dst.type == GEN_TYPE_DF_IMM) + /* All threads write the partial results to SLM memory */ + if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L) { - p->curr.execWidth = 16; - p->MOV(dst, threadData); - return; - } + GenRegister threadDataL = GenRegister::retype(threadData, GEN_TYPE_D); + GenRegister threadDataH = threadDataL.offset(threadDataL, 0, 4); + p->MOV(msgData.offset(msgData, 0), threadDataL); + p->MOV(msgData.offset(msgData, 1), threadDataH); - /* All threads write the partial results to SLM memory */ - p->curr.execWidth = 8; - p->MOV(msgData, threadData); - p->MUL(msgAddr, threadId, GenRegister::immd(0x4)); - p->ADD(msgAddr, msgAddr, msgSlmOff); - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1); + p->curr.execWidth = 8; + p->MUL(msgAddr, threadId, GenRegister::immd(0x8)); + p->ADD(msgAddr, msgAddr, msgSlmOff); + p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2); + } + else + { + p->curr.execWidth = 8; + p->MOV(msgData, threadData); + p->MUL(msgAddr, threadId, GenRegister::immd(0x4)); + p->ADD(msgAddr, msgAddr, msgSlmOff); + p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1); + } /* Init partialData register, it will hold the final result */ initValue(p, partialData, wg_op); @@ -2697,17 +2702,37 @@ namespace gbe p->push();{ jip0 = p->n_instruction(); - p->curr.execWidth = 8; - p->curr.predicate = GEN_PREDICATE_NONE; - /* Read in chunks of 4 to optimize SLM reads and reduce SEND messages */ - p->ADD(threadLoop, threadLoop, GenRegister::immd(-1)); - p->MUL(msgAddr, threadLoop, GenRegister::immd(0x4)); - p->ADD(msgAddr, msgAddr, msgSlmOff); - p->UNTYPED_READ(msgData, msgAddr, GenRegister::immw(0xFE), 1); + if(dst.type == GEN_TYPE_UL || dst.type == GEN_TYPE_L) + { + p->curr.execWidth = 8; + p->curr.predicate = GEN_PREDICATE_NONE; + p->ADD(threadLoop, threadLoop, GenRegister::immd(-1)); + p->MUL(msgAddr, threadLoop, GenRegister::immd(0x8)); + p->ADD(msgAddr, msgAddr, msgSlmOff); + p->UNTYPED_READ(msgData, msgAddr, GenRegister::immw(0xFE), 2); + + GenRegister msgDataL = msgData.retype(msgData.offset(msgData, 0, 4), GEN_TYPE_D); + GenRegister msgDataH = msgData.retype(msgData.offset(msgData, 1, 4), GEN_TYPE_D); + msgDataL.hstride = 2; + msgDataH.hstride = 2; + p->MOV(msgDataL, msgDataH); + + /* Perform operation, partialData will hold result */ + workgroupOp(partialData, partialData, msgData.offset(msgData, 0), wg_op, p); + } + else + { + p->curr.execWidth = 8; + p->curr.predicate = GEN_PREDICATE_NONE; + p->ADD(threadLoop, threadLoop, GenRegister::immd(-1)); + p->MUL(msgAddr, threadLoop, GenRegister::immd(0x4)); + p->ADD(msgAddr, msgAddr, msgSlmOff); + p->UNTYPED_READ(msgData, msgAddr, GenRegister::immw(0xFE), 1); - /* Perform operation, process 4 elements, partialData will hold result */ - workgroupOp(partialData, partialData, msgData.offset(msgData, 0), wg_op, p); + /* Perform operation, partialData will hold result */ + workgroupOp(partialData, partialData, msgData.offset(msgData, 0), wg_op, p); + } /* While threadN is not 0, cycle read SLM / update value */ p->curr.noMask = 1; diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp index 12a0cf4..3fe0465 100644 --- a/backend/src/backend/gen_insn_selection.cpp +++ b/backend/src/backend/gen_insn_selection.cpp @@ -6462,9 +6462,9 @@ namespace gbe GBE_ASSERT(srcNum == 3); GBE_ASSERT(insn.getSrc(0) == ir::ocl::threadn); GBE_ASSERT(insn.getSrc(1) == ir::ocl::threadid); - GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_DWORD)), type); - GenRegister data = sel.selReg(sel.reg(FAMILY_DWORD), type); - GenRegister slmOff = sel.selReg(sel.reg(FAMILY_DWORD), ir::TYPE_U32); + GenRegister tmp = GenRegister::retype(sel.selReg(sel.reg(FAMILY_QWORD)), type); + GenRegister data = sel.selReg(sel.reg(FAMILY_QWORD), type); + GenRegister slmOff = sel.selReg(sel.reg(FAMILY_QWORD), ir::TYPE_U32); vector<GenRegister> msg; for(uint32_t i = 0; i < 6; i++) -- 2.5.0 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet