> -----Original Message----- > From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of > Guo, Yejun > Sent: Tuesday, November 22, 2016 2:43 PM > To: beignet@lists.freedesktop.org > Cc: Guo, Yejun <yejun....@intel.com> > Subject: [Beignet] [PATCH 2/4] support sends (split send) for untyped write > > sends is a new instruction starting from gen9 to split the registers > of address and data for write, the register pressure can be loosed > since they are not necessary to be continuous any more. > > more patches for sends will be sent out. > > we can choose send or sends based on hasSends() in selection stage, > only enabeld as default for skylake now. > > Signed-off-by: Guo, Yejun <yejun....@intel.com> > --- > backend/src/backend/gen75_encoder.cpp | 2 +- > backend/src/backend/gen75_encoder.hpp | 2 +- > backend/src/backend/gen8_context.cpp | 21 +++++++---- > backend/src/backend/gen8_encoder.cpp | 2 +- > backend/src/backend/gen8_encoder.hpp | 2 +- > backend/src/backend/gen9_encoder.cpp | 58 > ++++++++++++++++++++++++++++++ > backend/src/backend/gen9_encoder.hpp | 3 +- > backend/src/backend/gen_context.cpp | 41 ++++++++++++--------- > backend/src/backend/gen_encoder.cpp | 12 ++++++- > backend/src/backend/gen_encoder.hpp | 4 ++- > backend/src/backend/gen_insn_selection.cpp | 22 ++++++++++-- > backend/src/backend/gen_insn_selection.hpp | 1 + > 12 files changed, 137 insertions(+), 33 deletions(-) > > diff --git a/backend/src/backend/gen75_encoder.cpp > b/backend/src/backend/gen75_encoder.cpp > index fc37991..9cafaa7 100644 > --- a/backend/src/backend/gen75_encoder.cpp > +++ b/backend/src/backend/gen75_encoder.cpp > @@ -199,7 +199,7 @@ namespace gbe > return insn->bits3.ud; > } > > - void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, > uint32_t elemNum) { > + void Gen75Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, > GenRegister bti, uint32_t elemNum) { > GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > assert(elemNum >= 
1 || elemNum <= 4); > this->setHeader(insn); > diff --git a/backend/src/backend/gen75_encoder.hpp > b/backend/src/backend/gen75_encoder.hpp > index d06f393..517afff 100644 > --- a/backend/src/backend/gen75_encoder.hpp > +++ b/backend/src/backend/gen75_encoder.hpp > @@ -44,7 +44,7 @@ namespace gbe > virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip); > virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, > GenRegister bti, uint32_t srcNum); > virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister > bti, uint32_t elemNum); > - virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t > elemNum); > + virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, > GenRegister bti, uint32_t elemNum); > virtual void setHeader(GenNativeInstruction *insn); > virtual void setDPUntypedRW(GenNativeInstruction *insn, uint32_t bti, > uint32_t rgba, > uint32_t msg_type, uint32_t msg_length, uint32_t > response_length); > diff --git a/backend/src/backend/gen8_context.cpp > b/backend/src/backend/gen8_context.cpp > index 71c54fb..95b1013 100644 > --- a/backend/src/backend/gen8_context.cpp > +++ b/backend/src/backend/gen8_context.cpp > @@ -968,6 +968,9 @@ namespace gbe > GBE_ASSERT(elemNum == 1); > const GenRegister addr = ra->genReg(insn.src(elemNum)); > const GenRegister bti = ra->genReg(insn.src(elemNum*2+1)); > + GenRegister data = ra->genReg(insn.src(elemNum+1)); > + if (!insn.extra.splitSend) > + data = addr; > > /* Because BDW's store and load send instructions for 64 bits require > the bti > to be surfaceless, > which we can not accept. We just fallback to 2 DW untypewrite here. 
*/ > @@ -978,11 +981,15 @@ namespace gbe > } > > if (bti.file == GEN_IMMEDIATE_VALUE) { > - p->UNTYPED_WRITE(addr, bti, elemNum*2); > + p->UNTYPED_WRITE(addr, data, bti, elemNum*2); > } else { > const GenRegister tmp = ra->genReg(insn.dst(elemNum)); > const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1)); > - unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum*2); > + unsigned desc = 0; > + if (insn.extra.splitSend) > + desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum*2); > + else > + desc = p->generateUntypedWriteMessageDesc(0, elemNum*2); > > unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc); > > @@ -990,7 +997,7 @@ namespace gbe > p->push(); > p->curr.predicate = GEN_PREDICATE_NORMAL; > p->curr.useFlag(insn.state.flag, insn.state.subFlag); > - p->UNTYPED_WRITE(addr, GenRegister::addr1(0), elemNum*2); > + p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2); > p->pop(); > afterMessage(insn, bti, tmp, btiTmp, jip0); > } > @@ -1351,7 +1358,7 @@ namespace gbe > nextDst = GenRegister::Qn(tempDst, 1); > p->MOV(nextDst, nextSrc); > p->pop(); > - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > > p->push(); > @@ -1367,7 +1374,7 @@ namespace gbe > nextDst = GenRegister::Qn(tempDst, 1); > p->MOV(nextDst, nextSrc); > p->pop(); > - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > } > > @@ -1794,7 +1801,7 @@ namespace gbe > p->curr.execWidth = 8; > p->MUL(msgAddr, threadId, GenRegister::immd(0x8)); > p->ADD(msgAddr, msgAddr, msgSlmOff); > - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2); > + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2); > } > else > { > @@ -1802,7 +1809,7 @@ namespace gbe > p->MOV(msgData, threadData); > p->MUL(msgAddr, threadId, 
GenRegister::immd(0x4)); > p->ADD(msgAddr, msgAddr, msgSlmOff); > - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1); > + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1); > } > > /* init partialData register, it will hold the final result */ > diff --git a/backend/src/backend/gen8_encoder.cpp > b/backend/src/backend/gen8_encoder.cpp > index 6638805..4239e84 100644 > --- a/backend/src/backend/gen8_encoder.cpp > +++ b/backend/src/backend/gen8_encoder.cpp > @@ -268,7 +268,7 @@ namespace gbe > return insn->bits3.ud; > } > > - void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, > uint32_t elemNum) { > + void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, > GenRegister bti, uint32_t elemNum) { > GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > assert(elemNum >= 1 || elemNum <= 4); > this->setHeader(insn); > diff --git a/backend/src/backend/gen8_encoder.hpp > b/backend/src/backend/gen8_encoder.hpp > index b73beb3..f6a91a0 100644 > --- a/backend/src/backend/gen8_encoder.hpp > +++ b/backend/src/backend/gen8_encoder.hpp > @@ -47,7 +47,7 @@ namespace gbe > virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, > GenRegister bti, uint32_t srcNum); > virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister > src, > GenRegister bti, uint32_t srcNum); > virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister > bti, uint32_t elemNum); > - virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t > elemNum); > + virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, > GenRegister bti, uint32_t elemNum); > virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t > elemNum); > virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum); > virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t > elemSize); > diff --git a/backend/src/backend/gen9_encoder.cpp > b/backend/src/backend/gen9_encoder.cpp > index 80df50d..351788c 100644 > --- 
a/backend/src/backend/gen9_encoder.cpp > +++ b/backend/src/backend/gen9_encoder.cpp > @@ -26,6 +26,14 @@ > > ***************************************************************** > *****/ > > #include "backend/gen9_encoder.hpp" > +#include "backend/gen9_instruction.hpp" > +static const uint32_t untypedRWMask[] = { > + > GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN|GEN_U > NTYPED_RED, > + GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE|GEN_UNTYPED_GREEN, > + GEN_UNTYPED_ALPHA|GEN_UNTYPED_BLUE, > + GEN_UNTYPED_ALPHA, > + 0 > +}; > > namespace gbe > { > @@ -65,4 +73,54 @@ namespace gbe > header_present, > simd_mode, return_format); > } > + unsigned > Gen9Encoder::setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, > unsigned bti, unsigned elemNum) The message descriptor encoding is the same for send and sends, so how about calling the existing function instead? 66 void Gen8Encoder::setDPUntypedRW(GenNativeInstruction *insn, 67 uint32_t bti, 68 uint32_t rgba, 69 uint32_t msg_type, 70 uint32_t msg_length, 71 uint32_t response_length) > + { > + Gen9NativeInstruction *gen9_insn = &insn->gen9_insn; > + gen9_insn->bits3.sends_untyped_rw.header_present = 0; > + gen9_insn->bits3.sends_untyped_rw.response_length = 0; > + gen9_insn->bits3.sends_untyped_rw.end_of_thread = 0; > + gen9_insn->bits3.sends_untyped_rw.msg_type = > GEN75_P1_UNTYPED_SURFACE_WRITE; > + gen9_insn->bits3.sends_untyped_rw.bti = bti; > + gen9_insn->bits3.sends_untyped_rw.rgba = untypedRWMask[elemNum]; > + if (this->curr.execWidth == 8) { > + gen9_insn->bits3.sends_untyped_rw.src0_length = 1; > + gen9_insn->bits3.sends_untyped_rw.simd_mode = GEN_UNTYPED_SIMD8; > + } else if (this->curr.execWidth == 16) { > + gen9_insn->bits3.sends_untyped_rw.src0_length = 2; > + gen9_insn->bits3.sends_untyped_rw.simd_mode = GEN_UNTYPED_SIMD16; > + } > + return gen9_insn->bits3.ud; > + } > + void Gen9Encoder::UNTYPED_WRITE(GenRegister addr, GenRegister data, > GenRegister bti, uint32_t elemNum) > + { > + if (addr.reg() == data.reg()) > + 
Gen8Encoder::UNTYPED_WRITE(addr, data, bti, elemNum); > + else { > + GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS); > + Gen9NativeInstruction *gen9_insn = &insn->gen9_insn; > + assert(elemNum >= 1 || elemNum <= 4); > + this->setHeader(insn); > + insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA; > + gen9_insn->bits1.sends.dest_reg_file_0 = 1; //01 for GRF Generally, we should set the sends destination to the null register, so the destination register file here should be ARF rather than GRF.
> + gen9_insn->bits1.sends.src1_reg_file_0 = 1; > + gen9_insn->bits1.sends.src1_reg_nr = data.nr; > + gen9_insn->bits1.sends.dest_subreg_nr = 0; > + gen9_insn->bits1.sends.dest_reg_nr = 0; > + gen9_insn->bits1.sends.dest_address_mode = 0; //direct mode > + gen9_insn->bits2.sends.src0_subreg_nr = addr.subnr; Setting src0_subreg_nr here is meaningless: only the src0_subreg_nr[4] bit is left in this encoding, and I am not sure whether the hardware uses it correctly. Generally, the subnr of a message payload register should be 0. You can remove the line above and add an assert(addr.subnr == 0); instead. I would also suggest defining the functions below to implement the sends encoding logic, since sends has a very different encoding: setSendsDst(nullreg); setSendsSrc0(src0); setSendsSrc1(src1); That way untyped_write(), byte_scatter() and typed_write() can call these functions instead of repeating the same logic in every place. > + gen9_insn->bits2.sends.src0_reg_nr = addr.nr; > + gen9_insn->bits2.sends.src0_address_mode = 0; > + if (this->curr.execWidth == 8) > + gen9_insn->bits2.sends.src1_length = elemNum; > + else if (this->curr.execWidth == 16) > + gen9_insn->bits2.sends.src1_length = 2 * elemNum; > + else > + assert(!"unsupported"); > + if (bti.file == GEN_IMMEDIATE_VALUE) { > + gen9_insn->bits2.sends.sel_reg32_desc = 0; > + setUntypedWriteSendsMessageDesc(insn, bti.value.ud, elemNum); > + } else > + gen9_insn->bits2.sends.sel_reg32_desc = 1; > + } > + } > } /* End of the name space. 
*/ > diff --git a/backend/src/backend/gen9_encoder.hpp > b/backend/src/backend/gen9_encoder.hpp > index 319e871..7b9f0df 100644 > --- a/backend/src/backend/gen9_encoder.hpp > +++ b/backend/src/backend/gen9_encoder.hpp > @@ -47,7 +47,8 @@ namespace gbe > uint32_t return_format, > bool isLD, > bool isUniform); > - > + virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, > GenRegister bti, uint32_t elemNum); > + virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction > *insn, unsigned bti, unsigned elemNum); > }; > } > #endif /* __GBE_GEN9_ENCODER_HPP__ */ > diff --git a/backend/src/backend/gen_context.cpp > b/backend/src/backend/gen_context.cpp > index c38b7af..848933e 100644 > --- a/backend/src/backend/gen_context.cpp > +++ b/backend/src/backend/gen_context.cpp > @@ -2146,7 +2146,7 @@ namespace gbe > const GenRegister bti = ra->genReg(insn.src(elemNum+1)); > > if (bti.file == GEN_IMMEDIATE_VALUE) { > - p->UNTYPED_WRITE(src, bti, elemNum*2); > + p->UNTYPED_WRITE(src, src, bti, elemNum*2); > } else { > const GenRegister tmp = ra->genReg(insn.dst(0)); > const GenRegister btiTmp = ra->genReg(insn.dst(1)); > @@ -2158,22 +2158,29 @@ namespace gbe > p->push(); > p->curr.predicate = GEN_PREDICATE_NORMAL; > p->curr.useFlag(insn.state.flag, insn.state.subFlag); > - p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum*2); > + p->UNTYPED_WRITE(src, src, GenRegister::addr1(0), elemNum*2); > p->pop(); > afterMessage(insn, bti, tmp, btiTmp, jip0); > } > } > > void GenContext::emitUntypedWriteInstruction(const SelectionInstruction > &insn) { > - const GenRegister src = ra->genReg(insn.src(0)); > + const GenRegister addr = ra->genReg(insn.src(0)); > + GenRegister data = ra->genReg(insn.src(1)); > + if (!insn.extra.splitSend) > + data = addr; > const uint32_t elemNum = insn.extra.elem; > const GenRegister bti = ra->genReg(insn.src(elemNum+1)); > if (bti.file == GEN_IMMEDIATE_VALUE) { > - p->UNTYPED_WRITE(src, bti, elemNum); > + p->UNTYPED_WRITE(addr, data, 
bti, elemNum); > } else { > const GenRegister tmp = ra->genReg(insn.dst(0)); > const GenRegister btiTmp = ra->genReg(insn.dst(1)); > - unsigned desc = p->generateUntypedWriteMessageDesc(0, elemNum); > + unsigned desc = 0; > + if (insn.extra.splitSend) > + desc = p->generateUntypedWriteSendsMessageDesc(0, elemNum); > + else > + desc = p->generateUntypedWriteMessageDesc(0, elemNum); > > unsigned jip0 = beforeMessage(insn, bti, tmp, btiTmp, desc); > > @@ -2181,7 +2188,7 @@ namespace gbe > p->push(); > p->curr.predicate = GEN_PREDICATE_NORMAL; > p->curr.useFlag(insn.state.flag, insn.state.subFlag); > - p->UNTYPED_WRITE(src, GenRegister::addr1(0), elemNum); > + p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum); > p->pop(); > afterMessage(insn, bti, tmp, btiTmp, jip0); > } > @@ -2881,14 +2888,14 @@ namespace gbe > // Write it out. > p->curr.execWidth = 8; > p->curr.noMask = 1; > - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); > p->ADD(addr, addr, GenRegister::immud(32)); > > // time stamps > for (int i = 0; i < 3; i++) { > p->curr.execWidth = 8; > p->MOV(data, GenRegister::retype(profilingReg[i], GEN_TYPE_UD)); > - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); > p->ADD(addr, addr, GenRegister::immud(32)); > } > } p->pop(); > @@ -3294,7 +3301,7 @@ namespace gbe > p->curr.execWidth = 8; > p->MUL(msgAddr, threadId, GenRegister::immd(0x8)); > p->ADD(msgAddr, msgAddr, msgSlmOff); > - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 2); > + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2); > } > else > { > @@ -3302,7 +3309,7 @@ namespace gbe > p->MOV(msgData, threadData); > p->MUL(msgAddr, threadId, GenRegister::immd(0x4)); > p->ADD(msgAddr, msgAddr, msgSlmOff); > - p->UNTYPED_WRITE(msg, GenRegister::immw(0xFE), 1); > + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1); > } > > /* init partialData register, it will hold the 
final result */ > @@ -3460,11 +3467,11 @@ namespace gbe > void GenContext::emitPrintfLongInstruction(GenRegister& addr, GenRegister& > data, > GenRegister& src, uint32_t bti) > { > p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.bottom_half()); > - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > > p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.top_half(this- > >simdWidth)); > - p->UNTYPED_WRITE(addr, GenRegister::immud(bti), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > } > > @@ -3492,15 +3499,15 @@ namespace gbe > p->ATOMIC(addr, GEN_ATOMIC_OP_ADD, addr, > GenRegister::immud(insn.extra.printfBTI), 2); > /* Write out the header. */ > p->MOV(data, GenRegister::immud(0xAABBCCDD)); > - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), > 1); > > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > p->MOV(data, GenRegister::immud(insn.extra.printfSize + 12)); > - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), > 1); > > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > p->MOV(data, GenRegister::immud(insn.extra.printfNum)); > - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1); > + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), > 1); > > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > } > @@ -3510,11 +3517,11 @@ namespace gbe > src = ra->genReg(insn.src(i)); > if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D || src.type == > GEN_TYPE_F) { > p->MOV(GenRegister::retype(data, src.type), src); > - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1); > + p->UNTYPED_WRITE(addr, addr, > 
GenRegister::immud(insn.extra.printfBTI), > 1); > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > } else if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_UB ) { > p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src); > - p->UNTYPED_WRITE(addr, GenRegister::immud(insn.extra.printfBTI), 1); > + p->UNTYPED_WRITE(addr, addr, > GenRegister::immud(insn.extra.printfBTI), > 1); > p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); > } else if (src.type == GEN_TYPE_L || src.type == GEN_TYPE_UL ) { > emitPrintfLongInstruction(addr, data, src, insn.extra.printfBTI); > diff --git a/backend/src/backend/gen_encoder.cpp > b/backend/src/backend/gen_encoder.cpp > index a69adc7..dc6dc63 100644 > --- a/backend/src/backend/gen_encoder.cpp > +++ b/backend/src/backend/gen_encoder.cpp > @@ -392,6 +392,11 @@ namespace gbe > return setUntypedWriteMessageDesc(&insn, bti, elemNum); > } > > + unsigned GenEncoder::generateUntypedWriteSendsMessageDesc(unsigned > bti, unsigned elemNum) { > + GenNativeInstruction insn; > + memset(&insn, 0, sizeof(GenNativeInstruction)); > + return setUntypedWriteSendsMessageDesc(&insn, bti, elemNum); > + } > unsigned GenEncoder::setUntypedWriteMessageDesc(GenNativeInstruction > *insn, unsigned bti, unsigned elemNum) { > uint32_t msg_length = 0; > uint32_t response_length = 0; > @@ -411,6 +416,11 @@ namespace gbe > return insn->bits3.ud; > } > > + unsigned > GenEncoder::setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, > unsigned bti, unsigned elemNum) > + { > + assert(0); > + return 0; > + } > void GenEncoder::UNTYPED_READA64(GenRegister dst, GenRegister src, > uint32_t elemNum) { > assert(0); > } > @@ -423,7 +433,7 @@ namespace gbe > assert(0); > } > > - void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister bti, > uint32_t elemNum) { > + void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, > GenRegister bti, uint32_t elemNum) { > GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); > assert(elemNum >= 1 || 
elemNum <= 4); > this->setHeader(insn); > diff --git a/backend/src/backend/gen_encoder.hpp > b/backend/src/backend/gen_encoder.hpp > index 00d3eaa..e6f362b 100644 > --- a/backend/src/backend/gen_encoder.hpp > +++ b/backend/src/backend/gen_encoder.hpp > @@ -177,7 +177,7 @@ namespace gbe > /*! Untyped read (upto 4 channels) */ > virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister > bti, uint32_t elemNum); > /*! Untyped write (upto 4 channels) */ > - virtual void UNTYPED_WRITE(GenRegister src, GenRegister bti, uint32_t > elemNum); > + virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, > GenRegister bti, uint32_t elemNum); > /*! Untyped read A64(upto 4 channels) */ > virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t > elemNum); > /*! Untyped write (upto 4 channels) */ > @@ -260,12 +260,14 @@ namespace gbe > virtual unsigned setAtomicA64MessageDesc(GenNativeInstruction *insn, > unsigned function, unsigned bti, unsigned srcNum, int type_long); > virtual unsigned setUntypedReadMessageDesc(GenNativeInstruction *insn, > unsigned bti, unsigned elemNum); > virtual unsigned setUntypedWriteMessageDesc(GenNativeInstruction *insn, > unsigned bti, unsigned elemNum); > + virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction > *insn, unsigned bti, unsigned elemNum); > unsigned setByteGatherMessageDesc(GenNativeInstruction *insn, unsigned > bti, unsigned elemSize); > unsigned setByteScatterMessageDesc(GenNativeInstruction *insn, unsigned > bti, unsigned elemSize); > > unsigned generateAtomicMessageDesc(unsigned function, unsigned bti, > unsigned srcNum); > unsigned generateUntypedReadMessageDesc(unsigned bti, unsigned > elemNum); > unsigned generateUntypedWriteMessageDesc(unsigned bti, unsigned > elemNum); > + unsigned generateUntypedWriteSendsMessageDesc(unsigned bti, unsigned > elemNum); > unsigned generateByteGatherMessageDesc(unsigned bti, unsigned elemSize); > unsigned generateByteScatterMessageDesc(unsigned 
bti, unsigned elemSize); > > diff --git a/backend/src/backend/gen_insn_selection.cpp > b/backend/src/backend/gen_insn_selection.cpp > index c14e0bc..deebafa 100644 > --- a/backend/src/backend/gen_insn_selection.cpp > +++ b/backend/src/backend/gen_insn_selection.cpp > @@ -373,7 +373,9 @@ namespace gbe > /*! spill a register (insert spill/unspill instructions) */ > INLINE bool spillRegs(const SpilledRegs &spilledRegs, uint32_t > registerPool); > bool has32X32Mul() const { return bHas32X32Mul; } > + bool hasSends() const { return bHasSends; } > void setHas32X32Mul(bool b) { bHas32X32Mul = b; } > + void setHasSends(bool b) { bHasSends = b; } > bool hasLongType() const { return bHasLongType; } > bool hasDoubleType() const { return bHasDoubleType; } > bool hasHalfType() const { return bHasHalfType; } > @@ -822,6 +824,7 @@ namespace gbe > bool bHasDoubleType; > bool bHasHalfType; > bool bLongRegRestrict; > + bool bHasSends; > uint32_t ldMsgOrder; > bool slowByteGather; > INLINE ir::LabelIndex newAuxLabel() > @@ -864,7 +867,7 @@ namespace gbe > maxInsnNum(ctx.getFunction().getLargestBlockSize()), dagPool(maxInsnNum), > stateNum(0), vectorNum(0), bwdCodeGeneration(false), > storeThreadMap(false), > currAuxLabel(ctx.getFunction().labelNum()), bHas32X32Mul(false), > bHasLongType(false), > - bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false), > + bHasDoubleType(false), bHasHalfType(false), bLongRegRestrict(false), > bHasSends(false), > ldMsgOrder(LD_MSG_ORDER_IVB), slowByteGather(false) > { > const ir::Function &fn = ctx.getFunction(); > @@ -1665,7 +1668,6 @@ namespace gbe > unsigned dstNum = temps.size(); > unsigned srcNum = elemNum + 2 + temps.size(); > SelectionInstruction *insn = this->appendInsn(SEL_OP_UNTYPED_WRITE, > dstNum, srcNum); > - SelectionVector *vector = this->appendVector(); > > if (bti.file != GEN_IMMEDIATE_VALUE) { > insn->state.flag = 0; > @@ -1685,11 +1687,26 @@ namespace gbe > } > insn->extra.elem = elemNum; > > + if (hasSends()) { > + 
insn->extra.splitSend = 1; > + SelectionVector *vector = this->appendVector(); > + vector->regNum = elemNum; > + vector->reg = &insn->src(1); > + vector->offsetID = 1; > + vector->isSrc = 1; > + vector = this->appendVector(); > + vector->regNum = 1; > + vector->reg = &insn->src(0); > + vector->offsetID = 0; > + vector->isSrc = 1; > + } else { > // Sends require contiguous allocation for the sources > + SelectionVector *vector = this->appendVector(); > vector->regNum = elemNum+1; > vector->reg = &insn->src(0); > vector->offsetID = 0; > vector->isSrc = 1; > + } > } > > void Selection::Opaque::UNTYPED_WRITEA64(const GenRegister *src, > @@ -2722,6 +2739,7 @@ extern bool OCL_DEBUGINFO; // first defined by > calling BVAR in program.cpp > this->opaque->setLdMsgOrder(LD_MSG_ORDER_SKL); > this->opaque->setSlowByteGather(false); > this->opaque->setHasHalfType(true); > + this->opaque->setHasSends(true); > opt_features = SIOF_LOGICAL_SRCMOD; > } > > diff --git a/backend/src/backend/gen_insn_selection.hpp > b/backend/src/backend/gen_insn_selection.hpp > index 14ac05f..7ce2b94 100644 > --- a/backend/src/backend/gen_insn_selection.hpp > +++ b/backend/src/backend/gen_insn_selection.hpp > @@ -104,6 +104,7 @@ namespace gbe > uint16_t function:8; > /*! elemSize for byte scatters / gathers, elemNum for untyped msg, > operand number for atomic */ > uint16_t elem:8; > + uint16_t splitSend:1; > }; > struct { > /*! Number of sources in the tuple */ > -- > 1.9.1 > > _______________________________________________ > Beignet mailing list > Beignet@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/beignet _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet