It makes it possible to switch between send and sends within the encoder function. v2: use GBE_ASSERT etc. Signed-off-by: Guo, Yejun <yejun....@intel.com> --- backend/src/backend/gen8_context.cpp | 14 ++++++------- backend/src/backend/gen8_encoder.cpp | 2 +- backend/src/backend/gen8_encoder.hpp | 2 +- backend/src/backend/gen9_encoder.cpp | 22 ++++++++++++--------- backend/src/backend/gen9_encoder.hpp | 4 ++-- backend/src/backend/gen_context.cpp | 38 ++++++++++++++++-------------------- backend/src/backend/gen_encoder.cpp | 4 ++-- backend/src/backend/gen_encoder.hpp | 4 ++-- 8 files changed, 44 insertions(+), 46 deletions(-)
diff --git a/backend/src/backend/gen8_context.cpp b/backend/src/backend/gen8_context.cpp index 95b1013..a3045ce 100644 --- a/backend/src/backend/gen8_context.cpp +++ b/backend/src/backend/gen8_context.cpp @@ -969,8 +969,6 @@ namespace gbe const GenRegister addr = ra->genReg(insn.src(elemNum)); const GenRegister bti = ra->genReg(insn.src(elemNum*2+1)); GenRegister data = ra->genReg(insn.src(elemNum+1)); - if (!insn.extra.splitSend) - data = addr; /* Because BDW's store and load send instructions for 64 bits require the bti to be surfaceless, which we can not accept. We just fallback to 2 DW untypewrite here. */ @@ -981,7 +979,7 @@ namespace gbe } if (bti.file == GEN_IMMEDIATE_VALUE) { - p->UNTYPED_WRITE(addr, data, bti, elemNum*2); + p->UNTYPED_WRITE(addr, data, bti, elemNum*2, insn.extra.splitSend); } else { const GenRegister tmp = ra->genReg(insn.dst(elemNum)); const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1)); @@ -997,7 +995,7 @@ namespace gbe p->push(); p->curr.predicate = GEN_PREDICATE_NORMAL; p->curr.useFlag(insn.state.flag, insn.state.subFlag); - p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2); + p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2, insn.extra.splitSend); p->pop(); afterMessage(insn, bti, tmp, btiTmp, jip0); } @@ -1358,7 +1356,7 @@ namespace gbe nextDst = GenRegister::Qn(tempDst, 1); p->MOV(nextDst, nextSrc); p->pop(); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); p->push(); @@ -1374,7 +1372,7 @@ namespace gbe nextDst = GenRegister::Qn(tempDst, 1); p->MOV(nextDst, nextSrc); p->pop(); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); } @@ -1801,7 +1799,7 @@ namespace gbe p->curr.execWidth = 8; p->MUL(msgAddr, threadId, 
GenRegister::immd(0x8)); p->ADD(msgAddr, msgAddr, msgSlmOff); - p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2); + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false); } else { @@ -1809,7 +1807,7 @@ namespace gbe p->MOV(msgData, threadData); p->MUL(msgAddr, threadId, GenRegister::immd(0x4)); p->ADD(msgAddr, msgAddr, msgSlmOff); - p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1); + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false); } /* init partialData register, it will hold the final result */ diff --git a/backend/src/backend/gen8_encoder.cpp b/backend/src/backend/gen8_encoder.cpp index 8f73346..2928943 100644 --- a/backend/src/backend/gen8_encoder.cpp +++ b/backend/src/backend/gen8_encoder.cpp @@ -268,7 +268,7 @@ namespace gbe return insn->bits3.ud; } - void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) { + void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); assert(elemNum >= 1 || elemNum <= 4); this->setHeader(insn); diff --git a/backend/src/backend/gen8_encoder.hpp b/backend/src/backend/gen8_encoder.hpp index f6a91a0..4afec0c 100644 --- a/backend/src/backend/gen8_encoder.hpp +++ b/backend/src/backend/gen8_encoder.hpp @@ -47,7 +47,7 @@ namespace gbe virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister src, GenRegister bti, uint32_t srcNum); virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum); - virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum); + virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends); virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum); 
virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum); virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t elemSize); diff --git a/backend/src/backend/gen9_encoder.cpp b/backend/src/backend/gen9_encoder.cpp index b5be852..47175f6 100644 --- a/backend/src/backend/gen9_encoder.cpp +++ b/backend/src/backend/gen9_encoder.cpp @@ -84,7 +84,7 @@ namespace gbe else if (dst.file == GEN_GENERAL_REGISTER_FILE) gen9_insn->bits1.sends.dest_reg_file_0 = 1; else - assert(!"should not reach here"); + NOT_SUPPORTED; gen9_insn->bits1.sends.src1_reg_file_0 = 1; gen9_insn->bits1.sends.src1_reg_nr = src1.nr; @@ -116,11 +116,13 @@ namespace gbe return insn->bits3.ud; } - void Gen9Encoder::UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum) + void Gen9Encoder::UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends) { - if (addr.reg() == data.reg()) - Gen8Encoder::UNTYPED_WRITE(addr, data, bti, elemNum); + if (!useSends) + Gen8Encoder::UNTYPED_WRITE(addr, data, bti, elemNum, false); else { + GBE_ASSERT(addr.reg() != data.reg()); + GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS); Gen9NativeInstruction *gen9_insn = &insn->gen9_insn; assert(elemNum >= 1 || elemNum <= 4); @@ -134,7 +136,7 @@ namespace gbe else if (this->curr.execWidth == 16) gen9_insn->bits2.sends.src1_length = 2 * elemNum; else - assert(!"unsupported"); + NOT_SUPPORTED; if (bti.file == GEN_IMMEDIATE_VALUE) { gen9_insn->bits2.sends.sel_reg32_desc = 0; @@ -164,11 +166,13 @@ namespace gbe return insn->bits3.ud; } - void Gen9Encoder::BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemSize) + void Gen9Encoder::BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemSize, bool useSends) { - if (addr.reg() == data.reg()) - Gen8Encoder::BYTE_SCATTER(addr, data, bti, elemSize); + if (!useSends) + Gen8Encoder::BYTE_SCATTER(addr, data, bti, elemSize, false); else { + 
GBE_ASSERT(addr.reg() != data.reg()); + GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS); Gen9NativeInstruction *gen9_insn = &insn->gen9_insn; @@ -181,7 +185,7 @@ namespace gbe else if (this->curr.execWidth == 16) gen9_insn->bits2.sends.src1_length = 2; else - assert(!"unsupported"); + NOT_SUPPORTED; if (bti.file == GEN_IMMEDIATE_VALUE) { gen9_insn->bits2.sends.sel_reg32_desc = 0; diff --git a/backend/src/backend/gen9_encoder.hpp b/backend/src/backend/gen9_encoder.hpp index 1c40b92..4eb36e4 100644 --- a/backend/src/backend/gen9_encoder.hpp +++ b/backend/src/backend/gen9_encoder.hpp @@ -48,9 +48,9 @@ namespace gbe bool isLD, bool isUniform); void setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, GenRegister src0, GenRegister src1); - virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum); + virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends); virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemNum); - virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemSize); + virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemSize, bool useSends); virtual unsigned setByteScatterSendsMessageDesc(GenNativeInstruction *insn, unsigned bti, unsigned elemSize); }; } diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp index 8288fa5..4341677 100644 --- a/backend/src/backend/gen_context.cpp +++ b/backend/src/backend/gen_context.cpp @@ -2146,7 +2146,7 @@ namespace gbe const GenRegister bti = ra->genReg(insn.src(elemNum+1)); if (bti.file == GEN_IMMEDIATE_VALUE) { - p->UNTYPED_WRITE(src, src, bti, elemNum*2); + p->UNTYPED_WRITE(src, src, bti, elemNum*2, false); } else { const GenRegister tmp = ra->genReg(insn.dst(0)); const GenRegister btiTmp = ra->genReg(insn.dst(1)); @@ -2158,7 +2158,7 @@ 
namespace gbe p->push(); p->curr.predicate = GEN_PREDICATE_NORMAL; p->curr.useFlag(insn.state.flag, insn.state.subFlag); - p->UNTYPED_WRITE(src, src, GenRegister::addr1(0), elemNum*2); + p->UNTYPED_WRITE(src, src, GenRegister::addr1(0), elemNum*2, false); p->pop(); afterMessage(insn, bti, tmp, btiTmp, jip0); } @@ -2167,12 +2167,10 @@ namespace gbe void GenContext::emitUntypedWriteInstruction(const SelectionInstruction &insn) { const GenRegister addr = ra->genReg(insn.src(0)); GenRegister data = ra->genReg(insn.src(1)); - if (!insn.extra.splitSend) - data = addr; const uint32_t elemNum = insn.extra.elem; const GenRegister bti = ra->genReg(insn.src(elemNum+1)); if (bti.file == GEN_IMMEDIATE_VALUE) { - p->UNTYPED_WRITE(addr, data, bti, elemNum); + p->UNTYPED_WRITE(addr, data, bti, elemNum, insn.extra.splitSend); } else { const GenRegister tmp = ra->genReg(insn.dst(0)); const GenRegister btiTmp = ra->genReg(insn.dst(1)); @@ -2188,7 +2186,7 @@ namespace gbe p->push(); p->curr.predicate = GEN_PREDICATE_NORMAL; p->curr.useFlag(insn.state.flag, insn.state.subFlag); - p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum); + p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum, insn.extra.splitSend); p->pop(); afterMessage(insn, bti, tmp, btiTmp, jip0); } @@ -2222,13 +2220,11 @@ namespace gbe void GenContext::emitByteScatterInstruction(const SelectionInstruction &insn) { const GenRegister addr = ra->genReg(insn.src(0)); GenRegister data = ra->genReg(insn.src(1)); - if (!insn.extra.splitSend) - data = addr; const uint32_t elemSize = insn.extra.elem; const GenRegister bti = ra->genReg(insn.src(2)); if (bti.file == GEN_IMMEDIATE_VALUE) { - p->BYTE_SCATTER(addr, data, bti, elemSize); + p->BYTE_SCATTER(addr, data, bti, elemSize, insn.extra.splitSend); } else { const GenRegister tmp = ra->genReg(insn.dst(0)); const GenRegister btiTmp = ra->genReg(insn.dst(1)); @@ -2244,7 +2240,7 @@ namespace gbe p->push(); p->curr.predicate = GEN_PREDICATE_NORMAL; 
p->curr.useFlag(insn.state.flag, insn.state.subFlag); - p->BYTE_SCATTER(addr, data, GenRegister::addr1(0), elemSize); + p->BYTE_SCATTER(addr, data, GenRegister::addr1(0), elemSize, insn.extra.splitSend); p->pop(); afterMessage(insn, bti, tmp, btiTmp, jip0); } @@ -2895,14 +2891,14 @@ namespace gbe // Write it out. p->curr.execWidth = 8; p->curr.noMask = 1; - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false); p->ADD(addr, addr, GenRegister::immud(32)); // time stamps for (int i = 0; i < 3; i++) { p->curr.execWidth = 8; p->MOV(data, GenRegister::retype(profilingReg[i], GEN_TYPE_UD)); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false); p->ADD(addr, addr, GenRegister::immud(32)); } } p->pop(); @@ -3308,7 +3304,7 @@ namespace gbe p->curr.execWidth = 8; p->MUL(msgAddr, threadId, GenRegister::immd(0x8)); p->ADD(msgAddr, msgAddr, msgSlmOff); - p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2); + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false); } else { @@ -3316,7 +3312,7 @@ namespace gbe p->MOV(msgData, threadData); p->MUL(msgAddr, threadId, GenRegister::immd(0x4)); p->ADD(msgAddr, msgAddr, msgSlmOff); - p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1); + p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false); } /* init partialData register, it will hold the final result */ @@ -3474,11 +3470,11 @@ namespace gbe void GenContext::emitPrintfLongInstruction(GenRegister& addr, GenRegister& data, GenRegister& src, uint32_t bti) { p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.bottom_half()); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.top_half(this->simdWidth)); - p->UNTYPED_WRITE(addr, addr, 
GenRegister::immud(bti), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); } @@ -3503,15 +3499,15 @@ namespace gbe p->ATOMIC(addr, GEN_ATOMIC_OP_ADD, addr, GenRegister::immud(insn.extra.printfBTI), 2); /* Write out the header. */ p->MOV(data, GenRegister::immud(0xAABBCCDD)); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); p->MOV(data, GenRegister::immud(insn.extra.printfSize + 12)); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); p->MOV(data, GenRegister::immud(insn.extra.printfNum)); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); } @@ -3521,11 +3517,11 @@ namespace gbe src = ra->genReg(insn.src(i)); if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D || src.type == GEN_TYPE_F) { p->MOV(GenRegister::retype(data, src.type), src); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); } else if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_UB ) { p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src); - p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1); + p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 1, false); p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t))); } else if (src.type == GEN_TYPE_L || src.type == GEN_TYPE_UL ) { emitPrintfLongInstruction(addr, data, src, 
insn.extra.printfBTI); diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp index 49d93e8..9d23df3 100644 --- a/backend/src/backend/gen_encoder.cpp +++ b/backend/src/backend/gen_encoder.cpp @@ -433,7 +433,7 @@ namespace gbe assert(0); } - void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum) { + void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); assert(elemNum >= 1 || elemNum <= 4); this->setHeader(insn); @@ -534,7 +534,7 @@ namespace gbe return insn->bits3.ud; } - void GenEncoder::BYTE_SCATTER(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemSize) { + void GenEncoder::BYTE_SCATTER(GenRegister msg, GenRegister data, GenRegister bti, uint32_t elemSize, bool useSends) { GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND); this->setHeader(insn); diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp index e5eb2e2..31b6e92 100644 --- a/backend/src/backend/gen_encoder.hpp +++ b/backend/src/backend/gen_encoder.hpp @@ -177,7 +177,7 @@ namespace gbe /*! Untyped read (upto 4 channels) */ virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemNum); /*! Untyped write (upto 4 channels) */ - virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum); + virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemNum, bool useSends); /*! Untyped read A64(upto 4 channels) */ virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t elemNum); /*! Untyped write (upto 4 channels) */ @@ -185,7 +185,7 @@ namespace gbe /*! Byte gather (for unaligned bytes, shorts and ints) */ void BYTE_GATHER(GenRegister dst, GenRegister src, GenRegister bti, uint32_t elemSize); /*! 
Byte scatter (for unaligned bytes, shorts and ints) */ - virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemSize); + virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister bti, uint32_t elemSize, bool useSends); /*! Byte gather a64 (for unaligned bytes, shorts and ints) */ virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t elemSize); /*! Byte scatter a64 (for unaligned bytes, shorts and ints) */ -- 1.9.1 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet