it makes it possible to switch between send and sends within the encoder function.

v2: use GBE_ASSERT etc.
Signed-off-by: Guo, Yejun <yejun....@intel.com>
---
 backend/src/backend/gen8_context.cpp | 14 ++++++-------
 backend/src/backend/gen8_encoder.cpp |  2 +-
 backend/src/backend/gen8_encoder.hpp |  2 +-
 backend/src/backend/gen9_encoder.cpp | 22 ++++++++++++---------
 backend/src/backend/gen9_encoder.hpp |  4 ++--
 backend/src/backend/gen_context.cpp  | 38 ++++++++++++++++--------------------
 backend/src/backend/gen_encoder.cpp  |  4 ++--
 backend/src/backend/gen_encoder.hpp  |  4 ++--
 8 files changed, 44 insertions(+), 46 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp 
b/backend/src/backend/gen8_context.cpp
index 95b1013..a3045ce 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -969,8 +969,6 @@ namespace gbe
     const GenRegister addr = ra->genReg(insn.src(elemNum));
     const GenRegister bti = ra->genReg(insn.src(elemNum*2+1));
     GenRegister data = ra->genReg(insn.src(elemNum+1));
-    if (!insn.extra.splitSend)
-      data = addr;
 
     /* Because BDW's store and load send instructions for 64 bits require the 
bti to be surfaceless,
        which we can not accept. We just fallback to 2 DW untypewrite here. */
@@ -981,7 +979,7 @@ namespace gbe
     }
 
     if (bti.file == GEN_IMMEDIATE_VALUE) {
-      p->UNTYPED_WRITE(addr, data, bti, elemNum*2);
+      p->UNTYPED_WRITE(addr, data, bti, elemNum*2, insn.extra.splitSend);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(elemNum));
       const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
@@ -997,7 +995,7 @@ namespace gbe
       p->push();
         p->curr.predicate = GEN_PREDICATE_NORMAL;
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-        p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2);
+        p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2, 
insn.extra.splitSend);
       p->pop();
       afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
@@ -1358,7 +1356,7 @@ namespace gbe
       nextDst = GenRegister::Qn(tempDst, 1);
       p->MOV(nextDst, nextSrc);
     p->pop();
-    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
+    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false);
     p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
 
     p->push();
@@ -1374,7 +1372,7 @@ namespace gbe
       nextDst = GenRegister::Qn(tempDst, 1);
       p->MOV(nextDst, nextSrc);
     p->pop();
-    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
+    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false);
     p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
   }
 
@@ -1801,7 +1799,7 @@ namespace gbe
       p->curr.execWidth = 8;
       p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
+      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false);
     }
     else
     {
@@ -1809,7 +1807,7 @@ namespace gbe
       p->MOV(msgData, threadData);
       p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
+      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false);
     }
 
     /* init partialData register, it will hold the final result */
diff --git a/backend/src/backend/gen8_encoder.cpp 
b/backend/src/backend/gen8_encoder.cpp
index 8f73346..2928943 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -268,7 +268,7 @@ namespace gbe
     return insn->bits3.ud;
   }
 
-  void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, 
GenRegister bti, uint32_t elemNum) {
+  void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, 
GenRegister bti, uint32_t elemNum, bool useSends) {
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     assert(elemNum >= 1 || elemNum <= 4);
     this->setHeader(insn);
diff --git a/backend/src/backend/gen8_encoder.hpp 
b/backend/src/backend/gen8_encoder.hpp
index f6a91a0..4afec0c 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -47,7 +47,7 @@ namespace gbe
     virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum);
     virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister 
src, GenRegister bti, uint32_t srcNum);
     virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
bti, uint32_t elemNum);
-    virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister 
bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister 
bti, uint32_t elemNum, bool useSends);
     virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t 
elemNum);
     virtual void UNTYPED_WRITEA64(GenRegister src, uint32_t elemNum);
     virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t 
elemSize);
diff --git a/backend/src/backend/gen9_encoder.cpp 
b/backend/src/backend/gen9_encoder.cpp
index b5be852..47175f6 100644
--- a/backend/src/backend/gen9_encoder.cpp
+++ b/backend/src/backend/gen9_encoder.cpp
@@ -84,7 +84,7 @@ namespace gbe
     else if (dst.file == GEN_GENERAL_REGISTER_FILE)
       gen9_insn->bits1.sends.dest_reg_file_0 = 1;
     else
-      assert(!"should not reach here");
+      NOT_SUPPORTED;
 
     gen9_insn->bits1.sends.src1_reg_file_0 = 1;
     gen9_insn->bits1.sends.src1_reg_nr = src1.nr;
@@ -116,11 +116,13 @@ namespace gbe
     return insn->bits3.ud;
   }
 
-  void Gen9Encoder::UNTYPED_WRITE(GenRegister addr, GenRegister data, 
GenRegister bti, uint32_t elemNum)
+  void Gen9Encoder::UNTYPED_WRITE(GenRegister addr, GenRegister data, 
GenRegister bti, uint32_t elemNum, bool useSends)
   {
-    if (addr.reg() == data.reg())
-      Gen8Encoder::UNTYPED_WRITE(addr, data, bti, elemNum);
+    if (!useSends)
+      Gen8Encoder::UNTYPED_WRITE(addr, data, bti, elemNum, false);
     else {
+      GBE_ASSERT(addr.reg() != data.reg());
+
       GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
       Gen9NativeInstruction *gen9_insn = &insn->gen9_insn;
       assert(elemNum >= 1 || elemNum <= 4);
@@ -134,7 +136,7 @@ namespace gbe
       else if (this->curr.execWidth == 16)
         gen9_insn->bits2.sends.src1_length = 2 * elemNum;
       else
-        assert(!"unsupported");
+        NOT_SUPPORTED;
 
       if (bti.file == GEN_IMMEDIATE_VALUE) {
         gen9_insn->bits2.sends.sel_reg32_desc = 0;
@@ -164,11 +166,13 @@ namespace gbe
     return insn->bits3.ud;
   }
 
-  void Gen9Encoder::BYTE_SCATTER(GenRegister addr, GenRegister data, 
GenRegister bti, uint32_t elemSize)
+  void Gen9Encoder::BYTE_SCATTER(GenRegister addr, GenRegister data, 
GenRegister bti, uint32_t elemSize, bool useSends)
   {
-    if (addr.reg() == data.reg())
-      Gen8Encoder::BYTE_SCATTER(addr, data, bti, elemSize);
+    if (!useSends)
+      Gen8Encoder::BYTE_SCATTER(addr, data, bti, elemSize, false);
     else {
+      GBE_ASSERT(addr.reg() != data.reg());
+
       GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
       Gen9NativeInstruction *gen9_insn = &insn->gen9_insn;
 
@@ -181,7 +185,7 @@ namespace gbe
       else if (this->curr.execWidth == 16)
         gen9_insn->bits2.sends.src1_length = 2;
       else
-        assert(!"unsupported");
+        NOT_SUPPORTED;
 
       if (bti.file == GEN_IMMEDIATE_VALUE) {
         gen9_insn->bits2.sends.sel_reg32_desc = 0;
diff --git a/backend/src/backend/gen9_encoder.hpp 
b/backend/src/backend/gen9_encoder.hpp
index 1c40b92..4eb36e4 100644
--- a/backend/src/backend/gen9_encoder.hpp
+++ b/backend/src/backend/gen9_encoder.hpp
@@ -48,9 +48,9 @@ namespace gbe
                 bool isLD,
                 bool isUniform);
     void setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, 
GenRegister src0, GenRegister src1);
-    virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemNum, bool useSends);
     virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemNum);
-    virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemSize);
+    virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemSize, bool useSends);
     virtual unsigned setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize);
   };
 }
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 8288fa5..4341677 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2146,7 +2146,7 @@ namespace gbe
     const GenRegister bti = ra->genReg(insn.src(elemNum+1));
 
     if (bti.file == GEN_IMMEDIATE_VALUE) {
-      p->UNTYPED_WRITE(src, src, bti, elemNum*2);
+      p->UNTYPED_WRITE(src, src, bti, elemNum*2, false);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(0));
       const GenRegister btiTmp = ra->genReg(insn.dst(1));
@@ -2158,7 +2158,7 @@ namespace gbe
       p->push();
         p->curr.predicate = GEN_PREDICATE_NORMAL;
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-        p->UNTYPED_WRITE(src, src, GenRegister::addr1(0), elemNum*2);
+        p->UNTYPED_WRITE(src, src, GenRegister::addr1(0), elemNum*2, false);
       p->pop();
       afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
@@ -2167,12 +2167,10 @@ namespace gbe
   void GenContext::emitUntypedWriteInstruction(const SelectionInstruction 
&insn) {
     const GenRegister addr = ra->genReg(insn.src(0));
     GenRegister data = ra->genReg(insn.src(1));
-    if (!insn.extra.splitSend)
-      data = addr;
     const uint32_t elemNum = insn.extra.elem;
     const GenRegister bti = ra->genReg(insn.src(elemNum+1));
     if (bti.file == GEN_IMMEDIATE_VALUE) {
-      p->UNTYPED_WRITE(addr, data, bti, elemNum);
+      p->UNTYPED_WRITE(addr, data, bti, elemNum, insn.extra.splitSend);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(0));
       const GenRegister btiTmp = ra->genReg(insn.dst(1));
@@ -2188,7 +2186,7 @@ namespace gbe
       p->push();
         p->curr.predicate = GEN_PREDICATE_NORMAL;
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-        p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum);
+        p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum, 
insn.extra.splitSend);
       p->pop();
       afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
@@ -2222,13 +2220,11 @@ namespace gbe
   void GenContext::emitByteScatterInstruction(const SelectionInstruction 
&insn) {
     const GenRegister addr = ra->genReg(insn.src(0));
     GenRegister data = ra->genReg(insn.src(1));
-    if (!insn.extra.splitSend)
-      data = addr;
     const uint32_t elemSize = insn.extra.elem;
     const GenRegister bti = ra->genReg(insn.src(2));
 
     if (bti.file == GEN_IMMEDIATE_VALUE) {
-      p->BYTE_SCATTER(addr, data, bti, elemSize);
+      p->BYTE_SCATTER(addr, data, bti, elemSize, insn.extra.splitSend);
     } else {
       const GenRegister tmp = ra->genReg(insn.dst(0));
       const GenRegister btiTmp = ra->genReg(insn.dst(1));
@@ -2244,7 +2240,7 @@ namespace gbe
       p->push();
         p->curr.predicate = GEN_PREDICATE_NORMAL;
         p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-        p->BYTE_SCATTER(addr, data, GenRegister::addr1(0), elemSize);
+        p->BYTE_SCATTER(addr, data, GenRegister::addr1(0), elemSize, 
insn.extra.splitSend);
       p->pop();
       afterMessage(insn, bti, tmp, btiTmp, jip0);
     }
@@ -2895,14 +2891,14 @@ namespace gbe
       // Write it out.
       p->curr.execWidth = 8;
       p->curr.noMask = 1;
-      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
+      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false);
       p->ADD(addr, addr, GenRegister::immud(32));
 
       // time stamps
       for (int i = 0; i < 3; i++) {
         p->curr.execWidth = 8;
         p->MOV(data, GenRegister::retype(profilingReg[i], GEN_TYPE_UD));
-        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
+        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false);
         p->ADD(addr, addr, GenRegister::immud(32));
       }
     } p->pop();
@@ -3308,7 +3304,7 @@ namespace gbe
       p->curr.execWidth = 8;
       p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
+      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false);
     }
     else
     {
@@ -3316,7 +3312,7 @@ namespace gbe
       p->MOV(msgData, threadData);
       p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
       p->ADD(msgAddr, msgAddr, msgSlmOff);
-      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
+      p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false);
     }
 
     /* init partialData register, it will hold the final result */
@@ -3474,11 +3470,11 @@ namespace gbe
   void GenContext::emitPrintfLongInstruction(GenRegister& addr, GenRegister& 
data,
                                              GenRegister& src, uint32_t bti) {
     p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src.bottom_half());
-    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
+    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false);
     p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
 
     p->MOV(GenRegister::retype(data, GEN_TYPE_UD), 
src.top_half(this->simdWidth));
-    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
+    p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false);
     p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
   }
 
@@ -3503,15 +3499,15 @@ namespace gbe
       p->ATOMIC(addr, GEN_ATOMIC_OP_ADD, addr, 
GenRegister::immud(insn.extra.printfBTI), 2);
       /* Write out the header. */
       p->MOV(data, GenRegister::immud(0xAABBCCDD));
-      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1);
+      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1, false);
 
       p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
       p->MOV(data, GenRegister::immud(insn.extra.printfSize + 12));
-      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1);
+      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1, false);
 
       p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
       p->MOV(data, GenRegister::immud(insn.extra.printfNum));
-      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1);
+      p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1, false);
 
       p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
     }
@@ -3521,11 +3517,11 @@ namespace gbe
       src = ra->genReg(insn.src(i));
       if (src.type == GEN_TYPE_UD || src.type == GEN_TYPE_D || src.type == 
GEN_TYPE_F) {
         p->MOV(GenRegister::retype(data, src.type), src);
-        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1);
+        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1, false);
         p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
       } else if (src.type == GEN_TYPE_B || src.type == GEN_TYPE_UB ) {
         p->MOV(GenRegister::retype(data, GEN_TYPE_UD), src);
-        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1);
+        p->UNTYPED_WRITE(addr, addr, GenRegister::immud(insn.extra.printfBTI), 
1, false);
         p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
       } else if (src.type == GEN_TYPE_L || src.type == GEN_TYPE_UL ) {
         emitPrintfLongInstruction(addr, data, src, insn.extra.printfBTI);
diff --git a/backend/src/backend/gen_encoder.cpp 
b/backend/src/backend/gen_encoder.cpp
index 49d93e8..9d23df3 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -433,7 +433,7 @@ namespace gbe
     assert(0);
   }
 
-  void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, 
GenRegister bti, uint32_t elemNum) {
+  void GenEncoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, 
GenRegister bti, uint32_t elemNum, bool useSends) {
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
     assert(elemNum >= 1 || elemNum <= 4);
     this->setHeader(insn);
@@ -534,7 +534,7 @@ namespace gbe
     return insn->bits3.ud;
   }
 
-  void GenEncoder::BYTE_SCATTER(GenRegister msg, GenRegister data, GenRegister 
bti, uint32_t elemSize) {
+  void GenEncoder::BYTE_SCATTER(GenRegister msg, GenRegister data, GenRegister 
bti, uint32_t elemSize, bool useSends) {
     GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 
     this->setHeader(insn);
diff --git a/backend/src/backend/gen_encoder.hpp 
b/backend/src/backend/gen_encoder.hpp
index e5eb2e2..31b6e92 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -177,7 +177,7 @@ namespace gbe
     /*! Untyped read (upto 4 channels) */
     virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
bti, uint32_t elemNum);
     /*! Untyped write (upto 4 channels) */
-    virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemNum);
+    virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemNum, bool useSends);
     /*! Untyped read A64(upto 4 channels) */
     virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t 
elemNum);
     /*! Untyped write (upto 4 channels) */
@@ -185,7 +185,7 @@ namespace gbe
     /*! Byte gather (for unaligned bytes, shorts and ints) */
     void BYTE_GATHER(GenRegister dst, GenRegister src, GenRegister bti, 
uint32_t elemSize);
     /*! Byte scatter (for unaligned bytes, shorts and ints) */
-    virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemSize);
+    virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemSize, bool useSends);
     /*! Byte gather a64 (for unaligned bytes, shorts and ints) */
     virtual void BYTE_GATHERA64(GenRegister dst, GenRegister src, uint32_t 
elemSize);
     /*! Byte scatter a64 (for unaligned bytes, shorts and ints) */
-- 
1.9.1

_______________________________________________
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet

Reply via email to