[Beignet] [PATCH 5/5] enable sends for typed write

2016-12-09 Thread Guo, Yejun
Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen9_encoder.cpp   | 22 +
 backend/src/backend/gen9_encoder.hpp   |  1 +
 backend/src/backend/gen_context.cpp|  3 ++-
 backend/src/backend/gen_encoder.cpp|  2 +-
 backend/src/backend/gen_encoder.hpp|  4 +++-
 backend/src/backend/gen_insn_selection.cpp | 31 --
 backend/src/backend/gen_insn_selection.hpp |  1 +
 7 files changed, 55 insertions(+), 9 deletions(-)

diff --git a/backend/src/backend/gen9_encoder.cpp 
b/backend/src/backend/gen9_encoder.cpp
index b42c833..cfbf985 100644
--- a/backend/src/backend/gen9_encoder.cpp
+++ b/backend/src/backend/gen9_encoder.cpp
@@ -146,6 +146,28 @@ namespace gbe
 }
   }
 
+  void Gen9Encoder::TYPED_WRITE(GenRegister header, GenRegister data, bool 
header_present, unsigned char bti, bool useSends)
+  {
+if (!useSends)
+  Gen8Encoder::TYPED_WRITE(header, data, header_present, bti, false);
+else {
+  assert(header.reg() != data.reg());
+
+  GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
+  Gen9NativeInstruction *gen9_insn = >gen9_insn;
+  assert(header_present);
+
+  this->setHeader(insn);
+  insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA;
+
+  setSendsOperands(gen9_insn, GenRegister::null(), header, data);
+  gen9_insn->bits2.sends.src1_length = 4;   //src0_length: 
5(header+u+v+w+lod), src1_length: 4(data)
+
+  gen9_insn->bits2.sends.sel_reg32_desc = 0;
+  setTypedWriteMessage(insn, bti, GEN_TYPED_WRITE, 5, header_present);
+}
+  }
+
   unsigned Gen9Encoder::setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize)
   {
 uint32_t msg_length = 0;
diff --git a/backend/src/backend/gen9_encoder.hpp 
b/backend/src/backend/gen9_encoder.hpp
index 9b3af13..d754d59 100644
--- a/backend/src/backend/gen9_encoder.hpp
+++ b/backend/src/backend/gen9_encoder.hpp
@@ -49,6 +49,7 @@ namespace gbe
 bool isUniform);
 void setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, 
GenRegister src0, GenRegister src1);
 virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemNum, bool useSends);
+virtual void TYPED_WRITE(GenRegister header, GenRegister data, bool 
header_present, unsigned char bti, bool useSends);
 virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemNum);
 virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemSize, bool useSends);
 virtual unsigned setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize);
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index d161ebf..c8019e3 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2465,8 +2465,9 @@ namespace gbe
 
   void GenContext::emitTypedWriteInstruction(const SelectionInstruction ) 
{
 const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), 
GEN_TYPE_UD);
+GenRegister data = ra->genReg(insn.src(5));
 const uint32_t bti = insn.getbti();
-p->TYPED_WRITE(header, true, bti);
+p->TYPED_WRITE(header, data, true, bti, insn.extra.typedWriteSplitSend);
   }
 
   static void calcGID(GenRegister& reg, GenRegister& tmp, int flag, int 
subFlag, int dim, GenContext *gc)
diff --git a/backend/src/backend/gen_encoder.cpp 
b/backend/src/backend/gen_encoder.cpp
index a9bdd3a..5dea48a 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1257,7 +1257,7 @@ namespace gbe
   msg_type, vme_search_path_lut, lut_sub);
   }
 
-  void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned 
char bti)
+  void GenEncoder::TYPED_WRITE(GenRegister msg, GenRegister data, bool 
header_present, unsigned char bti, bool useSends)
   {
 GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 uint32_t msg_type = GEN_TYPED_WRITE;
diff --git a/backend/src/backend/gen_encoder.hpp 
b/backend/src/backend/gen_encoder.hpp
index b86e9e4..66aa9cb 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -234,8 +234,10 @@ namespace gbe
 
 /*! TypedWrite instruction for texture */
 virtual void TYPED_WRITE(GenRegister header,
+ GenRegister data,
  bool header_present,
- unsigned char bti);
+ unsigned char bti,
+ bool useSends);
 /*! Extended math function (2 sources) */
 void MATH(GenRegister dst, uint32_t function, GenRegister src0, 
GenRegister src1);
 /*! Extended math function (1 source) */
diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 

[Beignet] [PATCH 4/5] refine code starting from header in typedwrite

2016-12-09 Thread Guo, Yejun
With this refine, the virtual reg and physical reg will be logically
1:1 mapping, and it helps the later instruction sends

Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen_insn_selection.cpp | 145 -
 1 file changed, 78 insertions(+), 67 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 1ebbbe6..ec0897c 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6809,86 +6809,97 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   {
 INLINE bool emitOne(Selection::Opaque , const 
ir::TypedWriteInstruction , bool ) const
 {
-  using namespace ir;
-  const uint32_t simdWidth = sel.ctx.getSimdWidth();
-  GenRegister msgs[9]; // (header + U + V + R + LOD + 4)
-  const uint32_t msgNum = (8 / (simdWidth / 8)) + 1;
-  const uint32_t dim = insn.getSrcNum() - 4;
-
-  if (simdWidth == 16) {
-for(uint32_t i = 0; i < msgNum; i++)
-  msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-  } else {
-uint32_t valueID = 0;
-uint32_t msgID = 0;
-msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-for(; msgID < 1 + dim; msgID++, valueID++)
-  msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), 
insn.getCoordType());
-
-// fake v.
-if (dim < 2)
-  msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-// fake w.
-if (dim < 3)
-  msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-// LOD.
-msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-for(; valueID < insn.getSrcNum(); msgID++, valueID++)
-  msgs[msgID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
-  }
-
+  const GenRegister header = GenRegister::ud8grf(sel.reg(ir::FAMILY_REG));
   sel.push();
   sel.curr.predicate = GEN_PREDICATE_NONE;
   sel.curr.noMask = 1;
-  sel.MOV(msgs[0], GenRegister::immud(0));
+  sel.MOV(header, GenRegister::immud(0));
   sel.curr.execWidth = 1;
-
-  GenRegister channelEn = sel.getOffsetReg(msgs[0], 0, 7*4);
+  GenRegister channelEn = sel.getOffsetReg(header, 0, 7*4);
   // Enable all channels.
   sel.MOV(channelEn, GenRegister::immud(0x));
-  sel.curr.execWidth = 8;
-  // Set zero LOD.
-  if (simdWidth == 8)
-sel.MOV(msgs[4], GenRegister::immud(0));
-  else
-sel.MOV(GenRegister::Qn(msgs[2], 0), GenRegister::immud(0));
   sel.pop();
 
+  const uint32_t simdWidth = sel.ctx.getSimdWidth();
+  if (simdWidth == 16)
+emitWithSimd16(sel, insn, markChildren, header);
+  else if (simdWidth == 8)
+emitWithSimd8(sel, insn, markChildren, header);
+  else
+assert(!"not supported");
+  return true;
+}
+
+INLINE bool emitWithSimd16(Selection::Opaque , const 
ir::TypedWriteInstruction , bool , const GenRegister& header) 
const
+{
+  using namespace ir;
+
+  GenRegister msgs[9]; // (header + U + V + W + LOD + 4)
+  msgs[0] = header;
+  for (uint32_t i = 1; i < 9; ++i) {
+//SIMD16 will be split into two SIMD8,
+//each virtual reg in msgs requires one physical reg with 8 DWORDs (32 
bytes),
+//so, declare with FAMILY_WORD, and the allocated size will be 
sizeof(WORD)*SIMD16 = 32 bytes
+msgs[i] = sel.selReg(sel.reg(FAMILY_WORD), TYPE_U32);
+  }
+
+  const uint32_t dims = insn.getSrcNum() - 4;
   uint32_t bti = insn.getImageIndex();
-  if (simdWidth == 8)
-sel.TYPED_WRITE(msgs, msgNum, bti, dim == 3);
-  else {
-sel.push();
-sel.curr.execWidth = 8;
-for( uint32_t quarter = 0; quarter < 2; quarter++)
-{
-  #define QUARTER_MOV0(msgs, msgid, src) \
-sel.MOV(GenRegister::Qn(GenRegister::retype(msgs[msgid/2], 
GEN_TYPE_UD), msgid % 2), \
-GenRegister::Qn(src, quarter))
-
-  #define QUARTER_MOV1(msgs, msgid, src) \
-  sel.MOV(GenRegister::Qn(GenRegister::retype(msgs[msgid/2], 
src.type), msgid % 2), \
-  GenRegister::Qn(src, quarter))
-  sel.curr.quarterControl = (quarter == 0) ? GEN_COMPRESSION_Q1 : 
GEN_COMPRESSION_Q2;
-  // Set U,V,W
-  QUARTER_MOV0(msgs, 1, sel.selReg(insn.getSrc(0), 
insn.getCoordType()));
-  if (dim > 1)
-QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), 
insn.getCoordType()));
-  if (dim > 2)
-QUARTER_MOV0(msgs, 3, sel.selReg(insn.getSrc(2), 
insn.getCoordType()));
-  // Set R, G, B, A
-  QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(dim), 
insn.getSrcType()));
-  QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(dim + 1), 
insn.getSrcType()));
-  QUARTER_MOV1(msgs, 7, 

[Beignet] [PATCH 3/5] add sends for atomic operation, only for ocl 1.2

2016-12-09 Thread Guo, Yejun
Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen75_encoder.cpp  |  2 +-
 backend/src/backend/gen75_encoder.hpp  |  2 +-
 backend/src/backend/gen8_encoder.cpp   |  2 +-
 backend/src/backend/gen8_encoder.hpp   |  2 +-
 backend/src/backend/gen9_encoder.cpp   | 28 
 backend/src/backend/gen9_encoder.hpp   |  1 +
 backend/src/backend/gen_context.cpp| 20 ++--
 backend/src/backend/gen_encoder.cpp|  4 ++--
 backend/src/backend/gen_encoder.hpp|  2 +-
 backend/src/backend/gen_insn_selection.cpp | 25 -
 10 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/backend/src/backend/gen75_encoder.cpp 
b/backend/src/backend/gen75_encoder.cpp
index 9cafaa7..725c774 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -126,7 +126,7 @@ namespace gbe
 return gen7_insn->bits3.ud;
   }
 
-  void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister 
src, GenRegister bti, uint32_t srcNum) {
+  void Gen75Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister 
src, GenRegister bti, uint32_t srcNum, bool useSends) {
 GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 
 this->setHeader(insn);
diff --git a/backend/src/backend/gen75_encoder.hpp 
b/backend/src/backend/gen75_encoder.hpp
index 517afff..2a226cc 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -42,7 +42,7 @@ namespace gbe
 virtual void JMPI(GenRegister src, bool longjmp = false);
 /*! Patch JMPI/BRC/BRD (located at index insnID) with the given jump 
distance */
 virtual void patchJMPI(uint32_t insnID, int32_t jip, int32_t uip);
-virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum);
+virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum, bool useSends);
 virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
bti, uint32_t elemNum);
 virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister 
bti, uint32_t elemNum);
 virtual void setHeader(GenNativeInstruction *insn);
diff --git a/backend/src/backend/gen8_encoder.cpp 
b/backend/src/backend/gen8_encoder.cpp
index 2928943..277acda 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -153,7 +153,7 @@ namespace gbe
 return gen8_insn->bits3.ud;
   }
 
-  void Gen8Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister 
src, GenRegister bti, uint32_t srcNum) {
+  void Gen8Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister 
src, GenRegister data, GenRegister bti, uint32_t srcNum, bool useSends) {
 GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 
 this->setHeader(insn);
diff --git a/backend/src/backend/gen8_encoder.hpp 
b/backend/src/backend/gen8_encoder.hpp
index 4afec0c..fa62a8d 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -44,7 +44,7 @@ namespace gbe
 virtual void F16TO32(GenRegister dest, GenRegister src0);
 virtual void F32TO16(GenRegister dest, GenRegister src0);
 virtual void LOAD_INT64_IMM(GenRegister dest, GenRegister value);
-virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum);
+virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister addr, 
GenRegister data, GenRegister bti, uint32_t srcNum, bool useSends);
 virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister 
src, GenRegister bti, uint32_t srcNum);
 virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
bti, uint32_t elemNum);
 virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister 
bti, uint32_t elemNum, bool useSends);
diff --git a/backend/src/backend/gen9_encoder.cpp 
b/backend/src/backend/gen9_encoder.cpp
index 37ffb0d..b42c833 100644
--- a/backend/src/backend/gen9_encoder.cpp
+++ b/backend/src/backend/gen9_encoder.cpp
@@ -194,4 +194,32 @@ namespace gbe
 gen9_insn->bits2.sends.sel_reg32_desc = 1;
 }
   }
+
+  void Gen9Encoder::ATOMIC(GenRegister dst, uint32_t function, GenRegister 
addr, GenRegister data, GenRegister bti, uint32_t srcNum, bool useSends)
+  {
+if (!useSends)
+  Gen8Encoder::ATOMIC(dst, function, addr, data, bti, srcNum, false);
+else {
+  assert(addr.reg() != data.reg());
+
+  GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
+  Gen9NativeInstruction *gen9_insn = >gen9_insn;
+  this->setHeader(insn);
+  insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA;
+
+  setSendsOperands(gen9_insn, dst, addr, data);
+  if (this->curr.execWidth == 8)
+gen9_insn->bits2.sends.src1_length = srcNum - 1;
+  else if (this->curr.execWidth == 16)
+

[Beignet] [PATCH 2/5] support sends for long write

2016-12-09 Thread Guo, Yejun
Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen_insn_selection.cpp | 28 +++-
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 1cd6137..f46207f 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -1625,7 +1625,6 @@ namespace gbe
   // dst: srcNum, (flagTemp)
   // src: srcNum, addr, srcNum, bti.
   insn = this->appendInsn(SEL_OP_WRITE64, dstNum, srcNum*2 + 2);
-  vector = this->appendVector();
 
   for (uint32_t elemID = 0; elemID < srcNum; ++elemID)
 insn->src(elemID) = src[elemID];
@@ -1646,10 +1645,29 @@ namespace gbe
   }
   insn->extra.elem = srcNum;
 
-  vector->regNum = srcNum + 1;
-  vector->offsetID = srcNum;
-  vector->reg = >src(srcNum);
-  vector->isSrc = 1;
+  if (hasSends()) {
+insn->extra.splitSend = 1;
+
+//addr regs
+vector = this->appendVector();
+vector->regNum = 1;
+vector->offsetID = srcNum;
+vector->reg = >src(srcNum);
+vector->isSrc = 1;
+
+//data regs
+vector = this->appendVector();
+vector->regNum = srcNum;
+vector->offsetID = srcNum+1;
+vector->reg = >src(srcNum+1);
+vector->isSrc = 1;
+  } else {
+vector = this->appendVector();
+vector->regNum = srcNum + 1;
+vector->offsetID = srcNum;
+vector->reg = >src(srcNum);
+vector->isSrc = 1;
+  }
 }
 
 if (bti.file != GEN_IMMEDIATE_VALUE) {
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH 1/5] refine code to change insn.extra.splitSend as encoder funtion parameter

2016-12-09 Thread Guo, Yejun
it makes possible to switch send and sends within the encoder function.

Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen8_context.cpp | 14 ++---
 backend/src/backend/gen8_encoder.cpp |  2 +-
 backend/src/backend/gen8_encoder.hpp |  2 +-
 backend/src/backend/gen9_encoder.cpp | 16 +--
 backend/src/backend/gen9_encoder.hpp |  4 ++--
 backend/src/backend/gen_context.cpp  | 38 
 backend/src/backend/gen_encoder.cpp  |  4 ++--
 backend/src/backend/gen_encoder.hpp  |  4 ++--
 8 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/backend/src/backend/gen8_context.cpp 
b/backend/src/backend/gen8_context.cpp
index 95b1013..a3045ce 100644
--- a/backend/src/backend/gen8_context.cpp
+++ b/backend/src/backend/gen8_context.cpp
@@ -969,8 +969,6 @@ namespace gbe
 const GenRegister addr = ra->genReg(insn.src(elemNum));
 const GenRegister bti = ra->genReg(insn.src(elemNum*2+1));
 GenRegister data = ra->genReg(insn.src(elemNum+1));
-if (!insn.extra.splitSend)
-  data = addr;
 
 /* Because BDW's store and load send instructions for 64 bits require the 
bti to be surfaceless,
which we can not accept. We just fallback to 2 DW untypewrite here. */
@@ -981,7 +979,7 @@ namespace gbe
 }
 
 if (bti.file == GEN_IMMEDIATE_VALUE) {
-  p->UNTYPED_WRITE(addr, data, bti, elemNum*2);
+  p->UNTYPED_WRITE(addr, data, bti, elemNum*2, insn.extra.splitSend);
 } else {
   const GenRegister tmp = ra->genReg(insn.dst(elemNum));
   const GenRegister btiTmp = ra->genReg(insn.dst(elemNum + 1));
@@ -997,7 +995,7 @@ namespace gbe
   p->push();
 p->curr.predicate = GEN_PREDICATE_NORMAL;
 p->curr.useFlag(insn.state.flag, insn.state.subFlag);
-p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2);
+p->UNTYPED_WRITE(addr, data, GenRegister::addr1(0), elemNum*2, 
insn.extra.splitSend);
   p->pop();
   afterMessage(insn, bti, tmp, btiTmp, jip0);
 }
@@ -1358,7 +1356,7 @@ namespace gbe
   nextDst = GenRegister::Qn(tempDst, 1);
   p->MOV(nextDst, nextSrc);
 p->pop();
-p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
+p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false);
 p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
 
 p->push();
@@ -1374,7 +1372,7 @@ namespace gbe
   nextDst = GenRegister::Qn(tempDst, 1);
   p->MOV(nextDst, nextSrc);
 p->pop();
-p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1);
+p->UNTYPED_WRITE(addr, addr, GenRegister::immud(bti), 1, false);
 p->ADD(addr, addr, GenRegister::immud(sizeof(uint32_t)));
   }
 
@@ -1801,7 +1799,7 @@ namespace gbe
   p->curr.execWidth = 8;
   p->MUL(msgAddr, threadId, GenRegister::immd(0x8));
   p->ADD(msgAddr, msgAddr, msgSlmOff);
-  p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2);
+  p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 2, false);
 }
 else
 {
@@ -1809,7 +1807,7 @@ namespace gbe
   p->MOV(msgData, threadData);
   p->MUL(msgAddr, threadId, GenRegister::immd(0x4));
   p->ADD(msgAddr, msgAddr, msgSlmOff);
-  p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1);
+  p->UNTYPED_WRITE(msg, msg, GenRegister::immw(0xFE), 1, false);
 }
 
 /* init partialData register, it will hold the final result */
diff --git a/backend/src/backend/gen8_encoder.cpp 
b/backend/src/backend/gen8_encoder.cpp
index 8f73346..2928943 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -268,7 +268,7 @@ namespace gbe
 return insn->bits3.ud;
   }
 
-  void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, 
GenRegister bti, uint32_t elemNum) {
+  void Gen8Encoder::UNTYPED_WRITE(GenRegister msg, GenRegister data, 
GenRegister bti, uint32_t elemNum, bool useSends) {
 GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 assert(elemNum >= 1 || elemNum <= 4);
 this->setHeader(insn);
diff --git a/backend/src/backend/gen8_encoder.hpp 
b/backend/src/backend/gen8_encoder.hpp
index f6a91a0..4afec0c 100644
--- a/backend/src/backend/gen8_encoder.hpp
+++ b/backend/src/backend/gen8_encoder.hpp
@@ -47,7 +47,7 @@ namespace gbe
 virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, 
GenRegister bti, uint32_t srcNum);
 virtual void ATOMICA64(GenRegister dst, uint32_t function, GenRegister 
src, GenRegister bti, uint32_t srcNum);
 virtual void UNTYPED_READ(GenRegister dst, GenRegister src, GenRegister 
bti, uint32_t elemNum);
-virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister 
bti, uint32_t elemNum);
+virtual void UNTYPED_WRITE(GenRegister src, GenRegister data, GenRegister 
bti, uint32_t elemNum, bool useSends);
 virtual void UNTYPED_READA64(GenRegister dst, GenRegister src, uint32_t 
elemNum);
 virtual void UNTYPED_WRITEA64(GenRegister src, 

Re: [Beignet] [PATCH] Runtime: Use cl_ulong as CL_DEVICE_MAX_MEM_ALLOC_SIZE's return type.

2016-12-09 Thread Simon Richter
Hi,

On Fri, Dec 09, 2016 at 06:49:56AM +, Yang, Rong R wrote:

> The change is for the case that param_value_size > sizeof(device->FIELD).
> For example:
>   cl_ulong max_alloc_size;
>   clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, 
> sizeof(max_alloc_size), _alloc_size, NULL);

> param_value_size is 8, if beignet's device->max_alloc_size is size_t, 
> sizeof(device->max_alloc_size) is 4 in i386 systems.

Yes, but this has been fixed now.

> Because max_alloc_size hasn't been initialized, is garbage, and 
> memcpy(param_value, >FIELD, sizeof device->FIELD); 
> only copy the low 4 bytes, the high 4 bytes is still garbage.
> For example max_alloc_size is 0xdeaddeaddeaddead before call clGetDeviceInfo, 
> and device-> max_alloc_size is 0x4000,
> After clGetDeviceInfo, max_alloc_size's value is 0xdeaddead4000.

Right, but the returned size value is 4, so the program should know that
there was a communication error, and bail out anyway, because if Beignet
doesn't return sizeof(cl_ulong) there, then obviously it hasn't stored a
cl_ulong, so the result cannot be interpreted that way.

If a program doesn't do that check, it is broken. I cannot expect

  __uint128_t max_alloc_size;
  clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_alloc_size), 
_alloc_size, NULL);

to work either. People need to write this out completely, as

  __uint128_t max_alloc_size;
  cl_uint max_alloc_size_size;
  int error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof 
max_alloc_size, _alloc_size, _alloc_size_size);
  if(error != CL_SUCCESS)
goto error;
  if(max_alloc_size_size != sizeof max_alloc_size)
goto error;

and if they don't, I want valgrind to say "btw, they are using a value with
uninitialized bits here."

> So add memset(param_value, 0, param_value_size) to clear param_value, the 
> param_value's size is param_value_size, I think it is safe.

Safe, yes, because the spec doesn't say what should happen to the rest of
the buffer (so overwriting with zeros is fine), but also unnecessary.

   Simon
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet