[Beignet] [PATCH 1/3] refine code starting from header in typedwrite

2016-12-07 Thread Guo, Yejun
With this refinement, the virtual registers and physical registers map
1:1 logically, which helps the later switch to the sends instruction.

Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen_insn_selection.cpp | 145 -
 1 file changed, 78 insertions(+), 67 deletions(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 6624337..94c5e9e 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -6734,86 +6734,97 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
   {
 INLINE bool emitOne(Selection::Opaque &sel, const 
ir::TypedWriteInstruction &insn, bool &markChildren) const
 {
-  using namespace ir;
-  const uint32_t simdWidth = sel.ctx.getSimdWidth();
-  GenRegister msgs[9]; // (header + U + V + R + LOD + 4)
-  const uint32_t msgNum = (8 / (simdWidth / 8)) + 1;
-  const uint32_t dim = insn.getSrcNum() - 4;
-
-  if (simdWidth == 16) {
-for(uint32_t i = 0; i < msgNum; i++)
-  msgs[i] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-  } else {
-uint32_t valueID = 0;
-uint32_t msgID = 0;
-msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-for(; msgID < 1 + dim; msgID++, valueID++)
-  msgs[msgID] = sel.selReg(insn.getSrc(msgID - 1), 
insn.getCoordType());
-
-// fake v.
-if (dim < 2)
-  msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-// fake w.
-if (dim < 3)
-  msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-// LOD.
-msgs[msgID++] = sel.selReg(sel.reg(FAMILY_DWORD), TYPE_U32);
-for(; valueID < insn.getSrcNum(); msgID++, valueID++)
-  msgs[msgID] = sel.selReg(insn.getSrc(valueID), insn.getSrcType());
-  }
-
+  const GenRegister header = GenRegister::ud8grf(sel.reg(ir::FAMILY_REG));
   sel.push();
   sel.curr.predicate = GEN_PREDICATE_NONE;
   sel.curr.noMask = 1;
-  sel.MOV(msgs[0], GenRegister::immud(0));
+  sel.MOV(header, GenRegister::immud(0));
   sel.curr.execWidth = 1;
-
-  GenRegister channelEn = sel.getOffsetReg(msgs[0], 0, 7*4);
+  GenRegister channelEn = sel.getOffsetReg(header, 0, 7*4);
   // Enable all channels.
   sel.MOV(channelEn, GenRegister::immud(0xffff));
-  sel.curr.execWidth = 8;
-  // Set zero LOD.
-  if (simdWidth == 8)
-sel.MOV(msgs[4], GenRegister::immud(0));
-  else
-sel.MOV(GenRegister::Qn(msgs[2], 0), GenRegister::immud(0));
   sel.pop();
 
+  const uint32_t simdWidth = sel.ctx.getSimdWidth();
+  if (simdWidth == 16)
+emitWithSimd16(sel, insn, markChildren, header);
+  else if (simdWidth == 8)
+emitWithSimd8(sel, insn, markChildren, header);
+  else
+assert(!"not supported");
+  return true;
+}
+
+INLINE bool emitWithSimd16(Selection::Opaque &sel, const 
ir::TypedWriteInstruction &insn, bool &markChildren, const GenRegister& header) 
const
+{
+  using namespace ir;
+
+  GenRegister msgs[9]; // (header + U + V + W + LOD + 4)
+  msgs[0] = header;
+  for (uint32_t i = 1; i < 9; ++i) {
+//SIMD16 will be split into two SIMD8,
+//each virtual reg in msgs requires one physical reg with 8 DWORDs (32 
bytes),
+//so, declare with FAMILY_WORD, and the allocated size will be 
sizeof(WORD)*SIMD16 = 32 bytes
+msgs[i] = sel.selReg(sel.reg(FAMILY_WORD), TYPE_U32);
+  }
+
+  const uint32_t dims = insn.getSrcNum() - 4;
   uint32_t bti = insn.getImageIndex();
-  if (simdWidth == 8)
-sel.TYPED_WRITE(msgs, msgNum, bti, dim == 3);
-  else {
-sel.push();
-sel.curr.execWidth = 8;
-for( uint32_t quarter = 0; quarter < 2; quarter++)
-{
-  #define QUARTER_MOV0(msgs, msgid, src) \
-sel.MOV(GenRegister::Qn(GenRegister::retype(msgs[msgid/2], 
GEN_TYPE_UD), msgid % 2), \
-GenRegister::Qn(src, quarter))
-
-  #define QUARTER_MOV1(msgs, msgid, src) \
-  sel.MOV(GenRegister::Qn(GenRegister::retype(msgs[msgid/2], 
src.type), msgid % 2), \
-  GenRegister::Qn(src, quarter))
-  sel.curr.quarterControl = (quarter == 0) ? GEN_COMPRESSION_Q1 : 
GEN_COMPRESSION_Q2;
-  // Set U,V,W
-  QUARTER_MOV0(msgs, 1, sel.selReg(insn.getSrc(0), 
insn.getCoordType()));
-  if (dim > 1)
-QUARTER_MOV0(msgs, 2, sel.selReg(insn.getSrc(1), 
insn.getCoordType()));
-  if (dim > 2)
-QUARTER_MOV0(msgs, 3, sel.selReg(insn.getSrc(2), 
insn.getCoordType()));
-  // Set R, G, B, A
-  QUARTER_MOV1(msgs, 5, sel.selReg(insn.getSrc(dim), 
insn.getSrcType()));
-  QUARTER_MOV1(msgs, 6, sel.selReg(insn.getSrc(dim + 1), 
insn.getSrcType()));
-  QUARTER_MOV1(msgs, 7, se

[Beignet] [PATCH 3/3] enable sends for typed write

2016-12-07 Thread Guo, Yejun
Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen9_encoder.cpp   | 20 +++
 backend/src/backend/gen9_encoder.hpp   |  1 +
 backend/src/backend/gen_context.cpp|  5 -
 backend/src/backend/gen_insn_selection.cpp | 31 --
 backend/src/backend/gen_insn_selection.hpp |  1 +
 5 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/backend/src/backend/gen9_encoder.cpp 
b/backend/src/backend/gen9_encoder.cpp
index b5be852..35fbcb9 100644
--- a/backend/src/backend/gen9_encoder.cpp
+++ b/backend/src/backend/gen9_encoder.cpp
@@ -144,6 +144,26 @@ namespace gbe
 }
   }
 
+  void Gen9Encoder::TYPED_WRITE(GenRegister header, GenRegister data, bool 
header_present, unsigned char bti)
+  {
+if (header.reg() == data.reg())
+  Gen8Encoder::TYPED_WRITE(header, data, header_present, bti);
+else {
+  GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
+  Gen9NativeInstruction *gen9_insn = &insn->gen9_insn;
+  assert(header_present);
+
+  this->setHeader(insn);
+  insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA;
+
+  setSendsOperands(gen9_insn, GenRegister::null(), header, data);
+  gen9_insn->bits2.sends.src1_length = 4;   //src0_length: 
5(header+u+v+w+lod), src1_length: 4(data)
+
+  gen9_insn->bits2.sends.sel_reg32_desc = 0;
+  setTypedWriteMessage(insn, bti, GEN_TYPED_WRITE, 5, header_present);
+}
+  }
+
   unsigned Gen9Encoder::setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize)
   {
 uint32_t msg_length = 0;
diff --git a/backend/src/backend/gen9_encoder.hpp 
b/backend/src/backend/gen9_encoder.hpp
index 1c40b92..20f269f 100644
--- a/backend/src/backend/gen9_encoder.hpp
+++ b/backend/src/backend/gen9_encoder.hpp
@@ -49,6 +49,7 @@ namespace gbe
 bool isUniform);
 void setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, 
GenRegister src0, GenRegister src1);
 virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemNum);
+virtual void TYPED_WRITE(GenRegister header, GenRegister data, bool 
header_present, unsigned char bti);
 virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemNum);
 virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemSize);
 virtual unsigned setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize);
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 302a65b..090470f 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2461,8 +2461,11 @@ namespace gbe
 
   void GenContext::emitTypedWriteInstruction(const SelectionInstruction &insn) 
{
 const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), 
GEN_TYPE_UD);
+GenRegister data = ra->genReg(insn.src(5));
+if (!insn.extra.typedWriteSplitSend)
+  data = header;
 const uint32_t bti = insn.getbti();
-p->TYPED_WRITE(header, header, true, bti);
+p->TYPED_WRITE(header, data, true, bti);
   }
 
   static void calcGID(GenRegister& reg, GenRegister& tmp, int flag, int 
subFlag, int dim, GenContext *gc)
diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 94c5e9e..44d7fbc 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2759,7 +2759,6 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
 uint32_t elemID = 0;
 uint32_t i;
 SelectionInstruction *insn = this->appendInsn(SEL_OP_TYPED_WRITE, 0, 
msgNum);
-SelectionVector *msgVector = this->appendVector();;
 
 for( i = 0; i < msgNum; ++i, ++elemID)
   insn->src(elemID) = msgs[i];
@@ -2767,11 +2766,31 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
 insn->setbti(bti);
 insn->extra.msglen = msgNum;
 insn->extra.is3DWrite = is3D;
-// Sends require contiguous allocation
-msgVector->regNum = msgNum;
-msgVector->isSrc = 1;
-msgVector->offsetID = 0;
-msgVector->reg = &insn->src(0);
+
+if (hasSends()) {
+  assert(msgNum == 9);
+  insn->extra.typedWriteSplitSend = 1;
+  //header + coords
+  SelectionVector *msgVector = this->appendVector();
+  msgVector->regNum = 5;
+  msgVector->isSrc = 1;
+  msgVector->offsetID = 0;
+  msgVector->reg = &insn->src(0);
+
+  //data
+  msgVector = this->appendVector();
+  msgVector->regNum = 4;
+  msgVector->isSrc = 1;
+  msgVector->offsetID = 5;
+  msgVector->reg = &insn->src(5);
+} else {
+  // Send require contiguous allocation
+  SelectionVector *msgVector = this->appendVector();
+  msgVector->regNum = msgNum;
+  msgVector->isSrc = 1;
+  msgVector->offsetID = 0;
+   

[Beignet] [PATCH 2/3] change interface for TYPED_WRITE, preparing for sends

2016-12-07 Thread Guo, Yejun
Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen_context.cpp | 2 +-
 backend/src/backend/gen_encoder.cpp | 2 +-
 backend/src/backend/gen_encoder.hpp | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 798fac8..302a65b 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2462,7 +2462,7 @@ namespace gbe
   void GenContext::emitTypedWriteInstruction(const SelectionInstruction &insn) 
{
 const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), 
GEN_TYPE_UD);
 const uint32_t bti = insn.getbti();
-p->TYPED_WRITE(header, true, bti);
+p->TYPED_WRITE(header, header, true, bti);
   }
 
   static void calcGID(GenRegister& reg, GenRegister& tmp, int flag, int 
subFlag, int dim, GenContext *gc)
diff --git a/backend/src/backend/gen_encoder.cpp 
b/backend/src/backend/gen_encoder.cpp
index 49d93e8..3a4b936 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1257,7 +1257,7 @@ namespace gbe
   msg_type, vme_search_path_lut, lut_sub);
   }
 
-  void GenEncoder::TYPED_WRITE(GenRegister msg, bool header_present, unsigned 
char bti)
+  void GenEncoder::TYPED_WRITE(GenRegister msg, GenRegister data, bool 
header_present, unsigned char bti)
   {
 GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 uint32_t msg_type = GEN_TYPED_WRITE;
diff --git a/backend/src/backend/gen_encoder.hpp 
b/backend/src/backend/gen_encoder.hpp
index e5eb2e2..3e0a650 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -234,6 +234,7 @@ namespace gbe
 
 /*! TypedWrite instruction for texture */
 virtual void TYPED_WRITE(GenRegister header,
+ GenRegister data,
  bool header_present,
  unsigned char bti);
 /*! Extended math function (2 sources) */
-- 
1.9.1

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Runtime: Use cl_ulong as CL_DEVICE_MAX_MEM_ALLOC_SIZE's return type.

2016-12-07 Thread Yang Rong
From: Meng Mengmeng 

Also memset the param_value to avoid garbage when param_value_size >
field size.

Signed-off-by: Yang Rong 
---
 src/cl_device_id.c | 1 +
 src/cl_device_id.h | 2 +-
 src/cl_mem.c   | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/cl_device_id.c b/src/cl_device_id.c
index 24334fd..71a7be1 100644
--- a/src/cl_device_id.c
+++ b/src/cl_device_id.c
@@ -926,6 +926,7 @@ cl_get_device_ids(cl_platform_idplatform,
 }   \
 if (param_value_size < sizeof device->FIELD)\
   return CL_INVALID_VALUE;  \
+memset(param_value, 0, param_value_size);   \
 memcpy(param_value, &device->FIELD, sizeof device->FIELD);  \
 return CL_SUCCESS;
 
diff --git a/src/cl_device_id.h b/src/cl_device_id.h
index 69aeeac..58d1d76 100644
--- a/src/cl_device_id.h
+++ b/src/cl_device_id.h
@@ -54,7 +54,7 @@ struct _cl_device_id {
   cl_uint  native_vector_width_half;
   cl_uint  max_clock_frequency;
   cl_uint  address_bits;
-  size_t   max_mem_alloc_size;
+  cl_ulong max_mem_alloc_size;
   cl_device_svm_capabilities  svm_capabilities;
   cl_uint preferred_platform_atomic_alignment;
   cl_uint preferred_global_atomic_alignment;
diff --git a/src/cl_mem.c b/src/cl_mem.c
index afce315..4707f48 100644
--- a/src/cl_mem.c
+++ b/src/cl_mem.c
@@ -141,6 +141,7 @@ cl_get_mem_object_info(cl_mem mem,
 break;
   case CL_MEM_USES_SVM_POINTER:
 *((cl_uint *)param_value) = mem->is_svm;
+break;
   }
 
   return CL_SUCCESS;
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Backend: Fix GenRegister::offset sub reg offset

2016-12-07 Thread Pan, Xiuli
Ping for review.

-Original Message-
From: Pan, Xiuli 
Sent: Monday, November 7, 2016 4:06 PM
To: beignet@lists.freedesktop.org
Cc: Pan, Xiuli 
Subject: [PATCH] Backend: Fix GenRegister::offset sub reg offset

From: Pan Xiuli 

We used to ignore the reg.nr for subreg offset, but after GenRegister offset
is refined, we need to calculate the suboffset with nr and subnr.

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_reg_allocation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_reg_allocation.cpp 
b/backend/src/backend/gen_reg_allocation.cpp
index 4451efb..d1c53f7 100644
--- a/backend/src/backend/gen_reg_allocation.cpp
+++ b/backend/src/backend/gen_reg_allocation.cpp
@@ -1472,7 +1472,7 @@ do { \
   }
   GBE_ASSERT(RA.contains(reg.reg()) != false);
   const uint32_t grfOffset = RA.find(reg.reg())->second;
-  const uint32_t suboffset = reg.subphysical ? reg.subnr : 0;
+  const uint32_t suboffset = reg.subphysical ? reg.nr * GEN_REG_SIZE + 
reg.subnr : 0;
   const GenRegister dst = setGenReg(reg, grfOffset + suboffset);
   if (reg.quarter != 0)
 return GenRegister::Qn(dst, reg.quarter);
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet