[Beignet] [PATCH] Backend: Refine block read/write instruction selection

2016-12-08 Thread Xiuli Pan
From: Pan Xiuli 

Move the block pack/unpack code into the instruction selection stage so
that it can be optimized.

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_context.cpp| 459 ++---
 backend/src/backend/gen_insn_selection.cpp | 439 ---
 2 files changed, 346 insertions(+), 552 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 798fac8..4e971a2 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3551,458 +3551,39 @@ namespace gbe
   }
 
   void GenContext::emitOBReadInstruction(const SelectionInstruction ) {
-const GenRegister dst= ra->genReg(insn.dst(1));
-const GenRegister addrreg = ra->genReg(insn.src(0));
-uint32_t type = dst.type;
-uint32_t typesize = typeSize(type);
-const uint32_t vec_size = insn.extra.elem;
-const GenRegister tmp = GenRegister::retype(ra->genReg(insn.dst(1 + 
vec_size)), type);
-const uint32_t simdWidth = p->curr.execWidth;
-const GenRegister header = GenRegister::retype(ra->genReg(insn.dst(0)), 
GEN_TYPE_UD);
-const GenRegister addr = GenRegister::toUniform(addrreg, addrreg.type);
-GenRegister headeraddr;
-bool isA64 = insn.getbti() == 255;
+const GenRegister header = ra->genReg(insn.src(0));
+const GenRegister tmp = ra->genReg(insn.dst(0));
+const uint32_t bti = insn.getbti();
+const uint32_t ow_size = insn.extra.elem;
+bool isA64 = bti == 255;
 if (isA64)
-  headeraddr = GenRegister::retype(GenRegister::offset(header, 0, 0), 
GEN_TYPE_UL);
+   p->OBREADA64(tmp, header, bti, ow_size);
 else
-  headeraddr = GenRegister::offset(header, 0, 2*4);
-
-// Make header
-p->push();
-{
-  // Copy r0 into the header first
-  p->curr.execWidth = 8;
-  p->curr.predicate = GEN_PREDICATE_NONE;
-  p->curr.noMask = 1;
-  p->MOV(header, GenRegister::ud8grf(0, 0));
-
-  // Update the header with the current address
-  p->curr.execWidth = 1;
-  p->MOV(headeraddr, addr);
-
-  // Put zero in the general state base address
-  if (!isA64)
-p->MOV(GenRegister::offset(header, 0, 5 * 4), GenRegister::immud(0));
-
-}
-p->pop();
-// Now read the data, oword block read can only work with simd16 and no 
mask
-if (vec_size == 1) {
-  p->push();
-  {
-p->curr.execWidth = 16;
-p->curr.noMask = 1;
-if (isA64) {
-  //p->curr.execWidth = 8;
-  p->OBREADA64(dst, header, insn.getbti(), simdWidth * typesize / 16);
-}
-else
-  p->OBREAD(dst, header, insn.getbti(), simdWidth * typesize / 16);
-  }
-  p->pop();
-} else if (vec_size == 2) {
-  p->push();
-  {
-p->curr.execWidth = 16;
-p->curr.noMask = 1;
-if (isA64)
-  p->OBREADA64(tmp, header, insn.getbti(), simdWidth * typesize / 8);
-else
-  p->OBREAD(tmp, header, insn.getbti(), simdWidth * typesize / 8);
-  }
-  p->pop();
-  p->MOV(ra->genReg(insn.dst(1)), GenRegister::offset(tmp, 0));
-  p->MOV(ra->genReg(insn.dst(2)), GenRegister::offset(tmp, 0, simdWidth * 
typesize ));
-} else if (vec_size == 4) {
-  if (simdWidth == 8) {
-p->push();
-{
-  p->curr.execWidth = 16;
-  p->curr.noMask = 1;
-  if (isA64)
-p->OBREADA64(tmp, header, insn.getbti(), 2 * typesize);
-  else
-p->OBREAD(tmp, header, insn.getbti(), 2 * typesize);
-}
-p->pop();
-for (uint32_t j = 0; j < 4; j++)
-  p->MOV(ra->genReg(insn.dst(1 + j)), GenRegister::offset(tmp, 0, j * 
simdWidth * typesize ));
-  } else {
-for (uint32_t i = 0; i < typesize / 2; i++) {
-  if (i > 0) {
-p->push();
-{
-  // Update the address in header
-  p->curr.execWidth = 1;
-  p->ADD(headeraddr, headeraddr, GenRegister::immud(128));
-}
-p->pop();
-  }
-  if (isA64)
-p->OBREADA64(tmp, header, insn.getbti(), 8);
-  else
-p->OBREAD(tmp, header, insn.getbti(), 8);
-  for (uint32_t j = 0; j < 8 / typesize ; j++)
-p->MOV(ra->genReg(insn.dst(1 + j + i * 2)), 
GenRegister::offset(tmp, 0 ,j * simdWidth * typesize ));
-}
-  }
-} else if (vec_size == 8) {
-  if (simdWidth == 8) {
-for (uint32_t i = 0; i < typesize / 2; i++) {
-  if (i > 0) {
-p->push();
-{
-  // Update the address in header
-  p->curr.execWidth = 1;
-  p->ADD(headeraddr, headeraddr, GenRegister::immud(128));
-}
-p->pop();
-  }
-  p->push();
-  {
-p->curr.execWidth = 16;
-p->curr.noMask = 1;
-if (isA64)
-  

Re: [Beignet] [PATCH 2/7] Backend: Refine flag usage in instrction selection

2016-12-08 Thread Yang, Rong R
Pushed the first 5 of the 7 patches in this patchset.
As discussed offline, I removed some useless "sel.curr.flag = 0;  sel.curr.subFlag = 
1;" code where sel.curr.physicalFlag = 0.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Monday, November 14, 2016 16:20
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli 
> Subject: [Beignet] [PATCH 2/7] Backend: Refine flag usage in instrction
> selection
> 
> From: Pan Xiuli 
> 
> We used to mix physical and virtual flags in instruction selection, which
> confuses the dependency tracking in instruction scheduling.
> Refine all physical flags in the instruction selection stage to use flag 0.1
> and remove the flag 0.0 reservation.
> 
> Signed-off-by: Pan Xiuli 
> ---
>  backend/src/backend/gen_insn_selection.cpp | 35
> +-
> backend/src/backend/gen_insn_selection.hpp |  1 -
> backend/src/backend/gen_reg_allocation.cpp | 10 ++---
>  3 files changed, 31 insertions(+), 15 deletions(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index c14e0bc..5adcaed 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -246,7 +246,7 @@ namespace gbe
>// SelectionBlock
>///
> 
> -  SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb),
> isLargeBlock(false), endifLabel( (ir::LabelIndex) 0),
> removeSimpleIfEndif(false){}
> +  SelectionBlock::SelectionBlock(const ir::BasicBlock *bb) : bb(bb),
> + endifLabel( (ir::LabelIndex) 0), removeSimpleIfEndif(false){}
> 
>void SelectionBlock::append(ir::Register reg) { tmp.push_back(reg); }
> 
> @@ -1118,7 +1118,7 @@ namespace gbe
>if(this->block->removeSimpleIfEndif){
>  mov->state.predicate = GEN_PREDICATE_NORMAL;
>  mov->state.flag = 0;
> -mov->state.subFlag = 0;
> +mov->state.subFlag = 1;
>}
>if (this->isScalarReg(insn->src(regID).reg()))
>  mov->state.noMask = 1;
> @@ -1152,7 +1152,7 @@ namespace gbe
>if(this->block->removeSimpleIfEndif){
>  mov->state.predicate = GEN_PREDICATE_NORMAL;
>  mov->state.flag = 0;
> -mov->state.subFlag = 0;
> +mov->state.subFlag = 1;
>}
>if (simdWidth == 1) {
>  mov->state.noMask = 1;
> @@ -2562,7 +2562,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>this->push();
>  this->curr.predicate = GEN_PREDICATE_NORMAL;
>  this->curr.flag = 0;
> -this->curr.subFlag = 0;
> +this->curr.subFlag = 1;
>  }
>  // If there is no branch at the end of this block.
> 
> @@ -2577,7 +2577,7 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>  if(this->block->removeSimpleIfEndif){
>  this->curr.predicate = GEN_PREDICATE_NONE;
>  this->curr.flag = 0;
> -this->curr.subFlag = 0;
> +this->curr.subFlag = 1;
>this->pop();
>  }
>  // If we are in if/endif fix mode, and this block is @@ -2587,13 
> +2587,14
> @@ extern bool OCL_DEBUGINFO; // first defined by calling BVAR in
> program.cpp
>  this->block->insnList.size() != 0 &&
>  this->block->insnList.size() % 1000 == 0 &&
>  this->block->endifLabel.value() != 0) {
> +  this->curr.flag = 0;
> +  this->curr.subFlag = 1;
>ir::LabelIndex jip = this->block->endifLabel;
>this->ENDIF(GenRegister::immd(0), jip);
>this->push();
>  this->curr.predicate = GEN_PREDICATE_NORMAL;
>  this->IF(GenRegister::immd(0), jip, jip);
>this->pop();
> -  this->block->isLargeBlock = true;
>  }
>  // Output the code in the current basic block
>  this->endBackwardGeneration();
> @@ -6534,6 +6535,8 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>sel.push();
>  sel.curr.noMask = 1;
>  sel.curr.predicate = GEN_PREDICATE_NONE;
> +sel.curr.flag = 0;
> +sel.curr.subFlag = 1;
>  sel.cmpBlockIP(GEN_CONDITIONAL_LE, src0, src1);
>sel.pop();
> 
> @@ -6544,6 +6547,8 @@ extern bool OCL_DEBUGINFO; // first defined by
> calling BVAR in program.cpp
>  // this block, as it will always excute with all lanes activated.
>  sel.push();
>sel.curr.predicate = GEN_PREDICATE_NORMAL;
> +  sel.curr.flag = 0;
> +  sel.curr.subFlag = 1;
>sel.setBlockIP(src0, sel.ctx.getMaxLabel());
>sel.curr.predicate = GEN_PREDICATE_NONE;
>sel.curr.noMask = 1;
> @@ -6562,6 +6567,8 @@ extern bool OCL_DEBUGINFO; // first defined by
> 

Re: [Beignet] [PATCH V2] Backend: Initialize the extra value for selection instruction

2016-12-08 Thread Yang, Rong R
As discussed offline, we can use extra = {0} to initialize the extra; it is 
clearer, so I pushed the extra = {0} version of the patch.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Friday, December 9, 2016 11:09
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli 
> Subject: [Beignet] [PATCH V2] Backend: Initialize the extra value for 
> selection
> instruction
> 
> From: Pan Xiuli 
> 
> If we do not initialize the extra, we may get random results when only
> some bits of the extra are used, e.g. splitSend.
> V2: Refine the value to be uint64_t to make sure all bits are set
> 
> Signed-off-by: Pan Xiuli 
> ---
>  backend/src/backend/gen_insn_selection.cpp | 2 +-
> backend/src/backend/gen_insn_selection.hpp | 1 +
>  2 files changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/backend/src/backend/gen_insn_selection.cpp
> b/backend/src/backend/gen_insn_selection.cpp
> index 6624337..7fc22fa 100644
> --- a/backend/src/backend/gen_insn_selection.cpp
> +++ b/backend/src/backend/gen_insn_selection.cpp
> @@ -169,7 +169,7 @@ namespace gbe
>SelectionInstruction::SelectionInstruction(SelectionOpcode op, uint32_t 
> dst,
> uint32_t src) :
>  parent(NULL), opcode(op), dstNum(dst), srcNum(src)
>{
> -extra.function = 0;
> +extra.value = 0ul;
>}
> 
>void SelectionInstruction::prepend(SelectionInstruction ) { diff 
> --git
> a/backend/src/backend/gen_insn_selection.hpp
> b/backend/src/backend/gen_insn_selection.hpp
> index 7ce2b94..1ba5253 100644
> --- a/backend/src/backend/gen_insn_selection.hpp
> +++ b/backend/src/backend/gen_insn_selection.hpp
> @@ -157,6 +157,7 @@ namespace gbe
>  uint16_t printfSize;
>};
>uint32_t workgroupOp;
> +  uint64_t value;
>  } extra;
>  /*! Gen opcode */
>  uint8_t opcode;
> --
> 2.7.4
> 
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Restore jump threading pass for reducing compiling time when run the large and complex kernel like Luxmark.

2016-12-08 Thread yan . wang
From: Yan Wang 

Jump threading pass could optimize the connection between LLVM
basic blocks of the function and provide the chance to merge and
remove unnecessary basic blocks to reduce the compilation time and
ASM code size.

Signed-off-by: Yan Wang 
---
 backend/src/llvm/llvm_to_gen.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
index 42f24b3..e108810 100644
--- a/backend/src/llvm/llvm_to_gen.cpp
+++ b/backend/src/llvm/llvm_to_gen.cpp
@@ -214,7 +214,7 @@ namespace gbe
 // Run instcombine after redundancy elimination to exploit opportunities
 // opened up by them.
 MPM.add(createInstructionCombiningPass());
-//MPM.add(createJumpThreadingPass()); // Thread jumps
+MPM.add(createJumpThreadingPass()); // Thread jumps
 MPM.add(createCorrelatedValuePropagationPass());
 MPM.add(createDeadStoreEliminationPass());  // Delete dead stores
 MPM.add(createAggressiveDCEPass()); // Delete dead instructions
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH] Runtime: Use cl_ulong as CL_DEVICE_MAX_MEM_ALLOC_SIZE's return type.

2016-12-08 Thread Yang, Rong R


> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Simon Richter
> Sent: Thursday, December 8, 2016 20:36
> To: beignet@lists.freedesktop.org
> Subject: Re: [Beignet] [PATCH] Runtime: Use cl_ulong as
> CL_DEVICE_MAX_MEM_ALLOC_SIZE's return type.
> 
> Hi,
> 
> On Thu, Dec 08, 2016 at 03:47:28PM +0800, Yang Rong wrote:
> 
> > diff --git a/src/cl_device_id.c b/src/cl_device_id.c index
> > 24334fd..71a7be1 100644
> > --- a/src/cl_device_id.c
> > +++ b/src/cl_device_id.c
> > @@ -926,6 +926,7 @@ cl_get_device_ids(cl_platform_idplatform,
> >  }   \
> >  if (param_value_size < sizeof device->FIELD)\
> >return CL_INVALID_VALUE;  \
> > +memset(param_value, 0, param_value_size);   \
> >  memcpy(param_value, >FIELD, sizeof device->FIELD);  \
> >  return CL_SUCCESS;
> >
> 
> I don't see the point -- programs are not supposed to behave differently
> here, and it might hide errors when running under valgrind. I don't have any
> strong feelings on this though.

Thanks for your review.
The change is for the case where param_value_size > sizeof(device->FIELD).
For example:
  cl_ulong max_alloc_size;
  clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(max_alloc_size), 
&max_alloc_size, NULL);

param_value_size is 8; if beignet's device->max_alloc_size is a size_t, 
sizeof(device->max_alloc_size) is 4 on i386 systems.
Because max_alloc_size hasn't been initialized, it contains garbage, and 
memcpy(param_value, &device->FIELD, sizeof device->FIELD); 
only copies the low 4 bytes, so the high 4 bytes are still garbage.
For example, if max_alloc_size is 0xdeaddeaddeaddead before calling clGetDeviceInfo 
and device->max_alloc_size is 0x4000,
then after clGetDeviceInfo, max_alloc_size's value is 0xdeaddead00004000.

So I added memset(param_value, 0, param_value_size) to clear param_value; since 
param_value's size is param_value_size, I think it is safe.

What do you think?

> 
> The rest of the patch looks good to me.
> 
>Simon
> ___
> Beignet mailing list
> Beignet@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH V2] Backend: Initialize the extra value for selection instruction

2016-12-08 Thread Guo, Yejun
good catch, looks fine to me, thanks.

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Xiuli 
Pan
Sent: Friday, December 09, 2016 11:09 AM
To: beignet@lists.freedesktop.org
Cc: Pan, Xiuli
Subject: [Beignet] [PATCH V2] Backend: Initialize the extra value for selection 
instruction

From: Pan Xiuli 

If we do not initialize the extra, we may get random results when only some 
bits of the extra are used, e.g. splitSend.
V2: Refine the value to be uint64_t to make sure all bits are set

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_insn_selection.cpp | 2 +-  
backend/src/backend/gen_insn_selection.hpp | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 6624337..7fc22fa 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -169,7 +169,7 @@ namespace gbe
   SelectionInstruction::SelectionInstruction(SelectionOpcode op, uint32_t dst, 
uint32_t src) :
 parent(NULL), opcode(op), dstNum(dst), srcNum(src)
   {
-extra.function = 0;
+extra.value = 0ul;
   }
 
   void SelectionInstruction::prepend(SelectionInstruction ) { diff --git 
a/backend/src/backend/gen_insn_selection.hpp 
b/backend/src/backend/gen_insn_selection.hpp
index 7ce2b94..1ba5253 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -157,6 +157,7 @@ namespace gbe
 uint16_t printfSize;
   };
   uint32_t workgroupOp;
+  uint64_t value;
 } extra;
 /*! Gen opcode */
 uint8_t opcode;
--
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH V2] Backend: Initialize the extra value for selection instruction

2016-12-08 Thread Xiuli Pan
From: Pan Xiuli 

If we do not initialize the extra, we may get random results when
only some bits of the extra are used, e.g. splitSend.
V2: Refine the value to be uint64_t to make sure all bits are set

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_insn_selection.cpp | 2 +-
 backend/src/backend/gen_insn_selection.hpp | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 6624337..7fc22fa 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -169,7 +169,7 @@ namespace gbe
   SelectionInstruction::SelectionInstruction(SelectionOpcode op, uint32_t dst, 
uint32_t src) :
 parent(NULL), opcode(op), dstNum(dst), srcNum(src)
   {
-extra.function = 0;
+extra.value = 0ul;
   }
 
   void SelectionInstruction::prepend(SelectionInstruction ) {
diff --git a/backend/src/backend/gen_insn_selection.hpp 
b/backend/src/backend/gen_insn_selection.hpp
index 7ce2b94..1ba5253 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -157,6 +157,7 @@ namespace gbe
 uint16_t printfSize;
   };
   uint32_t workgroupOp;
+  uint64_t value;
 } extra;
 /*! Gen opcode */
 uint8_t opcode;
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


Re: [Beignet] [PATCH 3/3] enable sends for typed write

2016-12-08 Thread Guo, Yejun
hi,

please ignore all my un-pushed patches.

I got a new idea after discussing with Xiuli: I'll add a function parameter 
'useSends' to the encoder, so we can switch between the sends and send instructions 
inside the encoder function even if all the message payloads are contiguous.

thanks
yejun

-Original Message-
From: Guo, Yejun 
Sent: Wednesday, December 07, 2016 7:10 PM
To: beignet@lists.freedesktop.org
Cc: Guo, Yejun
Subject: [PATCH 3/3] enable sends for typed write

Signed-off-by: Guo, Yejun 
---
 backend/src/backend/gen9_encoder.cpp   | 20 +++
 backend/src/backend/gen9_encoder.hpp   |  1 +
 backend/src/backend/gen_context.cpp|  5 -
 backend/src/backend/gen_insn_selection.cpp | 31 -- 
 backend/src/backend/gen_insn_selection.hpp |  1 +
 5 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/backend/src/backend/gen9_encoder.cpp 
b/backend/src/backend/gen9_encoder.cpp
index b5be852..35fbcb9 100644
--- a/backend/src/backend/gen9_encoder.cpp
+++ b/backend/src/backend/gen9_encoder.cpp
@@ -144,6 +144,26 @@ namespace gbe
 }
   }
 
+  void Gen9Encoder::TYPED_WRITE(GenRegister header, GenRegister data, 
+ bool header_present, unsigned char bti)  {
+if (header.reg() == data.reg())
+  Gen8Encoder::TYPED_WRITE(header, data, header_present, bti);
+else {
+  GenNativeInstruction *insn = this->next(GEN_OPCODE_SENDS);
+  Gen9NativeInstruction *gen9_insn = >gen9_insn;
+  assert(header_present);
+
+  this->setHeader(insn);
+  insn->header.destreg_or_condmod = GEN_SFID_DATAPORT1_DATA;
+
+  setSendsOperands(gen9_insn, GenRegister::null(), header, data);
+  gen9_insn->bits2.sends.src1_length = 4;   //src0_length: 
5(header+u+v+w+lod), src1_length: 4(data)
+
+  gen9_insn->bits2.sends.sel_reg32_desc = 0;
+  setTypedWriteMessage(insn, bti, GEN_TYPED_WRITE, 5, header_present);
+}
+  }
+
   unsigned Gen9Encoder::setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize)
   {
 uint32_t msg_length = 0;
diff --git a/backend/src/backend/gen9_encoder.hpp 
b/backend/src/backend/gen9_encoder.hpp
index 1c40b92..20f269f 100644
--- a/backend/src/backend/gen9_encoder.hpp
+++ b/backend/src/backend/gen9_encoder.hpp
@@ -49,6 +49,7 @@ namespace gbe
 bool isUniform);
 void setSendsOperands(Gen9NativeInstruction *gen9_insn, GenRegister dst, 
GenRegister src0, GenRegister src1);
 virtual void UNTYPED_WRITE(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemNum);
+virtual void TYPED_WRITE(GenRegister header, GenRegister data, bool 
+ header_present, unsigned char bti);
 virtual unsigned setUntypedWriteSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemNum);
 virtual void BYTE_SCATTER(GenRegister addr, GenRegister data, GenRegister 
bti, uint32_t elemSize);
 virtual unsigned setByteScatterSendsMessageDesc(GenNativeInstruction 
*insn, unsigned bti, unsigned elemSize); diff --git 
a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 302a65b..090470f 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2461,8 +2461,11 @@ namespace gbe
 
   void GenContext::emitTypedWriteInstruction(const SelectionInstruction ) 
{
 const GenRegister header = GenRegister::retype(ra->genReg(insn.src(0)), 
GEN_TYPE_UD);
+GenRegister data = ra->genReg(insn.src(5));
+if (!insn.extra.typedWriteSplitSend)
+  data = header;
 const uint32_t bti = insn.getbti();
-p->TYPED_WRITE(header, header, true, bti);
+p->TYPED_WRITE(header, data, true, bti);
   }
 
   static void calcGID(GenRegister& reg, GenRegister& tmp, int flag, int 
subFlag, int dim, GenContext *gc) diff --git 
a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 94c5e9e..44d7fbc 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -2759,7 +2759,6 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
 uint32_t elemID = 0;
 uint32_t i;
 SelectionInstruction *insn = this->appendInsn(SEL_OP_TYPED_WRITE, 0, 
msgNum);
-SelectionVector *msgVector = this->appendVector();;
 
 for( i = 0; i < msgNum; ++i, ++elemID)
   insn->src(elemID) = msgs[i];
@@ -2767,11 +2766,31 @@ extern bool OCL_DEBUGINFO; // first defined by calling 
BVAR in program.cpp
 insn->setbti(bti);
 insn->extra.msglen = msgNum;
 insn->extra.is3DWrite = is3D;
-// Sends require contiguous allocation
-msgVector->regNum = msgNum;
-msgVector->isSrc = 1;
-msgVector->offsetID = 0;
-msgVector->reg = >src(0);
+
+if (hasSends()) {
+  assert(msgNum == 9);
+  insn->extra.typedWriteSplitSend = 1;
+  //header + coords
+  SelectionVector *msgVector = this->appendVector();
+  msgVector->regNum = 5;
+  

Re: [Beignet] [PATCH] Runtime: Use cl_ulong as CL_DEVICE_MAX_MEM_ALLOC_SIZE's return type.

2016-12-08 Thread Simon Richter
Hi,

On Thu, Dec 08, 2016 at 03:47:28PM +0800, Yang Rong wrote:

> diff --git a/src/cl_device_id.c b/src/cl_device_id.c
> index 24334fd..71a7be1 100644
> --- a/src/cl_device_id.c
> +++ b/src/cl_device_id.c
> @@ -926,6 +926,7 @@ cl_get_device_ids(cl_platform_idplatform,
>  }   \
>  if (param_value_size < sizeof device->FIELD)\
>return CL_INVALID_VALUE;  \
> +memset(param_value, 0, param_value_size);   \
>  memcpy(param_value, >FIELD, sizeof device->FIELD);  \
>  return CL_SUCCESS;
>  

I don't see the point -- programs are not supposed to behave differently
here, and it might hide errors when running under valgrind. I don't have
any strong feelings on this though.

The rest of the patch looks good to me.

   Simon
___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] Backend: Initialize the extra value for selection instruction

2016-12-08 Thread Xiuli Pan
From: Pan Xiuli 

If we do not initialize the extra, we may get random results when
only some bits of the extra are used, e.g. splitSend.

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen_insn_selection.cpp | 2 +-
 backend/src/backend/gen_insn_selection.hpp | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/src/backend/gen_insn_selection.cpp 
b/backend/src/backend/gen_insn_selection.cpp
index 6624337..86ed5c3 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -169,7 +169,7 @@ namespace gbe
   SelectionInstruction::SelectionInstruction(SelectionOpcode op, uint32_t dst, 
uint32_t src) :
 parent(NULL), opcode(op), dstNum(dst), srcNum(src)
   {
-extra.function = 0;
+extra.value = 0;
   }
 
   void SelectionInstruction::prepend(SelectionInstruction ) {
diff --git a/backend/src/backend/gen_insn_selection.hpp 
b/backend/src/backend/gen_insn_selection.hpp
index 7ce2b94..b72b42e 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -157,6 +157,7 @@ namespace gbe
 uint16_t printfSize;
   };
   uint32_t workgroupOp;
+  uint32_t value;
 } extra;
 /*! Gen opcode */
 uint8_t opcode;
-- 
2.7.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet


[Beignet] [PATCH] eliminate build warnings in i386 system.

2016-12-08 Thread Yang Rong
From: Meng Mengmeng 

Signed-off-by: Yang Rong 
---
 backend/src/ir/profiling.cpp | 2 +-
 src/cl_command_queue_gen7.c  | 2 +-
 src/cl_kernel.c  | 8 
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/backend/src/ir/profiling.cpp b/backend/src/ir/profiling.cpp
index 09537fa..413ed40 100644
--- a/backend/src/ir/profiling.cpp
+++ b/backend/src/ir/profiling.cpp
@@ -58,7 +58,7 @@ namespace ir
   proLog = ((proLog << 32) & 0x) + log->timestampPrologLo;
   uint64_t epiLog = log->timestampEpilogHi;
   epiLog = ((epiLog << 32) & 0x) + log->timestampEpilogLo;
-  printf(" | dispatch Mask:%4x prolog:%10lu  epilog:%10lu |\n", 
log->dispatchMask, proLog, epiLog);
+  printf(" | dispatch Mask:%4x prolog:%10llu  epilog:%10llu |\n", 
log->dispatchMask, proLog, epiLog);
 
   printf(" | globalX:%4d~%4d  globalY:%4d~%4d  globalZ:%4d~%4d |\n", 
log->gidXStart, log->gidXEnd,
   log->gidYStart, log->gidYEnd, log->gidZStart, log->gidZEnd);
diff --git a/src/cl_command_queue_gen7.c b/src/cl_command_queue_gen7.c
index 4487360..3316375 100644
--- a/src/cl_command_queue_gen7.c
+++ b/src/cl_command_queue_gen7.c
@@ -133,7 +133,7 @@ cl_upload_constant_buffer(cl_command_queue queue, cl_kernel 
ker, cl_gpgpu gpgpu)
 if (constant_addrspace >= 0) {
   size_t global_const_size = 
interp_program_get_global_constant_size(ker->program->opaque);
   if (global_const_size > 0) {
-*(uint64_t*)(ker->curbe + constant_addrspace) = 
(uint64_t)ker->program->global_data_ptr;
+*(char **)(ker->curbe + constant_addrspace) = 
ker->program->global_data_ptr;
 cl_gpgpu_bind_buf(gpgpu, ker->program->global_data, 
constant_addrspace, 0, ALIGN(global_const_size, getpagesize()), BTI_CONSTANT);
   }
 }
diff --git a/src/cl_kernel.c b/src/cl_kernel.c
index 49bbaf0..6fb4a7e 100644
--- a/src/cl_kernel.c
+++ b/src/cl_kernel.c
@@ -337,13 +337,13 @@ cl_get_kernel_arg_info(cl_kernel k, cl_uint arg_index, 
cl_kernel_arg_info param_
 if (!param_value) return CL_SUCCESS;
 if (param_value_size < sizeof(cl_kernel_arg_address_qualifier))
   return CL_INVALID_VALUE;
-if ((cl_ulong)ret_info == 0) {
+if ((size_t)ret_info == 0) {
   *(cl_kernel_arg_address_qualifier *)param_value = 
CL_KERNEL_ARG_ADDRESS_PRIVATE;
-} else if ((cl_ulong)ret_info == 1 || (cl_ulong)ret_info == 4) {
+} else if ((size_t)ret_info == 1 || (size_t)ret_info == 4) {
   *(cl_kernel_arg_address_qualifier *)param_value = 
CL_KERNEL_ARG_ADDRESS_GLOBAL;
-} else if ((cl_ulong)ret_info == 2) {
+} else if ((size_t)ret_info == 2) {
   *(cl_kernel_arg_address_qualifier *)param_value = 
CL_KERNEL_ARG_ADDRESS_CONSTANT;
-} else if ((cl_ulong)ret_info == 3) {
+} else if ((size_t)ret_info == 3) {
   *(cl_kernel_arg_address_qualifier *)param_value = 
CL_KERNEL_ARG_ADDRESS_LOCAL;
 } else {
   /* If no address qualifier is specified, the default address qualifier
-- 
2.1.4

___
Beignet mailing list
Beignet@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/beignet