Re: [Beignet] [PATCH V4] Backend: Refine block read/write instruction selection

2016-12-25 Thread Yang, Rong R
Pushed, thanks.

> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Guo, Yejun
> Sent: Monday, December 19, 2016 16:43
> To: Pan, Xiuli <xiuli@intel.com>; beignet@lists.freedesktop.org
> Cc: Pan, Xiuli <xiuli@intel.com>
> Subject: Re: [Beignet] [PATCH V4] Backend: Refine block read/write
> instruction selection
> 
> LGTM, thanks.
> 
> -Original Message-
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> Xiuli Pan
> Sent: Monday, December 19, 2016 3:58 PM
> To: beignet@lists.freedesktop.org
> Cc: Pan, Xiuli
> Subject: [Beignet] [PATCH V4] Backend: Refine block read/write instruction
> selection
> 
> From: Pan Xiuli <xiuli@intel.com>
> 
> Move the block pack/unpack into instruction selection in order to get
> optimization. Also change some variable name to avoid misleading.
> And make some new function in GenEncoder class.
> V2: Use ud8grf instead of f8grf to save a retype.
> V3: Merge change name patch and fix some comments.
> V4: Fix some simd 8 related bug and comments typo.
> 
> Signed-off-by: Pan Xiuli <xiuli@intel.com>
> ---
>  backend/src/backend/gen8_encoder.cpp   |  40 ++-
>  backend/src/backend/gen_context.cpp| 459 
> ++---
>  backend/src/backend/gen_encoder.cpp| 105 ---
>  backend/src/backend/gen_encoder.hpp|  18 +-
>  backend/src/backend/gen_insn_selection.cpp | 448
> +---
>  5 files changed, 440 insertions(+), 630 deletions(-)
> 
> diff --git a/backend/src/backend/gen8_encoder.cpp
> b/backend/src/backend/gen8_encoder.cpp
> index 8f73346..39dcfd3 100644
> --- a/backend/src/backend/gen8_encoder.cpp
> +++ b/backend/src/backend/gen8_encoder.cpp
> @@ -840,20 +840,15 @@ namespace gbe
>  gen8_insn->bits3.gen8_block_rw_a64.header_present = 1;
>}
> 
> -  void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header,
> uint32_t bti, uint32_t size) {
> -   GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> +  void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header,
> uint32_t bti, uint32_t ow_size) {
> +GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
>  const uint32_t msg_length = 1;
> -uint32_t rsize = size / 2;
> -uint32_t msgsize = size;
> -// When size is 1 OWord, which means half a reg, we need to know which
> half to use
> -if (size == 1) {
> -  if (dst.subnr == 0)
> -msgsize = 0;
> -  else
> -msgsize = 1;
> -}
> -rsize = rsize == 0 ? 1 : rsize;
> -const uint32_t response_length = rsize; // Size is in regs
> +uint32_t sizeinreg = ow_size / 2;
> +// half reg should also have size 1
> +sizeinreg = sizeinreg == 0 ? 1 : sizeinreg;
> +const uint32_t block_size = getOBlockSize(ow_size, dst.subnr == 0);
> +const uint32_t response_length = sizeinreg; // Size is in reg
> +
>  this->setHeader(insn);
>  this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
>  this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
> @@ -861,21 +856,22 @@ namespace gbe
>  setOBlockRWA64(this,
> insn,
> bti,
> -   msgsize,
> +   block_size,
> GEN8_P1_BLOCK_READ_A64,
> msg_length,
> response_length);
> 
>}
> 
> -  void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti,
> uint32_t size) {
> -   GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> -uint32_t rsize = size / 2;
> -rsize = rsize == 0 ? 1 : rsize;
> -const uint32_t msg_length = 1 + rsize; // Size is in owords
> +  void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti,
> uint32_t ow_size) {
> +GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
> +uint32_t sizeinreg = ow_size / 2;
> +// half reg should also have size 1
> +sizeinreg = sizeinreg == 0 ? 1 : sizeinreg;
> +const uint32_t msg_length = 1 + sizeinreg; // Size is in reg and header
>  const uint32_t response_length = 0;
> -uint32_t msgsize = size;
> -msgsize = msgsize == 1 ? 0 : msgsize;
> +const uint32_t block_size = getOBlockSize(ow_size);
> +
>  this->setHeader(insn);
>  this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
>  this->setSrc1(insn, GenRegister::immud(0));
> @@ -883,7 +879,7 @@ namespace gbe
>  setOBlockRWA64(this,
> insn,
> bti,
> -   msgsize,
> +   block_size,
> GE

Re: [Beignet] [PATCH V4] Backend: Refine block read/write instruction selection

2016-12-19 Thread Guo, Yejun
LGTM, thanks.

-Original Message-
From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of Xiuli 
Pan
Sent: Monday, December 19, 2016 3:58 PM
To: beignet@lists.freedesktop.org
Cc: Pan, Xiuli
Subject: [Beignet] [PATCH V4] Backend: Refine block read/write instruction 
selection

From: Pan Xiuli <xiuli@intel.com>

Move the block pack/unpack into instruction selection in order to get
optimization. Also change some variable name to avoid misleading.
And make some new function in GenEncoder class.
V2: Use ud8grf instead of f8grf to save a retype.
V3: Merge change name patch and fix some comments.
V4: Fix some simd 8 related bug and comments typo.

Signed-off-by: Pan Xiuli <xiuli@intel.com>
---
 backend/src/backend/gen8_encoder.cpp   |  40 ++-
 backend/src/backend/gen_context.cpp| 459 ++---
 backend/src/backend/gen_encoder.cpp| 105 ---
 backend/src/backend/gen_encoder.hpp|  18 +-
 backend/src/backend/gen_insn_selection.cpp | 448 +---
 5 files changed, 440 insertions(+), 630 deletions(-)

diff --git a/backend/src/backend/gen8_encoder.cpp 
b/backend/src/backend/gen8_encoder.cpp
index 8f73346..39dcfd3 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -840,20 +840,15 @@ namespace gbe
 gen8_insn->bits3.gen8_block_rw_a64.header_present = 1;
   }
 
-  void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header, uint32_t 
bti, uint32_t size) {
-   GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+  void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header, uint32_t 
bti, uint32_t ow_size) {
+GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 const uint32_t msg_length = 1;
-uint32_t rsize = size / 2;
-uint32_t msgsize = size;
-// When size is 1 OWord, which means half a reg, we need to know which 
half to use
-if (size == 1) {
-  if (dst.subnr == 0)
-msgsize = 0;
-  else
-msgsize = 1;
-}
-rsize = rsize == 0 ? 1 : rsize;
-const uint32_t response_length = rsize; // Size is in regs
+uint32_t sizeinreg = ow_size / 2;
+// half reg should also have size 1
+sizeinreg = sizeinreg == 0 ? 1 : sizeinreg;
+const uint32_t block_size = getOBlockSize(ow_size, dst.subnr == 0);
+const uint32_t response_length = sizeinreg; // Size is in reg
+
 this->setHeader(insn);
 this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
 this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
@@ -861,21 +856,22 @@ namespace gbe
 setOBlockRWA64(this,
insn,
bti,
-   msgsize,
+   block_size,
GEN8_P1_BLOCK_READ_A64,
msg_length,
response_length);
 
   }
 
-  void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti, uint32_t 
size) {
-   GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
-uint32_t rsize = size / 2;
-rsize = rsize == 0 ? 1 : rsize;
-const uint32_t msg_length = 1 + rsize; // Size is in owords
+  void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti, uint32_t 
ow_size) {
+GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+uint32_t sizeinreg = ow_size / 2;
+// half reg should also have size 1
+sizeinreg = sizeinreg == 0 ? 1 : sizeinreg;
+const uint32_t msg_length = 1 + sizeinreg; // Size is in reg and header
 const uint32_t response_length = 0;
-uint32_t msgsize = size;
-msgsize = msgsize == 1 ? 0 : msgsize;
+const uint32_t block_size = getOBlockSize(ow_size);
+
 this->setHeader(insn);
 this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
 this->setSrc1(insn, GenRegister::immud(0));
@@ -883,7 +879,7 @@ namespace gbe
 setOBlockRWA64(this,
insn,
bti,
-   msgsize,
+   block_size,
GEN8_P1_BLOCK_WRITE_A64,
msg_length,
response_length);
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 8288fa5..791e607 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3551,458 +3551,39 @@ namespace gbe
   }
 
   void GenContext::emitOBReadInstruction(const SelectionInstruction ) {
-const GenRegister dst= ra->genReg(insn.dst(1));
-const GenRegister addrreg = ra->genReg(insn.src(0));
-uint32_t type = dst.type;
-uint32_t typesize = typeSize(type);
-const uint32_t vec_size = insn.extra.elem;
-const GenRegister tmp = GenRegister::retype(ra->genReg(insn.dst(1 + 
vec_size)), type);
-const uint32_t simdWidth = p->curr.execWidth;
-const GenRegister header = GenRegister::retype(ra->genReg(insn.dst(0)), 
GEN_TYPE_UD);
-const GenRegister addr = GenRegister::toUniform(ad

[Beignet] [PATCH V4] Backend: Refine block read/write instruction selection

2016-12-18 Thread Xiuli Pan
From: Pan Xiuli 

Move the block pack/unpack into instruction selection in order to get
optimization. Also change some variable name to avoid misleading.
And make some new function in GenEncoder class.
V2: Use ud8grf instead of f8grf to save a retype.
V3: Merge change name patch and fix some comments.
V4: Fix some simd 8 related bug and comments typo.

Signed-off-by: Pan Xiuli 
---
 backend/src/backend/gen8_encoder.cpp   |  40 ++-
 backend/src/backend/gen_context.cpp| 459 ++---
 backend/src/backend/gen_encoder.cpp| 105 ---
 backend/src/backend/gen_encoder.hpp|  18 +-
 backend/src/backend/gen_insn_selection.cpp | 448 +---
 5 files changed, 440 insertions(+), 630 deletions(-)

diff --git a/backend/src/backend/gen8_encoder.cpp 
b/backend/src/backend/gen8_encoder.cpp
index 8f73346..39dcfd3 100644
--- a/backend/src/backend/gen8_encoder.cpp
+++ b/backend/src/backend/gen8_encoder.cpp
@@ -840,20 +840,15 @@ namespace gbe
 gen8_insn->bits3.gen8_block_rw_a64.header_present = 1;
   }
 
-  void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header, uint32_t 
bti, uint32_t size) {
-   GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+  void Gen8Encoder::OBREADA64(GenRegister dst, GenRegister header, uint32_t 
bti, uint32_t ow_size) {
+GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
 const uint32_t msg_length = 1;
-uint32_t rsize = size / 2;
-uint32_t msgsize = size;
-// When size is 1 OWord, which means half a reg, we need to know which 
half to use
-if (size == 1) {
-  if (dst.subnr == 0)
-msgsize = 0;
-  else
-msgsize = 1;
-}
-rsize = rsize == 0 ? 1 : rsize;
-const uint32_t response_length = rsize; // Size is in regs
+uint32_t sizeinreg = ow_size / 2;
+// half reg should also have size 1
+sizeinreg = sizeinreg == 0 ? 1 : sizeinreg;
+const uint32_t block_size = getOBlockSize(ow_size, dst.subnr == 0);
+const uint32_t response_length = sizeinreg; // Size is in reg
+
 this->setHeader(insn);
 this->setDst(insn, GenRegister::uw16grf(dst.nr, 0));
 this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
@@ -861,21 +856,22 @@ namespace gbe
 setOBlockRWA64(this,
insn,
bti,
-   msgsize,
+   block_size,
GEN8_P1_BLOCK_READ_A64,
msg_length,
response_length);
 
   }
 
-  void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti, uint32_t 
size) {
-   GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
-uint32_t rsize = size / 2;
-rsize = rsize == 0 ? 1 : rsize;
-const uint32_t msg_length = 1 + rsize; // Size is in owords
+  void Gen8Encoder::OBWRITEA64(GenRegister header, uint32_t bti, uint32_t 
ow_size) {
+GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
+uint32_t sizeinreg = ow_size / 2;
+// half reg should also have size 1
+sizeinreg = sizeinreg == 0 ? 1 : sizeinreg;
+const uint32_t msg_length = 1 + sizeinreg; // Size is in reg and header
 const uint32_t response_length = 0;
-uint32_t msgsize = size;
-msgsize = msgsize == 1 ? 0 : msgsize;
+const uint32_t block_size = getOBlockSize(ow_size);
+
 this->setHeader(insn);
 this->setSrc0(insn, GenRegister::ud8grf(header.nr, 0));
 this->setSrc1(insn, GenRegister::immud(0));
@@ -883,7 +879,7 @@ namespace gbe
 setOBlockRWA64(this,
insn,
bti,
-   msgsize,
+   block_size,
GEN8_P1_BLOCK_WRITE_A64,
msg_length,
response_length);
diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 8288fa5..791e607 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -3551,458 +3551,39 @@ namespace gbe
   }
 
   void GenContext::emitOBReadInstruction(const SelectionInstruction ) {
-const GenRegister dst= ra->genReg(insn.dst(1));
-const GenRegister addrreg = ra->genReg(insn.src(0));
-uint32_t type = dst.type;
-uint32_t typesize = typeSize(type);
-const uint32_t vec_size = insn.extra.elem;
-const GenRegister tmp = GenRegister::retype(ra->genReg(insn.dst(1 + 
vec_size)), type);
-const uint32_t simdWidth = p->curr.execWidth;
-const GenRegister header = GenRegister::retype(ra->genReg(insn.dst(0)), 
GEN_TYPE_UD);
-const GenRegister addr = GenRegister::toUniform(addrreg, addrreg.type);
-GenRegister headeraddr;
-bool isA64 = insn.getbti() == 255;
+const GenRegister header = ra->genReg(insn.src(0));
+const GenRegister tmp = ra->genReg(insn.dst(0));
+const uint32_t bti = insn.getbti();
+const uint32_t ow_size = insn.extra.elem;
+bool isA64 = bti == 255;
 if (isA64)
-  headeraddr =