Re: [Beignet] [PATCH 11/19] Backend: Add profilingProlog function for GenContext.

2015-11-03 Thread Yang, Rong R


> -----Original Message-----
> From: Beignet [mailto:beignet-boun...@lists.freedesktop.org] On Behalf Of
> junyan...@inbox.com
> Sent: Wednesday, September 9, 2015 8:01
> To: beignet@lists.freedesktop.org
> Subject: [Beignet] [PATCH 11/19] Backend: Add profilingProlog function for
> GenContext.
> 
> From: Junyan He 
> 
> The profilingProlog will collect useful information for profiling,
> including XYZ global range and prolog timestamp.
> 
> Signed-off-by: Junyan He 
> ---
>  backend/src/backend/gen_context.cpp |  116
> +++
>  backend/src/backend/gen_context.hpp |2 +
>  2 files changed, 118 insertions(+)
> 
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 435b224..696d86a 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -2185,6 +2185,119 @@ namespace gbe
>  p->TYPED_WRITE(header, true, bti);
>}
> 
> +  void GenContext::calcGlobalXYZRange(GenRegister& reg, GenRegister&
> +tmp, int flag, int subFlag)
> +  {
> +#define CALC_GID(dim)  do {\
> +  GenRegister g##dim##start = GenRegister::offset(reg, 0, 8 + dim*8); \
> +  GenRegister g##dim##end = GenRegister::offset(g##dim##start, 0, 4);
> +\
> +  GenRegister id##dim =
> +GenRegister::toUniform(ra->genReg(GenRegister::ud16grf(ir::ocl::lid##di
> +m)), GEN_TYPE_UD); \
> +  GenRegister localsz##dim =
> +GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::lsize##d
> +im)), GEN_TYPE_UD); \
> +  GenRegister gid##dim =
> +GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::groupid#
> +#dim)), GEN_TYPE_UD); \
> +  GenRegister goffset##dim =
> +GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::goffset#
> +#dim)), GEN_TYPE_UD); \
> +  p->MUL(g##dim##start, localsz##dim, gid##dim); \
> +  p->ADD(g##dim##start, g##dim##start, id##dim); \
> +  p->ADD(g##dim##start, g##dim##start, goffset##dim); \
> +  GenRegister ip; \
> +  p->MOV(flagReg, GenRegister::immuw(0x0)); \
> +  p->curr.useFlag(flag, subFlag); \
> +  p->curr.predicate = GEN_PREDICATE_NONE; \
> +  if (this->simdWidth == 16) \
> +  p->curr.execWidth = 16; \
> +  else \
> +  p->curr.execWidth = 8; \
> +  if (!isDWLabel()) { \
> +ip = ra->genReg(GenRegister::uw16grf(ir::ocl::blockip)); \
> +p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immuw(0x)); \
> +  } else { \
> +ip = ra->genReg(GenRegister::ud16grf(ir::ocl::dwblockip)); \
> +p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immud(0x)); \
> +  } \
> +  p->curr.execWidth = 1; \
> +  p->MOV(GenRegister::retype(tmp, GEN_TYPE_UW), flagReg); \
> +  if (this->simdWidth == 16) \
> +  p->OR(tmp, tmp, GenRegister::immud(0x)); \
> +  else \
> +  p->OR(tmp, tmp, GenRegister::immud(0xff00)); \
> +  p->FBL(tmp, tmp); \
> +  p->ADD(tmp, tmp, GenRegister::negate(GenRegister::immud(0x1))); \
> +  p->MUL(tmp, tmp, GenRegister::immud(4)); \
> +  p->MOV(GenRegister::addr1(0), GenRegister::retype(tmp,
> GEN_TYPE_UW));
> +\
> +  GenRegister dimEnd = GenRegister::to_indirect1xN(id##dim, 0); \
> +  p->MOV(tmp, dimEnd); \
> +  p->MUL(g##dim##end, localsz##dim, gid##dim); \
> +  p->ADD(g##dim##end, g##dim##end, tmp); \
> +  p->ADD(g##dim##end, g##dim##end, goffset##dim); \ } while(0)
> +
The CALC_GID macro is too long, which makes it hard to read and debug. The
only part that actually needs a macro is fetching ir::ocl::lid,
ir::ocl::lsize, ir::ocl::groupid and ir::ocl::goffset for each dimension.
Since there are only 3 dimensions, could you use if/else for this, or define
a macro that only selects these registers?

> +GenRegister flagReg = GenRegister::flag(flag, subFlag);
> +p->push(); {
> +  p->curr.execWidth = 1;
> +  p->curr.predicate = GEN_PREDICATE_NONE;
> +  p->curr.noMask = 1;
> +  CALC_GID(0);
> +  CALC_GID(1);
> +  CALC_GID(2);
> +} p->pop();
> +
> +#undef CALC_GID
> +  }
> +
> +  void GenContext::profilingProlog(void) {
> +// record the prolog, globalXYZ and lasttimestamp at the very beginning.
> +GenRegister profilingReg2, profilingReg3, profilingReg4;
> +GenRegister tmArf = GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
> +0xc0,
> +0,
> +GEN_TYPE_UW,
> +GEN_VERTICAL_STRIDE_4,
> +GEN_WIDTH_4,
> +GEN_HORIZONTAL_STRIDE_1);
> +if (this->simdWidth == 16) {
> +  profilingReg2 = 
> ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts1));
> +  profilingReg3 = GenRegister::offset(profilingReg2, 1);
> +  profilingReg4 = 
> ra->genReg(GenRegister::ud16grf(ir::ocl::p

[Beignet] [PATCH 11/19] Backend: Add profilingProlog function for GenContext.

2015-09-08 Thread junyan . he
From: Junyan He 

The profilingProlog will collect useful information
for profiling, including XYZ global range and prolog
timestamp.

Signed-off-by: Junyan He 
---
 backend/src/backend/gen_context.cpp |  116 +++
 backend/src/backend/gen_context.hpp |2 +
 2 files changed, 118 insertions(+)

diff --git a/backend/src/backend/gen_context.cpp 
b/backend/src/backend/gen_context.cpp
index 435b224..696d86a 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2185,6 +2185,119 @@ namespace gbe
 p->TYPED_WRITE(header, true, bti);
   }
 
+  void GenContext::calcGlobalXYZRange(GenRegister& reg, GenRegister& tmp, int 
flag, int subFlag)
+  {
+#define CALC_GID(dim)  do {\
+  GenRegister g##dim##start = GenRegister::offset(reg, 0, 8 + dim*8); \
+  GenRegister g##dim##end = GenRegister::offset(g##dim##start, 0, 4);  \
+  GenRegister id##dim = 
GenRegister::toUniform(ra->genReg(GenRegister::ud16grf(ir::ocl::lid##dim)), 
GEN_TYPE_UD); \
+  GenRegister localsz##dim = 
GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::lsize##dim)), 
GEN_TYPE_UD); \
+  GenRegister gid##dim = 
GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::groupid##dim)), 
GEN_TYPE_UD); \
+  GenRegister goffset##dim = 
GenRegister::toUniform(ra->genReg(GenRegister::ud1grf(ir::ocl::goffset##dim)), 
GEN_TYPE_UD); \
+  p->MUL(g##dim##start, localsz##dim, gid##dim); \
+  p->ADD(g##dim##start, g##dim##start, id##dim); \
+  p->ADD(g##dim##start, g##dim##start, goffset##dim); \
+  GenRegister ip; \
+  p->MOV(flagReg, GenRegister::immuw(0x0)); \
+  p->curr.useFlag(flag, subFlag); \
+  p->curr.predicate = GEN_PREDICATE_NONE; \
+  if (this->simdWidth == 16) \
+  p->curr.execWidth = 16; \
+  else \
+  p->curr.execWidth = 8; \
+  if (!isDWLabel()) { \
+ip = ra->genReg(GenRegister::uw16grf(ir::ocl::blockip)); \
+p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immuw(0x)); \
+  } else { \
+ip = ra->genReg(GenRegister::ud16grf(ir::ocl::dwblockip)); \
+p->CMP(GEN_CONDITIONAL_EQ, ip, GenRegister::immud(0x)); \
+  } \
+  p->curr.execWidth = 1; \
+  p->MOV(GenRegister::retype(tmp, GEN_TYPE_UW), flagReg); \
+  if (this->simdWidth == 16) \
+  p->OR(tmp, tmp, GenRegister::immud(0x)); \
+  else \
+  p->OR(tmp, tmp, GenRegister::immud(0xff00)); \
+  p->FBL(tmp, tmp); \
+  p->ADD(tmp, tmp, GenRegister::negate(GenRegister::immud(0x1))); \
+  p->MUL(tmp, tmp, GenRegister::immud(4)); \
+  p->MOV(GenRegister::addr1(0), GenRegister::retype(tmp, GEN_TYPE_UW)); \
+  GenRegister dimEnd = GenRegister::to_indirect1xN(id##dim, 0); \
+  p->MOV(tmp, dimEnd); \
+  p->MUL(g##dim##end, localsz##dim, gid##dim); \
+  p->ADD(g##dim##end, g##dim##end, tmp); \
+  p->ADD(g##dim##end, g##dim##end, goffset##dim); \
+} while(0)
+
+GenRegister flagReg = GenRegister::flag(flag, subFlag);
+p->push(); {
+  p->curr.execWidth = 1;
+  p->curr.predicate = GEN_PREDICATE_NONE;
+  p->curr.noMask = 1;
+  CALC_GID(0);
+  CALC_GID(1);
+  CALC_GID(2);
+} p->pop();
+
+#undef CALC_GID
+  }
+
+  void GenContext::profilingProlog(void) {
+// record the prolog, globalXYZ and lasttimestamp at the very beginning.
+GenRegister profilingReg2, profilingReg3, profilingReg4;
+GenRegister tmArf = GenRegister(GEN_ARCHITECTURE_REGISTER_FILE,
+0xc0,
+0,
+GEN_TYPE_UW,
+GEN_VERTICAL_STRIDE_4,
+GEN_WIDTH_4,
+GEN_HORIZONTAL_STRIDE_1);
+if (this->simdWidth == 16) {
+  profilingReg2 = ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts1));
+  profilingReg3 = GenRegister::offset(profilingReg2, 1);
+  profilingReg4 = ra->genReg(GenRegister::ud16grf(ir::ocl::profilingts2));
+} else {
+  GBE_ASSERT(this->simdWidth == 8);
+  profilingReg2 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts2));
+  profilingReg3 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts3));
+  profilingReg4 = ra->genReg(GenRegister::ud8grf(ir::ocl::profilingts4));
+}
+
+/* MOV(4)   prolog<1>:UW   arf_tm<4,4,1>:UW  */
+/* MOV(4)   lastTsReg<1>:UW  prolog<4,4,1>:UW  */
+GenRegister prolog = profilingReg2;
+prolog.type = GEN_TYPE_UW;
+prolog.hstride = GEN_HORIZONTAL_STRIDE_1;
+prolog.vstride = GEN_VERTICAL_STRIDE_4;
+prolog.width = GEN_WIDTH_4;
+prolog = GenRegister::offset(prolog, 0, 4*sizeof(uint32_t));
+
+GenRegister lastTsReg = GenRegister::toUniform(profilingReg3, GEN_TYPE_UL);
+lastTsReg = GenRegister::offset(lastTsReg, 0, 2*sizeof(uint64_t));
+lastTsReg.type = GEN_TYPE_UW;
+lastTsReg.hstride = GEN_HORIZONTAL_STRIDE_1;
+lastTsReg.vstride = GEN_VERTICAL_STRIDE_4;
+lastTsReg.width = GEN_WIDTH_4;
+
+GenRegister gids = GenRegister::toUniform(profilingReg4, GEN_TYPE_UD);
+GenRegister tmp = GenRegister::toUniform(profilingReg4, GEN_TYPE_UD);
+
+// X Y and Z
+this->calcGlobalXYZRange(gids, tmp, 0, 1);
+
+p->push(); {
+  p